about | summary | refs | log | tree | commit | diff
path: root/src/mlia/nn/rewrite/core/graph_edit/diff.py
diff options
context:
space:
mode:
Diffstat (limited to 'src/mlia/nn/rewrite/core/graph_edit/diff.py')
-rw-r--r-- src/mlia/nn/rewrite/core/graph_edit/diff.py 109
1 files changed, 109 insertions, 0 deletions
diff --git a/src/mlia/nn/rewrite/core/graph_edit/diff.py b/src/mlia/nn/rewrite/core/graph_edit/diff.py
new file mode 100644
index 0000000..b6c9616
--- /dev/null
+++ b/src/mlia/nn/rewrite/core/graph_edit/diff.py
@@ -0,0 +1,109 @@
+# SPDX-FileCopyrightText: Copyright 2023, Arm Limited and/or its affiliates.
+# SPDX-License-Identifier: Apache-2.0
+import os
+from collections import defaultdict
+
+os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
+import tensorflow as tf
+
+tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
+
+import numpy as np
+from tensorflow.lite.python import interpreter as interpreter_wrapper
+from mlia.nn.rewrite.core.utils.numpy_tfrecord import NumpyTFReader, NumpyTFWriter
+
+
def diff(file1, file2):
    """Compare two tensor record files input by input.

    Both files are opened with ``NumpyTFReader`` and walked in lockstep. For
    each pair of inputs, every tensor is converted to double precision and
    the mean absolute error (MAE) between the two versions is recorded.

    Iteration follows ``zip`` semantics: it stops at the end of the shorter
    file, so surplus inputs in the longer file are not compared.

    Args:
        file1: First record file, passed unchanged to ``NumpyTFReader``.
        file2: Second record file, passed unchanged to ``NumpyTFReader``.

    Returns:
        A tuple ``(results, mean)``. ``results`` holds one dict per compared
        input, mapping tensor name to its MAE. ``mean`` is the average of
        all those MAE values, or NaN when no tensor was compared.

    Raises:
        AssertionError: If a pair of inputs does not contain the same set
            of tensor names.
    """
    results = []

    dataset1 = NumpyTFReader(file1)
    dataset2 = NumpyTFReader(file2)

    for i, (x1, x2) in enumerate(zip(dataset1, dataset2)):
        # Raised explicitly (rather than via ``assert``) so that the check is
        # still performed when Python runs with -O.
        if not x1.keys() == x2.keys():
            raise AssertionError(
                "At input %d the files have different sets of tensors.\n%s: %s\n%s: %s\n"
                % (
                    i,
                    file1,
                    ", ".join(x1.keys()),
                    file2,
                    ", ".join(x2.keys()),
                )
            )

        per_input = {}
        for k in x1.keys():
            v1 = x1[k].numpy().astype(np.double)
            v2 = x2[k].numpy().astype(np.double)
            per_input[k] = abs(v1 - v2).mean()
        results.append(per_input)

    total = sum(sum(x.values()) for x in results)
    count = sum(len(x) for x in results)
    if count == 0:
        # Nothing was compared (empty file, or inputs without tensors):
        # report NaN instead of failing with ZeroDivisionError.
        return results, float("nan")
    return results, total / count
+
+
def diff_stats(file1, file2, per_tensor_and_channel=False):
    """Compute MAE and normalized RMSE between two tensor record files.

    The files are opened with ``NumpyTFReader``. The first file is read
    twice: once on its own to obtain the element-wise (per-channel) mean of
    every tensor, then in lockstep with the second file to accumulate the
    absolute error, the squared error and the variance of the first file
    around that mean. All arithmetic is done in double precision.

    The lockstep pass follows ``zip`` semantics, so only as many inputs as
    the shorter file provides are compared.

    NOTE(review): the first dataset is iterated twice, so the object returned
    by ``NumpyTFReader`` is assumed to be re-iterable - confirm.

    Args:
        file1: Reference record file, passed unchanged to ``NumpyTFReader``.
        file2: Record file compared against ``file1``.
        per_tensor_and_channel: If true, return the per-tensor, per-channel
            statistics instead of two overall averages.

    Returns:
        A tuple ``(mae, nrmse)``.

        With ``per_tensor_and_channel`` set, both are dicts keyed by tensor
        name. ``mae`` holds the element-wise mean absolute error. ``nrmse``
        holds the RMSE divided by the standard deviation of ``file1``,
        restricted to the elements whose variance is greater than zero.

        Otherwise both are scalars: the average over every element of every
        tensor (NaN when there is nothing to average).

    Raises:
        AssertionError: If a pair of inputs does not contain the same set
            of tensor names.
    """
    dataset1 = NumpyTFReader(file1)
    dataset2 = NumpyTFReader(file2)

    totals = defaultdict(dict)

    def add_total(name, key, values):
        """Add ``values`` to the running sum stored at totals[name][key]."""
        if key in totals[name]:
            totals[name][key] += values
        else:
            totals[name][key] = values

    def per_tensor_mean(name, num_inputs):
        """Divide every running sum under ``name`` by ``num_inputs``."""
        return {k: total / num_inputs for k, total in totals[name].items()}

    def dict_mean(d):
        """Average all elements of all arrays stored in ``d``."""
        # Flatten before averaging: tensors may have different shapes, and
        # the zero-variance mask can keep a different number of channels per
        # tensor, so the values cannot be stacked into one regular array.
        flat = [np.ravel(v) for v in d.values()]
        return np.mean(np.concatenate(flat) if flat else [])

    # First iterate through dataset1 and calculate per-channel total for
    # each tensor, then turn it into the per-channel mean.
    count = 0
    for d in dataset1:
        count += 1
        for k, v in d.items():
            add_total("dataset1_total", k, v.numpy().astype(np.double))
    dataset1_mean = per_tensor_mean("dataset1_total", count)

    # Next iterate through both datasets and calculate the per-channel total
    # absolute/squared error between them and the dataset1 variance for each
    # tensor, using the mean from above.
    num_pairs = 0
    for i, (x1, x2) in enumerate(zip(dataset1, dataset2)):
        # Raised explicitly (rather than via ``assert``) so that the check is
        # still performed when Python runs with -O.
        if not x1.keys() == x2.keys():
            raise AssertionError(
                "At input %d the files have different sets of tensors.\n%s: %s\n%s: %s\n"
                % (
                    i,
                    file1,
                    ", ".join(x1.keys()),
                    file2,
                    ", ".join(x2.keys()),
                )
            )
        num_pairs += 1
        for k in x1.keys():
            v1 = x1[k].numpy().astype(np.double)
            v2 = x2[k].numpy().astype(np.double)
            add_total("ae", k, abs(v1 - v2))
            add_total("se", k, (v1 - v2) ** 2)
            add_total("dataset1_variance", k, (v1 - dataset1_mean[k]) ** 2)

    # Average over the number of compared pairs. This differs from ``count``
    # when file2 holds fewer inputs than file1; dividing by ``count`` in that
    # case would underestimate every statistic.
    mae = per_tensor_mean("ae", num_pairs)
    mse = per_tensor_mean("se", num_pairs)
    rmse = {k: np.sqrt(v) for k, v in mse.items()}
    dataset1_var = per_tensor_mean("dataset1_variance", num_pairs)

    # Divide by the target standard deviation to get the per-channel nrmse
    # for each tensor where possible, i.e. where the variance is not zero.
    nrmse = {}
    for k, v in rmse.items():
        is_nonzero = dataset1_var[k] > 0
        nrmse[k] = v[is_nonzero] / np.sqrt(dataset1_var[k][is_nonzero])

    if per_tensor_and_channel:
        return mae, nrmse
    return dict_mean(mae), dict_mean(nrmse)