diff options
author | Annie Tallund <annie.tallund@arm.com> | 2023-03-13 17:00:31 +0100 |
---|---|---|
committer | Benjamin Klimczak <benjamin.klimczak@arm.com> | 2023-10-11 15:41:48 +0100 |
commit | f0b8ed75fed9dc69ab1f6313339f9f7e38bfc725 (patch) | |
tree | bc353fad664040b44915b5cf7ae807894b0b87e8 /src/mlia/nn/rewrite/core/graph_edit/diff.py | |
parent | b236127b9a18ec2668271c6b5baafa6a7c1dde51 (diff) | |
download | mlia-f0b8ed75fed9dc69ab1f6313339f9f7e38bfc725.tar.gz |
MLIA-845 Migrate rewrite code
- Add required files for rewriting of TensorFlow Lite graphs
- Adapt rewrite dependency paths and project name
- Add license headers
Change-Id: I19c5f63215fe2af2fa7d7d44af08144c6c5f911c
Signed-off-by: Benjamin Klimczak <benjamin.klimczak@arm.com>
Diffstat (limited to 'src/mlia/nn/rewrite/core/graph_edit/diff.py')
-rw-r--r-- | src/mlia/nn/rewrite/core/graph_edit/diff.py | 109 |
1 files changed, 109 insertions, 0 deletions
# diff --git a/src/mlia/nn/rewrite/core/graph_edit/diff.py b/src/mlia/nn/rewrite/core/graph_edit/diff.py
# new file mode 100644, index 0000000..b6c9616
# SPDX-FileCopyrightText: Copyright 2023, Arm Limited and/or its affiliates.
# SPDX-License-Identifier: Apache-2.0
"""Compare the tensors stored in two files readable by NumpyTFReader."""
import os
from collections import defaultdict

# Set before TensorFlow is imported so the setting is already in place at import time.
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
import tensorflow as tf

tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

import numpy as np
from tensorflow.lite.python import interpreter as interpreter_wrapper
from mlia.nn.rewrite.core.utils.numpy_tfrecord import NumpyTFReader, NumpyTFWriter


def _assert_same_tensors(index, file1, record1, file2, record2):
    """Assert that two records expose the same set of tensor names.

    The message lists the tensor names found in each file; it is only built
    when the assertion actually fails.
    """
    assert record1.keys() == record2.keys(), (
        "At input %d the files have different sets of tensors.\n%s: %s\n%s: %s\n"
        % (
            index,
            file1,
            ", ".join(record1.keys()),
            file2,
            ", ".join(record2.keys()),
        )
    )


def diff(file1, file2):
    """Return the mean absolute error between the tensors of two files.

    Both files are read with NumpyTFReader and walked in lockstep. Because the
    records are paired with ``zip``, only the common prefix is compared when
    the files hold a different number of records.

    Args:
        file1: First file, passed to NumpyTFReader.
        file2: Second file, passed to NumpyTFReader.

    Returns:
        A tuple ``(results, mean)``. ``results`` holds one dict per compared
        record, mapping tensor name to the mean absolute error of that tensor.
        ``mean`` is the average of all those values, or NaN when nothing was
        compared (previously this case raised ZeroDivisionError).

    Raises:
        AssertionError: If a pair of records has different tensor names.
    """
    dataset1 = NumpyTFReader(file1)
    dataset2 = NumpyTFReader(file2)

    results = []
    for i, (x1, x2) in enumerate(zip(dataset1, dataset2)):
        _assert_same_tensors(i, file1, x1, file2, x2)
        per_tensor = {}
        for k in x1.keys():
            # Compare in double precision regardless of the stored dtype.
            v1 = x1[k].numpy().astype(np.double)
            v2 = x2[k].numpy().astype(np.double)
            per_tensor[k] = abs(v1 - v2).mean()
        results.append(per_tensor)

    total = sum(sum(x.values()) for x in results)
    count = sum(len(x) for x in results)
    # Empty files (or records without tensors) give no values to average.
    mean = total / count if count else float("nan")
    return results, mean


def diff_stats(file1, file2, per_tensor_and_channel=False):
    """Return the MAE and normalised RMSE between the tensors of two files.

    All statistics are accumulated element-wise per tensor name over the
    records (the original comments call this "per-channel"). The RMSE is
    normalised by the standard deviation of the ``file1`` data.

    The datasets are paired with ``zip``, so only the common prefix is
    compared when the files hold a different number of records, while every
    average is still divided by the number of records in ``file1``.

    Args:
        file1: Reference file, passed to NumpyTFReader. It is iterated twice.
        file2: File compared against the reference, passed to NumpyTFReader.
        per_tensor_and_channel: When true, return the per-tensor dicts instead
            of two overall means.

    Returns:
        ``(mae, nrmse)``. With ``per_tensor_and_channel`` both are dicts keyed
        by tensor name; the ``nrmse`` values are filtered by a boolean mask and
        therefore only contain the positions whose ``file1`` variance is
        greater than zero. Otherwise both are the ``np.mean`` over the values
        of those dicts.
        NOTE(review): ``np.mean`` over a list of arrays needs them to stack
        into a regular array; tensors of differing shapes may not average -
        confirm against callers.

    Raises:
        AssertionError: If a pair of records has different tensor names.
    """
    dataset1 = NumpyTFReader(file1)
    dataset2 = NumpyTFReader(file2)

    totals = defaultdict(dict)

    def add_total(name, key, values):
        """Accumulate ``values`` into the running total ``totals[name][key]``."""
        if key not in totals[name]:
            totals[name][key] = values
        else:
            totals[name][key] += values

    # First iterate through dataset1 and calculate per-channel total for each tensor
    count = 0
    for d in dataset1:
        count += 1
        for k, v in d.items():
            add_total("dataset1_total", k, v.numpy().astype(np.double))

    def per_tensor_mean(name):
        """Divide each accumulated total under ``name`` by the record count."""
        return {k: total / count for k, total in totals[name].items()}

    # Use this to calculate per-channel mean for each tensor
    dataset1_mean = per_tensor_mean("dataset1_total")

    # Next iterate through both datasets and calculate per-channel total squared error
    # between them for each tensor and dataset1 variance for each tensor using the mean from above
    for i, (x1, x2) in enumerate(zip(dataset1, dataset2)):
        _assert_same_tensors(i, file1, x1, file2, x2)
        for k in x1.keys():
            v1 = x1[k].numpy().astype(np.double)
            v2 = x2[k].numpy().astype(np.double)
            error = v1 - v2
            add_total("ae", k, abs(error))
            add_total("se", k, error ** 2)
            add_total("dataset1_variance", k, (v1 - dataset1_mean[k]) ** 2)

    # Finally average over number of inputs to get the rmse and the dataset1 variance
    mae = per_tensor_mean("ae")
    mse = per_tensor_mean("se")
    rmse = {k: np.sqrt(v) for k, v in mse.items()}
    dataset1_var = per_tensor_mean("dataset1_variance")
    is_nonzero = {k: dataset1_var[k] > 0 for k in dataset1_var}

    # Divide by target standard deviation to get the per-channel nrmse for each tensor where possible
    nrmse = {
        k: v[is_nonzero[k]] / np.sqrt(dataset1_var[k][is_nonzero[k]])
        for k, v in rmse.items()
    }

    if per_tensor_and_channel:
        return mae, nrmse

    def dict_mean(d):
        """Average over all values of ``d``."""
        return np.mean(list(d.values()))

    return dict_mean(mae), dict_mean(nrmse)