diff options
author | Johan Alfvén <johan.alfven@arm.com> | 2022-08-30 09:14:56 +0200 |
---|---|---|
committer | Fredrik Svedberg <fredrik.svedberg@arm.com> | 2022-09-01 11:33:50 +0000 |
commit | 17009399160defd4ab21d85249ff31804d732f4b (patch) | |
tree | 048ddcaf079437a583b7bab6d05e629419b67f04 /ethosu/vela/tflite_graph_optimiser.py | |
parent | 89a8cdd5425521f68674ac23a78790f0f6dc98ed (diff) | |
download | ethos-u-vela-17009399160defd4ab21d85249ff31804d732f4b.tar.gz |
MLBEDSW-5029: Output diff for Mean op
Fixed three test cases causing output diff compared to
the reference kernel for the Mean operator.
- If there is a possibility that the accumulator could saturate,
the Mean op must run on the CPU
- Use correct rounding for the bias term
- If a Reshape op is followed by a Mean op, push the Reshape op
to the CPU since this cannot be handled by the NPU
Signed-off-by: Johan Alfven <johan.alfven@arm.com>
Change-Id: I734465730372105821a5e2f73a6a125b9eb7d7f4
Diffstat (limited to 'ethosu/vela/tflite_graph_optimiser.py')
-rw-r--r-- | ethosu/vela/tflite_graph_optimiser.py | 3 |
1 files changed, 2 insertions, 1 deletions
diff --git a/ethosu/vela/tflite_graph_optimiser.py b/ethosu/vela/tflite_graph_optimiser.py index 3646b01e..38e3f603 100644 --- a/ethosu/vela/tflite_graph_optimiser.py +++ b/ethosu/vela/tflite_graph_optimiser.py @@ -42,6 +42,7 @@ from .graph_optimiser_util import set_ifm_ofm_op_shapes from .graph_optimiser_util import set_tensor_equivalence from .numeric_util import clamp_sigmoid from .numeric_util import round_away_zero +from .numeric_util import round_up_to_int from .operation import create_activation_function from .operation import ExplicitScaling from .operation import NpuBlockType @@ -1365,7 +1366,7 @@ def convert_mean_to_depthwise_conv_or_avgpool(op, arch, nng): fiq = ifmq.clone() fiq.zero_point = 0 op.forced_input_quantization = fiq - bias_term = ofmq.zero_point - int(ifmq.zero_point * ifmq.scale_f32 / ofmq.scale_f32) + bias_term = ofmq.zero_point - round_up_to_int(ifmq.zero_point * ifmq.scale_f32 / ofmq.scale_f32) # If the bias term is outside uint8 range, we need an Add op to apply it. if bias_term < 0 or bias_term > 255: intermediate = op.ofm.clone(suffix="_intermediate", set_unique=True) |