diff options
author | Johan Alfvén <johan.alfven@arm.com> | 2022-08-30 09:14:56 +0200 |
---|---|---|
committer | Fredrik Svedberg <fredrik.svedberg@arm.com> | 2022-09-01 11:33:50 +0000 |
commit | 17009399160defd4ab21d85249ff31804d732f4b (patch) | |
tree | 048ddcaf079437a583b7bab6d05e629419b67f04 | |
parent | 89a8cdd5425521f68674ac23a78790f0f6dc98ed (diff) | |
download | ethos-u-vela-17009399160defd4ab21d85249ff31804d732f4b.tar.gz |
MLBEDSW-5029: Output diff for Mean op
Fixed three test cases causing output diff compared to
the reference kernel for the Mean operator.
- If there is a possibility that the accumulator could saturate,
the Mean op must be run on the CPU
- Use correct rounding for the bias term
- If a Reshape op is followed by a Mean op, push the Reshape op
to the CPU since this cannot be handled by the NPU
Signed-off-by: Johan Alfven <johan.alfven@arm.com>
Change-Id: I734465730372105821a5e2f73a6a125b9eb7d7f4
-rw-r--r-- | ethosu/vela/test/test_tflite_supported_operators.py | 16 | ||||
-rw-r--r-- | ethosu/vela/tflite_graph_optimiser.py | 3 | ||||
-rw-r--r-- | ethosu/vela/tflite_supported_operators.py | 55 |
3 files changed, 67 insertions, 7 deletions
diff --git a/ethosu/vela/test/test_tflite_supported_operators.py b/ethosu/vela/test/test_tflite_supported_operators.py index 89c27997..cc8b3d2c 100644 --- a/ethosu/vela/test/test_tflite_supported_operators.py +++ b/ethosu/vela/test/test_tflite_supported_operators.py @@ -623,6 +623,22 @@ def test_mean_hw_product_int8(): op = create_mean([1, 16, 17, 16], [1, 1, 1, 16], [1, 2], DataType.int8, {"keep_dims": True}) assert not support.is_operator_supported(op) + # Create OP that will not saturate the accumulator + op = create_mean([1, 5, 14, 16], [1, 1, 1, 16], [1, 2], DataType.int8, {"keep_dims": True}) + op.ifm.quantization.scale_f32 = 2.0 + op.ifm.quantization.zero_point = 95 + op.ofm.quantization.scale_f32 = 1.0 + op.ofm.quantization.zero_point = 95 + assert support.is_operator_supported(op) + + # Create OP that can saturate the accumulator + op = create_mean([1, 6, 14, 16], [1, 1, 1, 16], [1, 2], DataType.int8, {"keep_dims": True}) + op.ifm.quantization.scale_f32 = 2.0 + op.ifm.quantization.zero_point = 95 + op.ofm.quantization.scale_f32 = 1.0 + op.ofm.quantization.zero_point = 95 + assert not support.is_operator_supported(op) + def test_mean_hw_product_avgpool(): op = create_mean([1, 200, 200, 16], [1, 16], [1, 2], DataType.uint8, {"keep_dims": False}) diff --git a/ethosu/vela/tflite_graph_optimiser.py b/ethosu/vela/tflite_graph_optimiser.py index 3646b01e..38e3f603 100644 --- a/ethosu/vela/tflite_graph_optimiser.py +++ b/ethosu/vela/tflite_graph_optimiser.py @@ -42,6 +42,7 @@ from .graph_optimiser_util import set_ifm_ofm_op_shapes from .graph_optimiser_util import set_tensor_equivalence from .numeric_util import clamp_sigmoid from .numeric_util import round_away_zero +from .numeric_util import round_up_to_int from .operation import create_activation_function from .operation import ExplicitScaling from .operation import NpuBlockType @@ -1365,7 +1366,7 @@ def convert_mean_to_depthwise_conv_or_avgpool(op, arch, nng): fiq = ifmq.clone() fiq.zero_point = 0 
op.forced_input_quantization = fiq - bias_term = ofmq.zero_point - int(ifmq.zero_point * ifmq.scale_f32 / ofmq.scale_f32) + bias_term = ofmq.zero_point - round_up_to_int(ifmq.zero_point * ifmq.scale_f32 / ofmq.scale_f32) # If the bias term is outside uint8 range, we need an Add op to apply it. if bias_term < 0 or bias_term > 255: intermediate = op.ofm.clone(suffix="_intermediate", set_unique=True) diff --git a/ethosu/vela/tflite_supported_operators.py b/ethosu/vela/tflite_supported_operators.py index 1915d43b..f01a6690 100644 --- a/ethosu/vela/tflite_supported_operators.py +++ b/ethosu/vela/tflite_supported_operators.py @@ -304,6 +304,7 @@ class TFLiteSupportedOperators: # Reshape specific checks: self.specific_constraints[Op.Reshape].append(TFLiteSupportedOperators.constraint_reshape_shape_constant) + self.specific_constraints[Op.Reshape].append(TFLiteSupportedOperators.constraint_reshape_before_mean) # Concat specific checks: for op_type in (Op.Concat, Op.ConcatTFLite): @@ -795,10 +796,9 @@ class TFLiteSupportedOperators: max_prod = cls.mean_kernel_product return h * w <= max_prod, f"Product of height and width is {h * w}" - @classmethod - @docstring_format_args([mean_kernel_product_int8]) - def constraint_mean_height_width_product_int8(cls, op): - """Product of IFM height and width must be no greater than {} when: + @staticmethod + def constraint_mean_height_width_product_int8(op): + """Number of IFM height and width elements might cause accumulator saturation when; The IFM shape has 4 dimensions; and The axis indices specify reduction across 2 dimensions; and The axis indices correspond to the width and height dimensions of the IFM; and @@ -817,8 +817,43 @@ class TFLiteSupportedOperators: return True, "" h = shape[-3] w = shape[-2] - max_prod = cls.mean_kernel_product_int8 - return h * w <= max_prod, f"Product of height and width is {h * w}" + + ifmq, ofmq = op.ifm.quantization, op.ofm.quantization + + # Scale factor + real_scale = ifmq.scale_f32 / 
ofmq.scale_f32 + + # Min and max value + ifm_min_val = np.iinfo(np.int8).min - ifmq.zero_point + ifm_max_val = np.iinfo(np.int8).max - ifmq.zero_point + + # Accumulator limits + min_acc_limit = np.iinfo(np.int16).min + max_acc_limit = np.iinfo(np.int16).max + + # Theoretical max/min value that accumulator need to store + min_acc_sum = h * w * ifm_min_val * real_scale + ofmq.zero_point + max_acc_sum = h * w * ifm_max_val * real_scale + ofmq.zero_point + + # Max product of heigth and width that will not saturate the accumulator + ifm_min_val = 1 if ifm_min_val == 0 else ifm_min_val + ifm_max_val = 1 if ifm_max_val == 0 else ifm_max_val + if max_acc_sum > abs(min_acc_sum): + max_hw = int((max_acc_limit - ofmq.zero_point) / real_scale / ifm_max_val) + else: + max_hw = int((min_acc_limit - ofmq.zero_point) / real_scale / ifm_min_val) + + extra = [] + + extra.append(f" Possible accumulator range is ({min_acc_sum} - {max_acc_sum})\n") + extra.append(f" Maximum accumulator range is ({min_acc_limit} - {max_acc_limit})\n") + extra.append( + f" Based on the IFM and OFM quantization the IFM height and width must be no greater than {max_hw}" + ) + + extra = "".join(extra) + + return (min_acc_sum >= min_acc_limit and max_acc_sum <= max_acc_limit, f"\n{extra}") @classmethod @docstring_format_args([filter_height_range[1], dilated_height_range[1]]) @@ -867,6 +902,14 @@ class TFLiteSupportedOperators: return valid, f"Op has non-const input(s): {extra}" @staticmethod + def constraint_reshape_before_mean(op): + "Reshape on NPU not supported before MEAN operator" + for next_op in op.outputs[0].consumers(): + if next_op is not None and next_op.type == Op.Mean: + return False, "" + return True, "" + + @staticmethod def constraint_concat_valid_dimensions_non_axis(op): """All Input dimensions must match OFM dimension in all axes except the one defined by the axis attribute""" valid = True |