author     Johan Alfvén <johan.alfven@arm.com>   2022-10-27 16:30:01 +0200
committer  Tim Hall <tim.hall@arm.com>           2022-11-03 16:35:22 +0000
commit     9d51ec41855a8be21bd0708c882d121e5bb5afcc (patch)
tree       69d5fa0d2da78c9df2ccc0999756b4a9e8368901
parent     92cd33b7adbb799b6593d49f1d29c13a85933e55 (diff)
download   ethos-u-vela-9d51ec41855a8be21bd0708c882d121e5bb5afcc.tar.gz
MLBEDSW-7074: Updated reference kernel for the MEAN op
The reference kernel for the MEAN operator has changed. As a result, the
mean implementation can be simplified and the constraint for mean int8
can be removed.

Signed-off-by: Johan Alfven <johan.alfven@arm.com>
Change-Id: I318e9b495eefea99e7ac4aea4b8c436c83753405
-rw-r--r--  SUPPORTED_OPS.md                                       8
-rw-r--r--  ethosu/vela/test/test_tflite_supported_operators.py    7
-rw-r--r--  ethosu/vela/tflite_graph_optimiser.py                 88
-rw-r--r--  ethosu/vela/tflite_supported_operators.py             27
4 files changed, 2 insertions, 128 deletions
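
At the heart of the tflite_graph_optimiser.py change below, the old explicit
comparison of scale and zero point is replaced by a single is_scaling_equal()
call on the quantization parameters. A minimal sketch of the equivalence,
using a hypothetical stand-in class (the field names scale_f32 and zero_point
are taken from the diff; this is not Vela's actual implementation):

    from dataclasses import dataclass

    @dataclass
    class QuantizationParameters:
        # Hypothetical stand-in; models only the two fields the old check compared.
        scale_f32: float
        zero_point: int

        def is_scaling_equal(self, other: "QuantizationParameters") -> bool:
            # Condenses the removed inline check:
            #   ifmq.zero_point == ofmq.zero_point and ifmq.scale_f32 == ofmq.scale_f32
            return self.scale_f32 == other.scale_f32 and self.zero_point == other.zero_point

    ifmq = QuantizationParameters(scale_f32=0.05, zero_point=128)
    ofmq = QuantizationParameters(scale_f32=0.05, zero_point=128)
    assert ifmq.is_scaling_equal(ofmq)  # MEAN lowers to a simple AvgPool with TRUNCATE rounding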
diff --git a/SUPPORTED_OPS.md b/SUPPORTED_OPS.md
index 2efd017c..d1814a64 100644
--- a/SUPPORTED_OPS.md
+++ b/SUPPORTED_OPS.md
@@ -1,7 +1,7 @@
# Supported Ops
This file was automatically generated by Vela using the `--supported-ops-report` parameter.
-Vela version: `3.6.0rc1.dev18+g6ef0230.d20221018`
+Vela version: `3.6.0rc1.dev37+g53605be.d20221027`
This file complies with
[**Gitiles Markdown syntax**](https://github.com/google/gitiles/blob/master/Documentation/markdown.md)
@@ -212,12 +212,6 @@ This is a list of constraints that the MEAN operator must satisfy in order to be
- Product of height and width must be no greater than 4096 when:
IFM and OFM have different scale or zero point; or
'keep_dims' is True
-- Product of IFM height and width must be no greater than 256 when:
- The IFM shape has 4 dimensions; and
- The axis indices specify reduction across 2 dimensions; and
- The axis indices correspond to the width and height dimensions of the IFM; and
- 'keep_dims' is True; and
- IFM datatype is int8
- For single axis averages across the height dimension:
IFM height must be no greater than 256 if the IFM and OFM scale and zero point match; otherwise
IFM height must be no greater than 64 if the IFM and OFM scale or zero point do not match
diff --git a/ethosu/vela/test/test_tflite_supported_operators.py b/ethosu/vela/test/test_tflite_supported_operators.py
index 790f0018..4410938e 100644
--- a/ethosu/vela/test/test_tflite_supported_operators.py
+++ b/ethosu/vela/test/test_tflite_supported_operators.py
@@ -608,13 +608,6 @@ def test_mean_hw_product():
assert not support.is_operator_supported(op)
-def test_mean_hw_product_int8():
- op = create_mean([1, 16, 16, 16], [1, 1, 1, 16], [1, 2], DataType.int8, {"keep_dims": True})
- assert support.is_operator_supported(op)
- op = create_mean([1, 16, 17, 16], [1, 1, 1, 16], [1, 2], DataType.int8, {"keep_dims": True})
- assert not support.is_operator_supported(op)
-
-
def test_mean_hw_product_avgpool():
op = create_mean([1, 200, 200, 16], [1, 16], [1, 2], DataType.uint8, {"keep_dims": False})
assert support.is_operator_supported(op)
diff --git a/ethosu/vela/tflite_graph_optimiser.py b/ethosu/vela/tflite_graph_optimiser.py
index 0ba5abf5..b8e61f48 100644
--- a/ethosu/vela/tflite_graph_optimiser.py
+++ b/ethosu/vela/tflite_graph_optimiser.py
@@ -42,7 +42,6 @@ from .graph_optimiser_util import set_ifm_ofm_op_shapes
from .graph_optimiser_util import set_tensor_equivalence
from .numeric_util import clamp_sigmoid
from .numeric_util import round_away_zero
-from .numeric_util import round_up_to_int
from .operation import create_activation_function
from .operation import ExplicitScaling
from .operation import NpuBlockType
@@ -1586,7 +1585,6 @@ def fixup_asymmetric_weights(op, arch, nng):
def convert_mean_to_depthwise_conv_or_avgpool(op, arch, nng):
if op.type == Op.Mean and op.run_on_npu:
- keep_dims = op.attrs.get("keep_dims", False)
inp, axis = op.inputs
shape = inp.shape
ofm_shape = op.ofm.shape
@@ -1631,91 +1629,7 @@ def convert_mean_to_depthwise_conv_or_avgpool(op, arch, nng):
weight_scale, bias = 1, 0
ofmq, ifmq = op.ofm.quantization, inp.quantization
- # Set rounding mode, scaling and zero point based on which reference implementation to match
- if len(shape) == 4 and axis == [1, 2] and keep_dims:
- if inp.dtype == DataType.uint8:
- # This attribute means a different scaling calculation is used in order to match reference
- op.low_precision_scaling = True
- weight_scale = h * w
- # Set zero points to 0 as they will be adjusted for with bias term
- foq = ofmq.clone()
- foq.zero_point = 0
- fiq = ifmq.clone()
- fiq.zero_point = 0
- op.forced_input_quantization = fiq
- bias_term = ofmq.zero_point - round_up_to_int(ifmq.zero_point * ifmq.scale_f32 / ofmq.scale_f32)
- # If the bias term is outside uint8 range, we need an Add op to apply it.
- if bias_term < 0 or bias_term > 255:
- intermediate = op.ofm.clone(suffix="_intermediate", set_unique=True)
- # Bias term has higher bitness (i32) than input/output (u8).
- # 16 bits is enough since the bias is added/subtracted from a u8 value,
- # the bias can only effectively assume values in the range [-255, 255].
- intermediate.dtype = DataType.int16
- intermediate.quantization.zero_point = 0
- add_op = Operation(Op.Add, f"{op.name}_bias")
- add_op.forced_output_quantization = foq
- add_op.add_input_tensor(intermediate)
- quant = QuantizationParameters()
- quant.zero_point = 0
- bias_scalar = create_const_tensor(add_op.name, [], DataType.int16, [bias_term], quantization=quant)
- add_op.add_input_tensor(bias_scalar)
- add_op.set_output_tensor(op.ofm)
- add_op.set_ifm_ofm_shapes()
- add_op.activation = op.activation
- op.activation = None
- op.set_output_tensor(intermediate)
- op.set_ifm_ofm_shapes()
- # If not, we can just do it with the OFM zero point.
- else:
- foq.zero_point = bias_term
- op.forced_output_quantization = foq
- else:
- assert inp.dtype == DataType.int8
- # Use a depthwise to calculate the sum,
- # followed by a multiplication with 1/N to get the MEAN
- weight_scale = 1
- intermediate = op.ofm.clone(suffix="_intermediate", set_unique=True)
- intermediate.dtype = DataType.int32
- mul_op = Operation(Op.Mul, op.name + "_mul")
- mul_op.add_input_tensor(intermediate)
- mul_op.set_output_tensor(op.ofm)
- # Create scalar containing 1/N
- quant = QuantizationParameters()
- quant.zero_point = 0
- # The reference rounds negative numbers downwards, e.g. -1.5 is rounded to -2,
- # while rounding mode NATURAL would round this to -1.
- # This can only occur if N is even, and can be emulated by
- # multiplying with a number that is slightly smaller than 1/N.
- # It must be so small that other roundings are not affected;
- # the calculated value is based on worst case,
- # which is sum 256 * N (the maximum sum that can occur with int8)
- n = int(h * w)
- eps = 1 / (256 * (n + 1)) if n % 2 == 0 else 0
- quant.scale_f32 = 1 / (n - eps)
-
- # For int8/int16 we could use IFM/OFM scaling to do the division
- # intermediate * 1 -> scale > round and shift.
- #
- # For int32 scaling is not supported so instead multiply with the scale
- # intermediate * scale -> round and shift.
- #
- # Calculate the scale and shift value. const Tensor must be created
- # with correct quantization since the scale and shift is calculated later
- # in the command stream generator.
- mul_scale, _ = scaling.elementwise_mul_scale(
- mul_op.ifm.quantization.scale_f32, quant.scale_f32, mul_op.ofm.quantization.scale_f32
- )
- scalar = create_const_tensor(
- op.name + "_scalar", [1, 1, 1, 1], DataType.int32, [mul_scale], np.int32, quantization=quant
- )
- mul_op.add_input_tensor(scalar)
- mul_op.set_ifm_ofm_shapes()
- mul_op.rounding_mode = NpuRoundingMode.NATURAL
- mul_op.activation = op.activation
- op.activation = None
- op.set_output_tensor(intermediate)
- op.set_ifm_ofm_shapes()
- elif ifmq.zero_point == ofmq.zero_point and ifmq.scale_f32 == ofmq.scale_f32:
+ if ifmq.is_scaling_equal(ofmq):
# Here we can just use a simple AvgPool with truncating rounding,
# as we're emulating simple integer division.
op.rounding_mode = NpuRoundingMode.TRUNCATE
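
The bulk of the removal above is the int8-specific lowering, whose deleted
comments include a subtle rounding argument: for even N the reference kernel
rounded an exact tie such as -1.5 down to -2, whereas NpuRoundingMode.NATURAL
rounds it to -1, so the 1/N scale was nudged to 1/(N - eps). A standalone
reconstruction of that trick, for illustration only (round_natural and
mean_scale are hypothetical helpers, not Vela functions):

    import math

    def round_natural(x: float) -> int:
        # NATURAL rounding: round to nearest, ties toward +infinity,
        # so -1.5 rounds to -1.
        return math.floor(x + 0.5)

    def mean_scale(h: int, w: int) -> float:
        # N is the number of averaged elements. A tie can only occur when N
        # is even, so 1/N is nudged down just enough to push the tie over,
        # without affecting any non-tie result (per the deleted comment, the
        # worst case is a sum of 256 * N for int8).
        n = h * w
        eps = 1 / (256 * (n + 1)) if n % 2 == 0 else 0
        return 1 / (n - eps)

    # Example: sum = -6 over a 2x2 window, true mean = -1.5.
    # The reference kernel expects -2; a plain 1/N scale under NATURAL gives -1.
    assert round_natural(-6 * (1 / 4)) == -1            # wrong vs. reference
    assert round_natural(-6 * mean_scale(2, 2)) == -2   # matches reference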
diff --git a/ethosu/vela/tflite_supported_operators.py b/ethosu/vela/tflite_supported_operators.py
index b8fe4b6a..fd8bbeef 100644
--- a/ethosu/vela/tflite_supported_operators.py
+++ b/ethosu/vela/tflite_supported_operators.py
@@ -197,7 +197,6 @@ class TFLiteSupportedOperators:
filter_height_range = (1, 256)
filter_product_range = (1, 256 * 256)
mean_kernel_product = 64 * 64
- mean_kernel_product_int8 = 16 * 16
mean_kernel_product_avgpool = 256 * 256
def __init__(self):
@@ -314,7 +313,6 @@ class TFLiteSupportedOperators:
# Mean specific checks:
self.specific_constraints[Op.Mean].append(TFLiteSupportedOperators.constraint_mean_height_width_product_avgpool)
self.specific_constraints[Op.Mean].append(TFLiteSupportedOperators.constraint_mean_height_width_product)
- self.specific_constraints[Op.Mean].append(TFLiteSupportedOperators.constraint_mean_height_width_product_int8)
self.specific_constraints[Op.Mean].append(TFLiteSupportedOperators.constraint_mean_height_single_axis)
# Reshape specific checks:
@@ -810,31 +808,6 @@ class TFLiteSupportedOperators:
return h * w <= max_prod, f"Product of height and width is {h * w}"
@classmethod
- @docstring_format_args([mean_kernel_product_int8])
- def constraint_mean_height_width_product_int8(cls, op):
- """Product of IFM height and width must be no greater than {} when:
- The IFM shape has 4 dimensions; and
- The axis indices specify reduction across 2 dimensions; and
- The axis indices correspond to the width and height dimensions of the IFM; and
- 'keep_dims' is True; and
- IFM datatype is int8"""
- shape = op.ifm.shape
- axis = int(op.inputs[1].values) if op.inputs[1].shape == [] else list(op.inputs[1].values)
- # doesn't apply, size is checked by constraint_mean_height_width_product_avgpool
- # and constraint_mean_height_width_product
- if (
- len(shape) != 4
- or op.ifm.dtype != DataType.int8
- or not op.attrs.get("keep_dims")
- or axis not in ([1, 2], [2, 1])
- ):
- return True, ""
- h = shape[-3]
- w = shape[-2]
- max_prod = cls.mean_kernel_product_int8
- return h * w <= max_prod, f"Product of height and width is {h * w}"
-
- @classmethod
@docstring_format_args([filter_height_range[1], dilated_height_range[1]])
def constraint_mean_height_single_axis(cls, op):
"""For single axis averages across the height dimension: