Diffstat (limited to 'ethosu')
-rw-r--r-- | ethosu/vela/test/test_tflite_supported_operators.py |  7
-rw-r--r-- | ethosu/vela/tflite_graph_optimiser.py                | 92
-rw-r--r-- | ethosu/vela/tflite_supported_operators.py            | 31
3 files changed, 62 insertions, 68 deletions
diff --git a/ethosu/vela/test/test_tflite_supported_operators.py b/ethosu/vela/test/test_tflite_supported_operators.py
index 04f10e9a..74dd3bf2 100644
--- a/ethosu/vela/test/test_tflite_supported_operators.py
+++ b/ethosu/vela/test/test_tflite_supported_operators.py
@@ -618,13 +618,6 @@ def test_mean_hw_product():
     assert not support.is_operator_supported(op)


-def test_mean_hw_product_avgpool():
-    op = create_mean([1, 200, 200, 16], [1, 16], [1, 2], DataType.uint8, {"keep_dims": False})
-    assert support.is_operator_supported(op)
-    op = create_mean([1, 200, 200, 16], [1, 1, 1, 16], [1, 2], DataType.int8, {"keep_dims": True})
-    assert not support.is_operator_supported(op)
-
-
 def test_lstm_support():
     # Test valid configuration
     op = testutil.create_lstm_op(3, 12, 24, 20, DataType.int8)
diff --git a/ethosu/vela/tflite_graph_optimiser.py b/ethosu/vela/tflite_graph_optimiser.py
index 478d0189..393a8323 100644
--- a/ethosu/vela/tflite_graph_optimiser.py
+++ b/ethosu/vela/tflite_graph_optimiser.py
@@ -45,6 +45,7 @@ from .lstm import Lstm
 from .numeric_util import clamp_sigmoid
 from .numeric_util import full_shape
 from .numeric_util import round_away_zero
+from .numeric_util import round_down_log2
 from .operation import create_activation_function
 from .operation import ExplicitScaling
 from .operation import NpuBlockType
@@ -1827,22 +1828,7 @@ def convert_mean_to_depthwise_conv_or_avgpool(op, arch, nng):
         # Set IFM/OFM shapes after changing op type
         op.set_ifm_ofm_shapes()

-        weight_scale, bias = 1, 0
         ofmq, ifmq = op.ofm.quantization, inp.quantization
-        if ifmq.is_scaling_equal(ofmq):
-            # Here we can just use a simple AvgPool with truncating rounding,
-            # as we're emulating simple integer division.
-            op.rounding_mode = NpuRoundingMode.TRUNCATE
-            op.type = Op.AvgPool
-            op.attrs.update({"ksize": (1, h, w, 1), "filter_height": h, "filter_width": w})
-        else:
-            op.rounding_mode = NpuRoundingMode.NATURAL
-            weight_scale = 1 / (h * w)
-            # Input zero point is adjusted after mean calculation, so we emulate that with a bias
-            bias = -ifmq.zero_point * h * w
-            fiq = ifmq.clone()
-            fiq.zero_point = 0
-            op.forced_input_quantization = fiq

         # Change dimensions to 4
         def extend_dims(dim, in_shape):
@@ -1867,28 +1853,18 @@ def convert_mean_to_depthwise_conv_or_avgpool(op, arch, nng):

         # If height is greater than max kernel height, reshape from HxW to 1x(HxW)
         weight_shape = None
-        if (h > 64 and op.type == Op.DepthwiseConv2DBias) or (h > 256 and op.type == Op.AvgPool):
+        if h > 64:
             # This can only happen and be done for multiple axes, and
-            # h * w <= 256 for DepthwiseConv2DBias
-            # h * w <= 4096 for AvgPool
+            # h * w <= 4096 for DepthwiseConv2DBias
             # which is checked in supported ops
             shape = [shape[0], 1, h * w, shape[3]]
             op.ifm_shapes[0] = Shape4D(shape)
             weight_shape = [1, h * w, shape[3], shape[0]]
-            if h > 256 and op.type == Op.AvgPool:
-                op.attrs.update({"ksize": (1, 1, h * w, 1), "filter_height": 1, "filter_width": h * w})

-        # If the AvgPool version is used, we don't need to do anything else
-        if op.type == Op.AvgPool:
-            DebugDatabase.add_optimised(op, op)
-            return op
-
-        # Make unit weight tensor quantization
-        weight_quant = ifmq.clone()
-        weight_quant.min = 0
-        weight_quant.max = 255
-        weight_quant.scale_f32 = weight_scale
-        weight_quant.zero_point = 0
+        op.rounding_mode = NpuRoundingMode.NATURAL
+        identity_quant = QuantizationParameters(scale_f32=1.0, zero_point=0)
+        op.forced_input_quantization = identity_quant
+        op.forced_output_quantization = identity_quant

         if weight_shape is None:
             # Set weight shape to [H,W,C,B]
@@ -1901,17 +1877,65 @@ def convert_mean_to_depthwise_conv_or_avgpool(op, arch, nng):
                 weight_shape,
                 inp.dtype,
                 np.ones(weight_shape),
-                quantization=weight_quant,
+                quantization=identity_quant,
             ),
             1,
         )
         op.weights.values = np.reshape(op.inputs[1].values, weight_shape)

-        # Add bias tensor
+        # Input zero point is adjusted after the sum calculation, so we emulate that with a bias
+        bias = -ifmq.zero_point * h * w
         bias_shape = [shape[-1]]
-        op.inputs.append(create_const_tensor("bias", bias_shape, DataType.int32, np.ones(bias_shape) * bias))
+        op.inputs.append(create_const_tensor(op.name + "_bias", bias_shape, DataType.int32, np.ones(bias_shape) * bias))
         DebugDatabase.add_optimised(op, op)

+        # Multiply sum with 1/num_elements_in_axis to get the mean
+        intermediate = op.ofm.clone(suffix="_intermediate", set_unique=True)
+        intermediate.dtype = DataType.int32
+        mul_op = Operation(Op.Mul, op.name + "_mul")
+        mul_op.add_input_tensor(intermediate)
+        mul_op.set_output_tensor(op.ofm)
+        mul_op.forced_input_quantization = identity_quant
+
+        # The multiplier is calculated in the same way as in the reference,
+        # clamping the shift value at the price of some precision loss.
+        num_elements_in_axis = int(h * w)
+        output_multiplier, output_shift_vela = quantise_scale(np.double(ifmq.scale_f32) / np.double(ofmq.scale_f32))
+
+        # Convert to reference representation shift value
+        output_shift = 31 - output_shift_vela
+
+        # Reference calculation
+        # round_down_log2 same as 63 - CountLeadingZeros(num_elements_in_axis)
+        shift = round_down_log2(num_elements_in_axis)
+        shift = min(shift, 32)
+        shift = min(shift, 31 + output_shift)
+        output_multiplier = (output_multiplier << shift) // num_elements_in_axis
+        output_shift = output_shift - shift
+
+        # Convert to vela representation shift
+        output_shift_vela = 31 - output_shift
+
+        # For int32 scaling is not supported so instead multiply with the scale
+        # intermediate * scale -> round and shift.
+        scalar = create_const_tensor(
+            op.name + "_scalar", [1, 1, 1, 1], DataType.int32, [output_multiplier], quantization=identity_quant
+        )
+        mul_op.add_input_tensor(scalar)
+        mul_op.set_ifm_ofm_shapes()
+
+        # Reference using TFL rounding for the multiply
+        mul_op.rounding_mode = NpuRoundingMode.TFL
+
+        # Need to use explicit scaling to get the wanted shift
+        mul_op.explicit_scaling = ExplicitScaling(False, [output_shift_vela], [1])
+
+        mul_op.activation = op.activation
+        op.activation = None
+        op.set_output_tensor(intermediate)
+        op.set_ifm_ofm_shapes()
+        DebugDatabase.add_optimised(op, mul_op)
+
     return op


diff --git a/ethosu/vela/tflite_supported_operators.py b/ethosu/vela/tflite_supported_operators.py
index 457c35eb..9ace3219 100644
--- a/ethosu/vela/tflite_supported_operators.py
+++ b/ethosu/vela/tflite_supported_operators.py
@@ -191,7 +191,6 @@ class TFLiteSupportedOperators:
     filter_height_range = (1, 256)
     filter_product_range = (1, 256 * 256)
     mean_kernel_product = 64 * 64
-    mean_kernel_product_avgpool = 256 * 256

     def __init__(self):
         # Setup the generic constraints. Note: the order matters
@@ -309,7 +308,6 @@ class TFLiteSupportedOperators:
         self.specific_constraints[Op.Pad].append(TFLiteSupportedOperators.constraint_pad_type)

         # Mean specific checks:
-        self.specific_constraints[Op.Mean].append(TFLiteSupportedOperators.constraint_mean_height_width_product_avgpool)
         self.specific_constraints[Op.Mean].append(TFLiteSupportedOperators.constraint_mean_height_width_product)
         self.specific_constraints[Op.Mean].append(TFLiteSupportedOperators.constraint_mean_height_single_axis)

@@ -809,26 +807,9 @@ class TFLiteSupportedOperators:
         return valid, f"Op has ifm_shape={ifm_shape} and ifm2_shape={ifm2_shape}"

     @classmethod
-    @docstring_format_args([mean_kernel_product_avgpool])
-    def constraint_mean_height_width_product_avgpool(cls, op):
-        """Product of height and width must be no greater than {}"""
-        shape = op.inputs[0].shape
-        hi = 0 if len(shape) < 4 else 1
-        h, w = shape[hi : hi + 2]
-        max_prod = cls.mean_kernel_product_avgpool
-        return h * w <= max_prod, f"Product of height and width is {h * w}"
-
-    @classmethod
     @docstring_format_args([mean_kernel_product])
     def constraint_mean_height_width_product(cls, op):
-        """Product of height and width must be no greater than {} when:
-        IFM and OFM have different scale or zero point; or
-        'keep_dims' is True"""
-        ifmq, ofmq = op.ifm.quantization, op.ofm.quantization
-        keep_dims = op.attrs.get("keep_dims")
-        # doesn't apply, size is checked by constraint_mean_height_width_product_avgpool
-        if not keep_dims and ifmq.scale_f32 == ofmq.scale_f32 and ifmq.zero_point == ofmq.zero_point:
-            return True, ""
+        """Product of height and width must be no greater than {}"""
         shape = op.inputs[0].shape
         hi = 0 if len(shape) < 4 else 1
         h, w = shape[hi : hi + 2]
@@ -836,11 +817,10 @@ class TFLiteSupportedOperators:
         return h * w <= max_prod, f"Product of height and width is {h * w}"

     @classmethod
-    @docstring_format_args([filter_height_range[1], dilated_height_range[1]])
+    @docstring_format_args([dilated_height_range[1]])
     def constraint_mean_height_single_axis(cls, op):
         """For single axis averages across the height dimension:
-        IFM height must be no greater than {} if the IFM and OFM scale and zero point match; otherwise
-        IFM height must be no greater than {} if the IFM and OFM scale or zero point do not match"""
+        IFM height must be no greater than {}"""
         inp, axis = op.inputs
         if axis.shape == [] or axis.shape[0] == 1:  # single axis
             axis = int(axis.values) if len(axis.shape) == 0 else int(axis.values[0])
@@ -859,10 +839,7 @@ class TFLiteSupportedOperators:

         h = shape[axis]
         ifm, ofm = op.get_ifm_ofm()
-        if check_quantized_tens_scaling_equal(ifm, ofm):
-            return h <= cls.filter_height_range[1], f"Height is {h}, IFM and OFM quantizations match"
-        else:
-            return h <= cls.dilated_height_range[1], f"Height is {h}, IFM and OFM quantizations do not match"
+        return h <= cls.dilated_height_range[1], f"Height is {h}"

     @staticmethod
     def constraint_reshape_shape_constant(op):
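
Note (not part of the commit): in the new convert_mean_to_depthwise_conv_or_avgpool flow above, the DepthwiseConv only produces the integer sum over the averaged axes, and the appended Mul both divides by the element count and requantises in one Q31 multiply-and-shift. The sketch below illustrates how that multiplier and shift are derived; quantise_scale_sketch and round_down_log2_sketch are simplified stand-ins for Vela's quantise_scale and round_down_log2 helpers (assumed behaviour: a Q31 multiplier plus a right-shift amount), not the actual implementations.

    # Illustrative sketch only, mirroring the reference-style calculation in the diff.
    import math


    def quantise_scale_sketch(scale):
        # Express scale as multiplier * 2**-shift_vela, with a Q31 multiplier (assumption)
        significand, exponent = math.frexp(scale)
        multiplier = int(round(significand * (1 << 31)))
        shift_vela = 31 - exponent
        return multiplier, shift_vela


    def round_down_log2_sketch(n):
        # floor(log2(n)), i.e. 63 - CountLeadingZeros(n) for a positive int64
        return n.bit_length() - 1


    def mean_mul_scaling(ifm_scale, ofm_scale, h, w):
        # Fold 1/(h*w) into the requantisation multiplier, clamping the shift
        # at the price of some precision loss, as the reference mean does
        num_elements_in_axis = h * w
        output_multiplier, output_shift_vela = quantise_scale_sketch(ifm_scale / ofm_scale)

        output_shift = 31 - output_shift_vela  # reference representation of the shift

        shift = round_down_log2_sketch(num_elements_in_axis)
        shift = min(shift, 32)
        shift = min(shift, 31 + output_shift)
        output_multiplier = (output_multiplier << shift) // num_elements_in_axis
        output_shift = output_shift - shift

        return output_multiplier, 31 - output_shift  # back to the Vela representation


    # Example: mean over a 200x200 plane with equal IFM/OFM scale. The 40000-element
    # sum is multiplied by the Q31 multiplier and shifted right by the returned amount,
    # approximating sum / 40000.
    multiplier, shift_vela = mean_mul_scaling(0.1, 0.1, 200, 200)
    print(multiplier, shift_vela)          # 879609302 45
    print(multiplier / (1 << shift_vela))  # ~2.5e-05, i.e. 1 / 40000

The ExplicitScaling set on the Mul applies exactly this shift, and NpuRoundingMode.TFL keeps the rounding of the multiply aligned with the reference kernel.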