From a68b82a055167e12761c3d2a309e8230011dc401 Mon Sep 17 00:00:00 2001 From: Rickard Bolin Date: Thu, 20 Apr 2023 15:12:28 +0000 Subject: MLBEDSW-7527: Mean operator output diff Mean operators with height larger than 64 are reshaped but the IFM shape was then reset to the original value, causing an output diff. Signed-off-by: Rickard Bolin Change-Id: I3a89d4efac53173cbd6fe0a5c0542e028bed42ad --- ethosu/vela/tflite_graph_optimiser.py | 44 ++++++++++++++++------------------- 1 file changed, 20 insertions(+), 24 deletions(-) (limited to 'ethosu') diff --git a/ethosu/vela/tflite_graph_optimiser.py b/ethosu/vela/tflite_graph_optimiser.py index 6297fca2..c79f154a 100644 --- a/ethosu/vela/tflite_graph_optimiser.py +++ b/ethosu/vela/tflite_graph_optimiser.py @@ -1784,7 +1784,7 @@ def fixup_or_check_asymmetric_weights(force_symmetric_int_weights): return check_asymmetric_weights -def convert_mean_to_depthwise_conv_or_avgpool(op, arch, nng): +def convert_mean_to_depthwise_conv(op, arch, nng): if op.type == Op.Mean and op.run_on_npu: inp, axis = op.inputs shape = inp.shape @@ -1796,15 +1796,11 @@ def convert_mean_to_depthwise_conv_or_avgpool(op, arch, nng): if axis.shape == [] or axis.shape[0] == 1: # single axis axis = int(axis.values) if len(axis.shape) == 0 else int(axis.values[0]) if dims in (2, 3): - if axis == 0: - h, w = shape[axis], 1 - else: - h, w = 1, shape[axis] + # If dims is 2 or 3, axis 0 refers to h-dimension + h, w = (shape[axis], 1) if axis == 0 else (1, shape[axis]) else: - if axis == 1: - h, w = shape[axis], 1 - else: - h, w = 1, shape[axis] + # If dims is 4, axis 1 refers to h-dimension + h, w = (shape[axis], 1) if axis == 1 else (1, shape[axis]) else: # multiple axes axis = sorted(axis.values) h, w = [shape[i] for i in axis] @@ -1828,8 +1824,6 @@ def convert_mean_to_depthwise_conv_or_avgpool(op, arch, nng): # Set IFM/OFM shapes after changing op type op.set_ifm_ofm_shapes() - ofmq, ifmq = op.ofm.quantization, inp.quantization - # Change dimensions to 4 def extend_dims(dim, in_shape): if dim < 4: @@ -1852,7 +1846,6 @@ def convert_mean_to_depthwise_conv_or_avgpool(op, arch, nng): op.set_ifm_ofm_shapes() # If height is greater than max kernel height, reshape from HxW to 1x(HxW) - weight_shape = None if h > 64: # This can only happen and be done for multiple axes, and # h * w <= 4096 for DepthwiseConv2DBias @@ -1860,16 +1853,15 @@ def convert_mean_to_depthwise_conv_or_avgpool(op, arch, nng): shape = [shape[0], 1, h * w, shape[3]] op.ifm_shapes[0] = Shape4D(shape) weight_shape = [1, h * w, shape[3], shape[0]] + else: + # Set weight shape to [H,W,C,B] + weight_shape = [h, w, shape[3], shape[0]] op.rounding_mode = NpuRoundingMode.NATURAL identity_quant = QuantizationParameters(scale_f32=1.0, zero_point=0) op.forced_input_quantization = identity_quant op.forced_output_quantization = identity_quant - if weight_shape is None: - # Set weight shape to [H,W,C,B] - weight_shape = [h, w, shape[3], shape[0]] - # Add unit weight tensor op.set_input_tensor( create_const_tensor( @@ -1884,19 +1876,29 @@ def convert_mean_to_depthwise_conv_or_avgpool(op, arch, nng): op.weights.values = np.reshape(op.inputs[1].values, weight_shape) # Input zero point is adjusted after the sum calculation, so we emulate that with a bias + ofmq, ifmq = op.ofm.quantization, inp.quantization bias = -ifmq.zero_point * h * w bias_shape = [shape[-1]] op.inputs.append(create_const_tensor(op.name + "_bias", bias_shape, DataType.int32, np.ones(bias_shape) * bias)) DebugDatabase.add_optimised(op, op) - # Multiply sum with 1/num_elements_in_axis to get the mean + # Create intermediate tensor between depthwise conv and mul intermediate = op.ofm.clone(suffix="_intermediate", set_unique=True) intermediate.dtype = DataType.int32 + + # Multiply sum with 1/num_elements_in_axis to get the mean mul_op = Operation(Op.Mul, op.name + "_mul") mul_op.add_input_tensor(intermediate) mul_op.set_output_tensor(op.ofm) mul_op.forced_input_quantization = identity_quant + # Set dw conv output to the intermediate tensor + op.set_output_tensor(intermediate) + + # Move activation from original op to mean op + mul_op.activation = op.activation + op.activation = None + # The multiplier is calculated in the same way as in the reference, # clamping the shift value at the price of some precision loss. num_elements_in_axis = int(h * w) @@ -1929,13 +1931,7 @@ def convert_mean_to_depthwise_conv_or_avgpool(op, arch, nng): # Need to use explicit scaling to get the wanted shift mul_op.explicit_scaling = ExplicitScaling(False, [output_shift_vela], [1]) - - mul_op.activation = op.activation - op.activation = None - op.set_output_tensor(intermediate) - op.set_ifm_ofm_shapes() DebugDatabase.add_optimised(op, mul_op) - return op @@ -2218,7 +2214,7 @@ def tflite_optimise_graph(nng, arch, force_symmetric_int_weights): # Rewrite of operators op_rewrite_list = [ set_tensor_equivalence, - convert_mean_to_depthwise_conv_or_avgpool, + convert_mean_to_depthwise_conv, convert_depthwise_to_conv, convert_conv_to_fc, convert_lstm, -- cgit v1.2.1