From b90666d9b43f4b5223bb4dcecdbee87b2ad757c2 Mon Sep 17 00:00:00 2001
From: Oscar Andersson
Date: Thu, 29 Feb 2024 14:35:58 +0100
Subject: TOSA fixes

- Fix TOSA imports
- Handle weights connected to Identity nodes
- Scaling info was missing in Fully Connected
- Disable rescaling fusing for conv-like ops
- Explicit scaling was missing for conv-like ops
- Handle Const->Identity->Transpose chains
- Handle Const->Identity->Reshape chains

Change-Id: I063af1f187b6b56105ccf5e8e8b2eb0d3a39dd3b
Signed-off-by: Oscar Andersson
---
 ethosu/vela/tosa/TosaBasicBlock.py  |  4 +--
 ethosu/vela/tosa/TosaGraph.py       |  4 +--
 ethosu/vela/tosa/TosaRegion.py      |  2 +-
 ethosu/vela/tosa_graph_optimiser.py |  8 ++---
 ethosu/vela/tosa_reader.py          | 71 ++++++++++++++++++++++++++++++++-----
 ethosu/vela/weight_compressor.py    | 27 +++++++-------
 6 files changed, 85 insertions(+), 31 deletions(-)

diff --git a/ethosu/vela/tosa/TosaBasicBlock.py b/ethosu/vela/tosa/TosaBasicBlock.py
index b31f455..e003a81 100644
--- a/ethosu/vela/tosa/TosaBasicBlock.py
+++ b/ethosu/vela/tosa/TosaBasicBlock.py
@@ -42,7 +42,7 @@ class TosaBasicBlock(object):
             x = self._tab.Vector(o)
             x += flatbuffers.number_types.UOffsetTFlags.py_type(j) * 4
             x = self._tab.Indirect(x)
-            from tosa.TosaOperator import TosaOperator
+            from .TosaOperator import TosaOperator
             obj = TosaOperator()
             obj.Init(self._tab.Bytes, x)
             return obj
@@ -67,7 +67,7 @@ class TosaBasicBlock(object):
             x = self._tab.Vector(o)
             x += flatbuffers.number_types.UOffsetTFlags.py_type(j) * 4
             x = self._tab.Indirect(x)
-            from tosa.TosaTensor import TosaTensor
+            from .TosaTensor import TosaTensor
             obj = TosaTensor()
             obj.Init(self._tab.Bytes, x)
             return obj
diff --git a/ethosu/vela/tosa/TosaGraph.py b/ethosu/vela/tosa/TosaGraph.py
index 84b51a7..7068056 100644
--- a/ethosu/vela/tosa/TosaGraph.py
+++ b/ethosu/vela/tosa/TosaGraph.py
@@ -33,7 +33,7 @@ class TosaGraph(object):
         o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(4))
         if o != 0:
             x = self._tab.Indirect(o + self._tab.Pos)
-            from tosa.Version import Version
+            from .Version import Version
             obj = Version()
             obj.Init(self._tab.Bytes, x)
             return obj
@@ -46,7 +46,7 @@ class TosaGraph(object):
             x = self._tab.Vector(o)
             x += flatbuffers.number_types.UOffsetTFlags.py_type(j) * 4
             x = self._tab.Indirect(x)
-            from tosa.TosaRegion import TosaRegion
+            from .TosaRegion import TosaRegion
             obj = TosaRegion()
             obj.Init(self._tab.Bytes, x)
             return obj
diff --git a/ethosu/vela/tosa/TosaRegion.py b/ethosu/vela/tosa/TosaRegion.py
index 7fd6e3c..b8a10e3 100644
--- a/ethosu/vela/tosa/TosaRegion.py
+++ b/ethosu/vela/tosa/TosaRegion.py
@@ -42,7 +42,7 @@ class TosaRegion(object):
             x = self._tab.Vector(o)
             x += flatbuffers.number_types.UOffsetTFlags.py_type(j) * 4
             x = self._tab.Indirect(x)
-            from tosa.TosaBasicBlock import TosaBasicBlock
+            from .TosaBasicBlock import TosaBasicBlock
             obj = TosaBasicBlock()
             obj.Init(self._tab.Bytes, x)
             return obj
diff --git a/ethosu/vela/tosa_graph_optimiser.py b/ethosu/vela/tosa_graph_optimiser.py
index c068937..bcb4aac 100644
--- a/ethosu/vela/tosa_graph_optimiser.py
+++ b/ethosu/vela/tosa_graph_optimiser.py
@@ -387,6 +387,8 @@ def rewrite_rescale(op, arch, nng):
         ifm.quantization.zero_point = input_zp
         ofm.quantization.zero_point = output_zp
 
+        assert per_channel is False, "per_channel rescale not supported"
+
         for s, m in zip(shift, multiplier):
             # TODO these are the TOSA limitations
             assert m >= 0
@@ -403,11 +405,7 @@ def rewrite_rescale(op, arch, nng):
 
         # Generate Rescale behaviour attached to a compatible NOP
         avgpool_op = replace_rescale_with_avg_pool(op)
         avgpool_op.rounding_mode = rounding_mode
-
-        if per_channel:
-            assert False, "per_channel rescale not supported"
-        else:
-            avgpool_op.explicit_scaling = explicit_scaling
+        avgpool_op.explicit_scaling = explicit_scaling
 
     return op
diff --git a/ethosu/vela/tosa_reader.py b/ethosu/vela/tosa_reader.py
index 2f37478..9ffda80 100644
--- a/ethosu/vela/tosa_reader.py
+++ b/ethosu/vela/tosa_reader.py
@@ -131,13 +131,65 @@ class TosaSubgraph:
         # TODO Transpose_conv and conv3d
         if op.type.is_depthwise_conv2d_op() or op.type.is_conv2d_op() or op.type == Op.FullyConnected:
-            if inputs[1].values is not None:
-                if op.type == Op.FullyConnected:
-                    inputs[1] = clone_and_reshape_tensor(inputs[1], (1, 0), False)
-                elif op.type.is_conv2d_op():
-                    inputs[1] = clone_and_reshape_tensor(inputs[1], (1, 2, 3, 0), False)
-                elif op.type.is_depthwise_conv2d_op():
-                    inputs[1] = clone_and_reshape_tensor(inputs[1], (1, 2, 0, 3), False)
+
+            def _remove_producing_identity_op(prod_op):
+                # find the producing op that is not an identity op and return it
+                while prod_op.type == Op.Identity:
+                    prod_op = prod_op.inputs[0].ops[0]  # get previous op
+                return prod_op
+
+            def _check_and_get_connection(prod_op, tens):
+                # check that the weight-producing op can be connected to the weight tensor
+                assert len(prod_op.outputs) == 1
+                assert tens.shape == prod_op.outputs[0].shape
+                # only need to connect the current op connection as the tensor-consuming connections
+                # haven't been initialised yet
+                return prod_op.outputs[0]
+
+            # remove identity ops directly connected to the weight input of conv-like ops
+            weights_producer_op = _remove_producing_identity_op(inputs[1].ops[0])
+            inputs[1] = _check_and_get_connection(weights_producer_op, inputs[1])  # update connection
+
+            if weights_producer_op.type == Op.Transpose:
+                # remove the transpose op so that the weight op will be a const op
+                transpose_op = weights_producer_op
+                # remove identity ops directly connected to the input of the transpose op
+                transpose_producer_op = _remove_producing_identity_op(transpose_op.inputs[0].ops[0])
+                transpose_op.inputs[0] = _check_and_get_connection(
+                    transpose_producer_op, transpose_op.inputs[0]
+                )  # update connection
+
+                perms = transpose_op.attrs["perms"]
+                inputs[1] = clone_and_reshape_tensor(transpose_op.inputs[0], perms, False)
+
+            if weights_producer_op.type == Op.Reshape:
+                # remove the reshape op so that the weight op will be a const op
+                reshape_op = weights_producer_op
+                # remove identity ops directly connected to the input of the reshape op
+                reshape_producer_op = _remove_producing_identity_op(reshape_op.inputs[0].ops[0])
+                reshape_op.inputs[0] = _check_and_get_connection(
+                    reshape_producer_op, reshape_op.inputs[0]
+                )  # update connection
+
+                tens = reshape_op.inputs[0].clone("_reshape", False)
+                tens.values = np.reshape(tens.values, reshape_op.ofm.shape)
+                tens.shape = reshape_op.ofm.shape
+                tens._original_shape = tens.shape
+                tens.bandwidth_shape = tens.shape
+                tens.storage_shape = tens.shape
+
+                tmp_op = Operation(Op.Const, tens.name)
+                tmp_op.set_output_tensor(tens)
+                inputs[1] = tens
+
+            assert inputs[1].values is not None
+
+            if op.type == Op.FullyConnected:
+                inputs[1] = clone_and_reshape_tensor(inputs[1], (1, 0), False)
+            elif op.type.is_conv2d_op():
+                inputs[1] = clone_and_reshape_tensor(inputs[1], (1, 2, 3, 0), False)
+            elif op.type.is_depthwise_conv2d_op():
+                inputs[1] = clone_and_reshape_tensor(inputs[1], (1, 2, 0, 3), False)
 
         if op.type.needs_bias() and len(inputs) <= op_type.info.indices.biases[0]:
             # No Bias tensor
             inputs.append(None)
@@ -146,10 +198,13 @@ class TosaSubgraph:
             # a clone with a unique equivalence_id is needed
             inputs[-1] = clone_and_reshape_tensor(inputs[-1], (0,), True)
 
+        op.explicit_scaling = ExplicitScaling(False, [0], [1])  # no scaling
+
         if attr_serializer is not None:
             op.attrs = attr_serializer.deserialize(op_data)
 
-            if "padding" in op.attrs:
+            if "pad" in op.attrs:
+                op.attrs["padding"] = op.attrs["pad"]  # the attribute was renamed to pad; map it back to padding
                 padding = op.attrs["padding"]  # [top, bottom, left, right]
                 op.attrs["explicit_padding"] = (
                     padding[0],
diff --git a/ethosu/vela/weight_compressor.py b/ethosu/vela/weight_compressor.py
index a580fb6..b87a2bf 100644
--- a/ethosu/vela/weight_compressor.py
+++ b/ethosu/vela/weight_compressor.py
@@ -280,23 +280,24 @@ def _prepare_scale_and_bias(arch, tens, explicit_scaling):
         # If weight_scales is not already an iterable make it into a list
         weight_scales = [weight_scales]
 
-    # Convert scales to np.double (from np.float32) to conform to TensorFlow Lite which
-    # uses double during scaling calculations
-    # TensorFlow Lite casts the scales slightly differently for uint8 and int8 as well as
-    # for FullyConnected operators
-    if ifm_dtype == DataType.uint8 or first_consumer_op.original_type == Op.FullyConnected:
-        scales = [np.double(ifm_scale * weight_scale) / np.double(ofm_scale) for weight_scale in weight_scales]
-    elif ifm_dtype == DataType.int8 or ifm_dtype == DataType.int16:
-        scales = [
-            (np.double(ifm_scale) * np.double(weight_scale)) / np.double(ofm_scale) for weight_scale in weight_scales
-        ]
-    else:
-        raise UnsupportedFeatureError(f"Compression of {ifm_dtype} is not implemented; Tensor: '{tens.name}'")
-
     if explicit_scaling:
         assert len(explicit_scaling.shift) == len(explicit_scaling.multiplier)
         quantised_scales = [(int(m), int(s)) for s, m in zip(explicit_scaling.shift, explicit_scaling.multiplier)]
     else:
+        # Convert scales to np.double (from np.float32) to conform to TensorFlow Lite which
+        # uses double during scaling calculations
+        # TensorFlow Lite casts the scales slightly differently for uint8 and int8 as well as
+        # for FullyConnected operators
+        if ifm_dtype == DataType.uint8 or first_consumer_op.original_type == Op.FullyConnected:
+            scales = [np.double(ifm_scale * weight_scale) / np.double(ofm_scale) for weight_scale in weight_scales]
+        elif ifm_dtype == DataType.int8 or ifm_dtype == DataType.int16:
+            scales = [
+                (np.double(ifm_scale) * np.double(weight_scale)) / np.double(ofm_scale)
+                for weight_scale in weight_scales
+            ]
+        else:
+            raise UnsupportedFeatureError(f"Compression of {ifm_dtype} is not implemented; Tensor: '{tens.name}'")
+
         # quantise all of the weight scales into (scale_factor, shift)
         if ifm_dtype == DataType.int16 and bias_tens.dtype == DataType.int64:
             # Reference uses reduced scaling for int16 with int64 bias
--
cgit v1.2.1
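
Note on the tosa_reader.py change above: conv-like ops need constant weights, so the reader now walks the producer chain of the weight input, skips any Identity ops, and folds a producing Transpose (or Reshape) into the constant itself. Below is a minimal, self-contained sketch of the Transpose case; the Tensor and Op classes are toy stand-ins made up for the example (Vela's real classes have different constructors), and fold_weight_chain is a hypothetical helper, not code from the patch.

import numpy as np


class Tensor:
    # toy stand-in for Vela's Tensor: holds values and its producing ops
    def __init__(self, name, values, producer=None):
        self.name = name
        self.values = values
        self.shape = list(values.shape)
        self.ops = [producer] if producer else []


class Op:
    # toy stand-in for Vela's Operation
    def __init__(self, op_type, inputs, out_values, attrs=None):
        self.type = op_type
        self.inputs = inputs
        self.attrs = attrs or {}
        self.outputs = [Tensor(op_type + "_out", out_values, producer=self)]


def remove_producing_identity_op(prod_op):
    # walk past any chain of Identity ops to the real producer, as in the patch
    while prod_op.type == "Identity":
        prod_op = prod_op.inputs[0].ops[0]
    return prod_op


def fold_weight_chain(weights):
    producer = remove_producing_identity_op(weights.ops[0])
    if producer.type == "Transpose":
        # strip identities on the transpose input too, then bake the
        # permutation into the constant values
        const = remove_producing_identity_op(producer.inputs[0].ops[0]).outputs[0]
        perms = producer.attrs["perms"]
        return Tensor(const.name + "_t", np.transpose(const.values, perms))
    return producer.outputs[0]


# Const -> Identity -> Transpose chain feeding a weight input
const_op = Op("Const", [], np.arange(6).reshape(2, 3))
ident_op = Op("Identity", [const_op.outputs[0]], const_op.outputs[0].values)
transp_op = Op(
    "Transpose",
    [ident_op.outputs[0]],
    np.transpose(ident_op.outputs[0].values, (1, 0)),
    attrs={"perms": (1, 0)},
)

folded = fold_weight_chain(transp_op.outputs[0])
print(folded.shape)  # [3, 2] -- the transpose is folded into a const tensor

Baking the permutation into the values means later compiler stages never see a non-const weight producer, which is what the clone_and_reshape_tensor calls in the patch rely on.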
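
Note on the scaling changes: the reader now attaches ExplicitScaling(False, [0], [1]) (multiplier 1, shift 0, i.e. no scaling) to conv-like ops, and weight_compressor.py uses such explicit (multiplier, shift) pairs directly instead of deriving scales from the ifm/weight/ofm quantisation. The sketch below only illustrates the multiplier/shift arithmetic; quantise_scale here is a simplified stand-in invented for the example, not Vela's actual helper.

import math


def quantise_scale(scale):
    # approximate scale as multiplier * 2^-shift with a 32-bit multiplier
    # (simplified; the real helper has more range and rounding handling)
    significand, exponent = math.frexp(scale)
    multiplier = int(round(significand * (1 << 31)))
    shift = 31 - exponent
    return multiplier, shift


def apply_scale(value, multiplier, shift):
    # integer rescale with round-to-nearest; multiplier=1, shift=0 is a no-op,
    # which is what ExplicitScaling(False, [0], [1]) expresses
    round_term = 1 << (shift - 1) if shift > 0 else 0
    return (value * multiplier + round_term) >> shift


m, s = quantise_scale(0.25)
print(apply_scale(1000, m, s))  # 250, i.e. 1000 * 0.25
print(apply_scale(1000, 1, 0))  # 1000, the "no scaling" pair from the reader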