From b90666d9b43f4b5223bb4dcecdbee87b2ad757c2 Mon Sep 17 00:00:00 2001
From: Oscar Andersson
Date: Thu, 29 Feb 2024 14:35:58 +0100
Subject: TOSA fixes

- Fix TOSA imports
- Handle weights connected to Identity nodes
- Scaling info was missing in Fully Connected
- Disable rescaling fusing for conv-like ops
- Explicit scaling was missing for conv-like ops
- Handle Const->Identity->Transpose chains
- Handle Const->Identity->Reshape chains

Change-Id: I063af1f187b6b56105ccf5e8e8b2eb0d3a39dd3b
Signed-off-by: Oscar Andersson
---
 ethosu/vela/tosa/TosaBasicBlock.py  |  4 +--
 ethosu/vela/tosa/TosaGraph.py       |  4 +--
 ethosu/vela/tosa/TosaRegion.py      |  2 +-
 ethosu/vela/tosa_graph_optimiser.py |  8 ++---
 ethosu/vela/tosa_reader.py          | 71 ++++++++++++++++++++++++++++++++-----
 ethosu/vela/weight_compressor.py    | 27 +++++++-------
 6 files changed, 85 insertions(+), 31 deletions(-)

diff --git a/ethosu/vela/tosa/TosaBasicBlock.py b/ethosu/vela/tosa/TosaBasicBlock.py
index b31f455..e003a81 100644
--- a/ethosu/vela/tosa/TosaBasicBlock.py
+++ b/ethosu/vela/tosa/TosaBasicBlock.py
@@ -42,7 +42,7 @@ class TosaBasicBlock(object):
             x = self._tab.Vector(o)
             x += flatbuffers.number_types.UOffsetTFlags.py_type(j) * 4
             x = self._tab.Indirect(x)
-            from tosa.TosaOperator import TosaOperator
+            from .TosaOperator import TosaOperator
             obj = TosaOperator()
             obj.Init(self._tab.Bytes, x)
             return obj
@@ -67,7 +67,7 @@ class TosaBasicBlock(object):
             x = self._tab.Vector(o)
             x += flatbuffers.number_types.UOffsetTFlags.py_type(j) * 4
             x = self._tab.Indirect(x)
-            from tosa.TosaTensor import TosaTensor
+            from .TosaTensor import TosaTensor
             obj = TosaTensor()
             obj.Init(self._tab.Bytes, x)
             return obj
diff --git a/ethosu/vela/tosa/TosaGraph.py b/ethosu/vela/tosa/TosaGraph.py
index 84b51a7..7068056 100644
--- a/ethosu/vela/tosa/TosaGraph.py
+++ b/ethosu/vela/tosa/TosaGraph.py
@@ -33,7 +33,7 @@ class TosaGraph(object):
         o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(4))
         if o != 0:
             x = self._tab.Indirect(o + self._tab.Pos)
-            from tosa.Version import Version
+            from .Version import Version
             obj = Version()
             obj.Init(self._tab.Bytes, x)
             return obj
@@ -46,7 +46,7 @@ class TosaGraph(object):
             x = self._tab.Vector(o)
             x += flatbuffers.number_types.UOffsetTFlags.py_type(j) * 4
             x = self._tab.Indirect(x)
-            from tosa.TosaRegion import TosaRegion
+            from .TosaRegion import TosaRegion
             obj = TosaRegion()
             obj.Init(self._tab.Bytes, x)
             return obj
diff --git a/ethosu/vela/tosa/TosaRegion.py b/ethosu/vela/tosa/TosaRegion.py
index 7fd6e3c..b8a10e3 100644
--- a/ethosu/vela/tosa/TosaRegion.py
+++ b/ethosu/vela/tosa/TosaRegion.py
@@ -42,7 +42,7 @@ class TosaRegion(object):
             x = self._tab.Vector(o)
             x += flatbuffers.number_types.UOffsetTFlags.py_type(j) * 4
             x = self._tab.Indirect(x)
-            from tosa.TosaBasicBlock import TosaBasicBlock
+            from .TosaBasicBlock import TosaBasicBlock
             obj = TosaBasicBlock()
             obj.Init(self._tab.Bytes, x)
             return obj
diff --git a/ethosu/vela/tosa_graph_optimiser.py b/ethosu/vela/tosa_graph_optimiser.py
index c068937..bcb4aac 100644
--- a/ethosu/vela/tosa_graph_optimiser.py
+++ b/ethosu/vela/tosa_graph_optimiser.py
@@ -387,6 +387,8 @@ def rewrite_rescale(op, arch, nng):
         ifm.quantization.zero_point = input_zp
         ofm.quantization.zero_point = output_zp
 
+        assert per_channel is False, "per_channel rescale not supported"
+
         for s, m in zip(shift, multiplier):
             # TODO these are the TOSA limitations
             assert m >= 0
@@ -403,11 +405,7 @@ def rewrite_rescale(op, arch, nng):
 
         # Generate Rescale behaviour attached to a compatible NOP
         avgpool_op = replace_rescale_with_avg_pool(op)
         avgpool_op.rounding_mode = rounding_mode
-
-        if per_channel:
-            assert False, "per_channel rescale not supported"
-        else:
-            avgpool_op.explicit_scaling = explicit_scaling
+        avgpool_op.explicit_scaling = explicit_scaling
 
     return op
diff --git a/ethosu/vela/tosa_reader.py b/ethosu/vela/tosa_reader.py
index 2f37478..9ffda80 100644
--- a/ethosu/vela/tosa_reader.py
+++ b/ethosu/vela/tosa_reader.py
@@ -131,13 +131,65 @@ class TosaSubgraph:
         # TODO Transpose_conv and conv3d
         if op.type.is_depthwise_conv2d_op() or op.type.is_conv2d_op() or op.type == Op.FullyConnected:
-            if inputs[1].values is not None:
-                if op.type == Op.FullyConnected:
-                    inputs[1] = clone_and_reshape_tensor(inputs[1], (1, 0), False)
-                elif op.type.is_conv2d_op():
-                    inputs[1] = clone_and_reshape_tensor(inputs[1], (1, 2, 3, 0), False)
-                elif op.type.is_depthwise_conv2d_op():
-                    inputs[1] = clone_and_reshape_tensor(inputs[1], (1, 2, 0, 3), False)
+
+            def _remove_producing_identity_op(prod_op):
+                # find the producing op that is not an identity op and return it
+                while prod_op.type == Op.Identity:
+                    prod_op = prod_op.inputs[0].ops[0]  # get previous op
+                return prod_op
+
+            def _check_and_get_connection(prod_op, tens):
+                # check that the weight-producing op can be connected to the weight tensor
+                assert len(prod_op.outputs) == 1
+                assert tens.shape == prod_op.outputs[0].shape
+                # only need to connect the current op connection as the tensor-consuming connections
+                # haven't been initialised yet
+                return prod_op.outputs[0]
+
+            # remove identity ops directly connected to the weight input of conv-like ops
+            weights_producer_op = _remove_producing_identity_op(inputs[1].ops[0])
+            inputs[1] = _check_and_get_connection(weights_producer_op, inputs[1])  # update connection
+
+            if weights_producer_op.type == Op.Transpose:
+                # remove the transpose op so that the weight op will be a const op
+                transpose_op = weights_producer_op
+                # remove identity ops directly connected to the input of the transpose op
+                transpose_producer_op = _remove_producing_identity_op(transpose_op.inputs[0].ops[0])
+                transpose_op.inputs[0] = _check_and_get_connection(
+                    transpose_producer_op, transpose_op.inputs[0]
+                )  # update connection
+
+                perms = transpose_op.attrs["perms"]
+                inputs[1] = clone_and_reshape_tensor(transpose_op.inputs[0], perms, False)
+
+            if weights_producer_op.type == Op.Reshape:
+                # remove the reshape op so that the weight op will be a const op
+                reshape_op = weights_producer_op
+                # remove identity ops directly connected to the input of the reshape op
+                reshape_producer_op = _remove_producing_identity_op(reshape_op.inputs[0].ops[0])
+                reshape_op.inputs[0] = _check_and_get_connection(
+                    reshape_producer_op, reshape_op.inputs[0]
+                )  # update connection
+
+                tens = reshape_op.inputs[0].clone("_reshape", False)
+                tens.values = np.reshape(tens.values, reshape_op.ofm.shape)
+                tens.shape = reshape_op.ofm.shape
+                tens._original_shape = tens.shape
+                tens.bandwidth_shape = tens.shape
+                tens.storage_shape = tens.shape
+
+                tmp_op = Operation(Op.Const, tens.name)
+                tmp_op.set_output_tensor(tens)
+                inputs[1] = tens
+
+            assert inputs[1].values is not None
+
+            if op.type == Op.FullyConnected:
+                inputs[1] = clone_and_reshape_tensor(inputs[1], (1, 0), False)
+            elif op.type.is_conv2d_op():
+                inputs[1] = clone_and_reshape_tensor(inputs[1], (1, 2, 3, 0), False)
+            elif op.type.is_depthwise_conv2d_op():
+                inputs[1] = clone_and_reshape_tensor(inputs[1], (1, 2, 0, 3), False)
 
         if op.type.needs_bias() and len(inputs) <= op_type.info.indices.biases[0]:
             # No Bias tensor
             inputs.append(None)
@@ -146,10 +198,13 @@ class TosaSubgraph:
             # a clone with a unique equivalence_id is needed
             inputs[-1] = clone_and_reshape_tensor(inputs[-1], (0,), True)
 
+        op.explicit_scaling = ExplicitScaling(False, [0], [1])  # no scaling
+
         if attr_serializer is not None:
             op.attrs = attr_serializer.deserialize(op_data)
 
-            if "padding" in op.attrs:
+            if "pad" in op.attrs:
+                op.attrs["padding"] = op.attrs["pad"]  # the attribute was renamed to pad; map it back to padding
                 padding = op.attrs["padding"]  # [top, bottom, left, right]
                 op.attrs["explicit_padding"] = (
                     padding[0],
diff --git a/ethosu/vela/weight_compressor.py b/ethosu/vela/weight_compressor.py
index a580fb6..b87a2bf 100644
--- a/ethosu/vela/weight_compressor.py
+++ b/ethosu/vela/weight_compressor.py
@@ -280,23 +280,24 @@ def _prepare_scale_and_bias(arch, tens, explicit_scaling):
         # If weight_scales is not already an iterable make it into a list
         weight_scales = [weight_scales]
 
-    # Convert scales to np.double (from np.float32) to conform to TensorFlow Lite which
-    # uses double during scaling calculations
-    # TensorFlow Lite casts the scales slightly differently for uint8 and int8 as well as
-    # for FullyConnected operators
-    if ifm_dtype == DataType.uint8 or first_consumer_op.original_type == Op.FullyConnected:
-        scales = [np.double(ifm_scale * weight_scale) / np.double(ofm_scale) for weight_scale in weight_scales]
-    elif ifm_dtype == DataType.int8 or ifm_dtype == DataType.int16:
-        scales = [
-            (np.double(ifm_scale) * np.double(weight_scale)) / np.double(ofm_scale) for weight_scale in weight_scales
-        ]
-    else:
-        raise UnsupportedFeatureError(f"Compression of {ifm_dtype} is not implemented; Tensor: '{tens.name}'")
-
     if explicit_scaling:
         assert len(explicit_scaling.shift) == len(explicit_scaling.multiplier)
         quantised_scales = [(int(m), int(s)) for s, m in zip(explicit_scaling.shift, explicit_scaling.multiplier)]
     else:
+        # Convert scales to np.double (from np.float32) to conform to TensorFlow Lite which
+        # uses double during scaling calculations
+        # TensorFlow Lite casts the scales slightly differently for uint8 and int8 as well as
+        # for FullyConnected operators
+        if ifm_dtype == DataType.uint8 or first_consumer_op.original_type == Op.FullyConnected:
+            scales = [np.double(ifm_scale * weight_scale) / np.double(ofm_scale) for weight_scale in weight_scales]
+        elif ifm_dtype == DataType.int8 or ifm_dtype == DataType.int16:
+            scales = [
+                (np.double(ifm_scale) * np.double(weight_scale)) / np.double(ofm_scale)
+                for weight_scale in weight_scales
+            ]
+        else:
+            raise UnsupportedFeatureError(f"Compression of {ifm_dtype} is not implemented; Tensor: '{tens.name}'")
+
         # quantise all of the weight scales into (scale_factor, shift)
         if ifm_dtype == DataType.int16 and bias_tens.dtype == DataType.int64:
             # Reference uses reduced scaling for int16 with int64 bias
--
cgit v1.2.1
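
Note on the tosa_reader.py change above: conv-like ops need constant weights, so the reader now walks the producer chain of the weight input, skips any Identity ops, and folds a producing Transpose (or Reshape) into the constant itself. Below is a minimal, self-contained sketch of the Transpose case; the Tensor and Op classes are toy stand-ins made up for the example (Vela's real classes have different constructors), and fold_weight_chain is a hypothetical helper, not code from the patch.

import numpy as np


class Tensor:
    # toy stand-in for Vela's Tensor: holds values and its producing ops
    def __init__(self, name, values, producer=None):
        self.name = name
        self.values = values
        self.shape = list(values.shape)
        self.ops = [producer] if producer else []


class Op:
    # toy stand-in for Vela's Operation
    def __init__(self, op_type, inputs, out_values, attrs=None):
        self.type = op_type
        self.inputs = inputs
        self.attrs = attrs or {}
        self.outputs = [Tensor(op_type + "_out", out_values, producer=self)]


def remove_producing_identity_op(prod_op):
    # walk past any chain of Identity ops to the real producer, as in the patch
    while prod_op.type == "Identity":
        prod_op = prod_op.inputs[0].ops[0]
    return prod_op


def fold_weight_chain(weights):
    producer = remove_producing_identity_op(weights.ops[0])
    if producer.type == "Transpose":
        # strip identities on the transpose input too, then bake the
        # permutation into the constant values
        const = remove_producing_identity_op(producer.inputs[0].ops[0]).outputs[0]
        perms = producer.attrs["perms"]
        return Tensor(const.name + "_t", np.transpose(const.values, perms))
    return producer.outputs[0]


# Const -> Identity -> Transpose chain feeding a weight input
const_op = Op("Const", [], np.arange(6).reshape(2, 3))
ident_op = Op("Identity", [const_op.outputs[0]], const_op.outputs[0].values)
transp_op = Op(
    "Transpose",
    [ident_op.outputs[0]],
    np.transpose(ident_op.outputs[0].values, (1, 0)),
    attrs={"perms": (1, 0)},
)

folded = fold_weight_chain(transp_op.outputs[0])
print(folded.shape)  # [3, 2] -- the transpose is folded into a const tensor

Baking the permutation into the values means later compiler stages never see a non-const weight producer, which is what the clone_and_reshape_tensor calls in the patch rely on.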
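
Note on the scaling changes: the reader now attaches ExplicitScaling(False, [0], [1]) (multiplier 1, shift 0, i.e. no scaling) to conv-like ops, and weight_compressor.py uses such explicit (multiplier, shift) pairs directly instead of deriving scales from the ifm/weight/ofm quantisation. The sketch below only illustrates the multiplier/shift arithmetic; quantise_scale here is a simplified stand-in invented for the example, not Vela's actual helper.

import math


def quantise_scale(scale):
    # approximate scale as multiplier * 2^-shift with a 32-bit multiplier
    # (simplified; the real helper has more range and rounding handling)
    significand, exponent = math.frexp(scale)
    multiplier = int(round(significand * (1 << 31)))
    shift = 31 - exponent
    return multiplier, shift


def apply_scale(value, multiplier, shift):
    # integer rescale with round-to-nearest; multiplier=1, shift=0 is a no-op,
    # which is what ExplicitScaling(False, [0], [1]) expresses
    round_term = 1 << (shift - 1) if shift > 0 else 0
    return (value * multiplier + round_term) >> shift


m, s = quantise_scale(0.25)
print(apply_scale(1000, m, s))  # 250, i.e. 1000 * 0.25
print(apply_scale(1000, 1, 0))  # 1000, the "no scaling" pair from the reader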