author    Oscar Andersson <oscar.andersson@arm.com>  2024-02-29 14:35:58 +0100
committer oscarandersson8218 <oscar.andersson@arm.com>  2024-03-07 15:54:54 +0100
commit    b90666d9b43f4b5223bb4dcecdbee87b2ad757c2 (patch)
tree      f4f3ce9c1a6af4625f68a21e68139140a4dd2119 /ethosu/vela/weight_compressor.py
parent    9341bf4b8de7b5a4db72281490492f6e32b0c605 (diff)
TOSA fixes
- Fix TOSA imports
- Handle weights connected to Identity nodes
- Scaling info was missing in Fully Connected
- Disable rescaling fusing for conv-like ops
- Explicit scaling was missing for conv-like ops
- Handle Const->Identity->Transpose chains
- Handle Const->Identity->Reshape chains

Change-Id: I063af1f187b6b56105ccf5e8e8b2eb0d3a39dd3b
Signed-off-by: Oscar Andersson <oscar.andersson@arm.com>
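For context, the "explicit scaling" referenced above is a pair of integer lists (multiplier, shift) attached to an op and used in place of float scales derived from quantization parameters. A minimal sketch of how one such pair rescales an accumulator, assuming simple round-to-nearest behaviour; the helper name apply_explicit_scaling is illustrative and not part of Vela:

    # Sketch only: how a single explicit (multiplier, shift) pair rescales an
    # int32 accumulator. Illustrative helper, not Vela's implementation.
    def apply_explicit_scaling(acc: int, multiplier: int, shift: int) -> int:
        # Fixed-point rescale: multiply, then arithmetic shift right with rounding.
        rounding = 1 << (shift - 1) if shift > 0 else 0
        return (acc * multiplier + rounding) >> shift

    # Example: multiplier=1040, shift=15 approximates a real scale of 1040/2**15 ~= 0.0317
    assert apply_explicit_scaling(12345, 1040, 15) == 392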
Diffstat (limited to 'ethosu/vela/weight_compressor.py')
 ethosu/vela/weight_compressor.py | 27 ++++++++++++++-------------
 1 file changed, 14 insertions(+), 13 deletions(-)
diff --git a/ethosu/vela/weight_compressor.py b/ethosu/vela/weight_compressor.py
index a580fb61..b87a2bfb 100644
--- a/ethosu/vela/weight_compressor.py
+++ b/ethosu/vela/weight_compressor.py
@@ -280,23 +280,24 @@ def _prepare_scale_and_bias(arch, tens, explicit_scaling):
         # If weight_scales is not already an iterable make it into a list
         weight_scales = [weight_scales]
 
-    # Convert scales to np.double (from np.float32) to conform to TensorFlow Lite which
-    # uses double during scaling calculations
-    # TensorFlow Lite casts the scales slightly differently for uint8 and int8 as well as
-    # for FullyConnected operators
-    if ifm_dtype == DataType.uint8 or first_consumer_op.original_type == Op.FullyConnected:
-        scales = [np.double(ifm_scale * weight_scale) / np.double(ofm_scale) for weight_scale in weight_scales]
-    elif ifm_dtype == DataType.int8 or ifm_dtype == DataType.int16:
-        scales = [
-            (np.double(ifm_scale) * np.double(weight_scale)) / np.double(ofm_scale) for weight_scale in weight_scales
-        ]
-    else:
-        raise UnsupportedFeatureError(f"Compression of {ifm_dtype} is not implemented; Tensor: '{tens.name}'")
-
     if explicit_scaling:
         assert len(explicit_scaling.shift) == len(explicit_scaling.multiplier)
         quantised_scales = [(int(m), int(s)) for s, m in zip(explicit_scaling.shift, explicit_scaling.multiplier)]
     else:
+        # Convert scales to np.double (from np.float32) to conform to TensorFlow Lite which
+        # uses double during scaling calculations
+        # TensorFlow Lite casts the scales slightly differently for uint8 and int8 as well as
+        # for FullyConnected operators
+        if ifm_dtype == DataType.uint8 or first_consumer_op.original_type == Op.FullyConnected:
+            scales = [np.double(ifm_scale * weight_scale) / np.double(ofm_scale) for weight_scale in weight_scales]
+        elif ifm_dtype == DataType.int8 or ifm_dtype == DataType.int16:
+            scales = [
+                (np.double(ifm_scale) * np.double(weight_scale)) / np.double(ofm_scale)
+                for weight_scale in weight_scales
+            ]
+        else:
+            raise UnsupportedFeatureError(f"Compression of {ifm_dtype} is not implemented; Tensor: '{tens.name}'")
+
         # quantise all of the weight scales into (scale_factor, shift)
         if ifm_dtype == DataType.int16 and bias_tens.dtype == DataType.int64:
             # Reference uses reduced scaling for int16 with int64 bias
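
The net effect of this hunk is that the double-precision scale computation, and the UnsupportedFeatureError for unsupported input types, now run only when the op carries no explicit scaling; explicit (multiplier, shift) pairs are used verbatim. On the non-explicit path, the quantise step named in the comment turns each double scale into such a pair. A minimal sketch of that quantisation, assuming a 31-bit multiplier as in the TensorFlow Lite reference; quantise_scale_sketch is illustrative, not Vela's actual quantise_scale:

    import math

    # Sketch only: quantise a floating-point scale into a (multiplier, shift)
    # pair such that scale ~= multiplier * 2**-shift, with a 31-bit multiplier.
    def quantise_scale_sketch(scale: float) -> "tuple[int, int]":
        significand, exponent = math.frexp(scale)    # scale = significand * 2**exponent, 0.5 <= significand < 1
        multiplier = round(significand * (1 << 31))  # 31-bit fixed-point significand
        shift = 31 - exponent
        if multiplier == (1 << 31):                  # rounding overflowed the 31 bits: renormalise
            multiplier //= 2
            shift -= 1
        return multiplier, shift

    # Example: an effective scale ifm_scale * weight_scale / ofm_scale of 0.0235
    multiplier, shift = quantise_scale_sketch(0.0235)
    assert abs(multiplier / (1 << shift) - 0.0235) < 1e-9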