author    Oscar Andersson <oscar.andersson@arm.com>  2024-02-29 14:35:58 +0100
committer oscarandersson8218 <oscar.andersson@arm.com>  2024-03-07 15:54:54 +0100
commit    b90666d9b43f4b5223bb4dcecdbee87b2ad757c2
tree      f4f3ce9c1a6af4625f68a21e68139140a4dd2119
parent    9341bf4b8de7b5a4db72281490492f6e32b0c605
download  ethos-u-vela-b90666d9b43f4b5223bb4dcecdbee87b2ad757c2.tar.gz
TOSA fixes
- Fix TOSA imports
- Handle weights connected to Identity nodes
- Scaling info was missing in Fully Connected
- Disable rescaling fusing for conv-like ops
- Explicit scaling was missing for conv-like ops
- Handle Const->Identity->Transpose chains
- Handle Const->Identity->Reshape chains

Change-Id: I063af1f187b6b56105ccf5e8e8b2eb0d3a39dd3b
Signed-off-by: Oscar Andersson <oscar.andersson@arm.com>
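Several of the fixes carry scaling as explicit (multiplier, shift) pairs instead of relying on fused rescaling. A minimal sketch of what such a pair encodes, independent of Vela's own types and with the rounding mode simplified:

def apply_explicit_scaling(acc: int, multiplier: int, shift: int) -> int:
    # scale an integer accumulator by multiplier / 2**shift with round-half-up (shift >= 1 assumed),
    # the kind of scaling a TOSA RESCALE expresses explicitly
    return (acc * multiplier + (1 << (shift - 1))) >> shift

# 0.5 expressed as a Q31 multiplier (1 << 30) with shift 31
assert apply_explicit_scaling(1000, 1 << 30, 31) == 500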
-rw-r--r--  ethosu/vela/tosa/TosaBasicBlock.py    4
-rw-r--r--  ethosu/vela/tosa/TosaGraph.py         4
-rw-r--r--  ethosu/vela/tosa/TosaRegion.py        2
-rw-r--r--  ethosu/vela/tosa_graph_optimiser.py   8
-rw-r--r--  ethosu/vela/tosa_reader.py           71
-rw-r--r--  ethosu/vela/weight_compressor.py     27
6 files changed, 85 insertions, 31 deletions
diff --git a/ethosu/vela/tosa/TosaBasicBlock.py b/ethosu/vela/tosa/TosaBasicBlock.py
index b31f455..e003a81 100644
--- a/ethosu/vela/tosa/TosaBasicBlock.py
+++ b/ethosu/vela/tosa/TosaBasicBlock.py
@@ -42,7 +42,7 @@ class TosaBasicBlock(object):
x = self._tab.Vector(o)
x += flatbuffers.number_types.UOffsetTFlags.py_type(j) * 4
x = self._tab.Indirect(x)
- from tosa.TosaOperator import TosaOperator
+ from .TosaOperator import TosaOperator
obj = TosaOperator()
obj.Init(self._tab.Bytes, x)
return obj
@@ -67,7 +67,7 @@ class TosaBasicBlock(object):
x = self._tab.Vector(o)
x += flatbuffers.number_types.UOffsetTFlags.py_type(j) * 4
x = self._tab.Indirect(x)
- from tosa.TosaTensor import TosaTensor
+ from .TosaTensor import TosaTensor
obj = TosaTensor()
obj.Init(self._tab.Bytes, x)
return obj
diff --git a/ethosu/vela/tosa/TosaGraph.py b/ethosu/vela/tosa/TosaGraph.py
index 84b51a7..7068056 100644
--- a/ethosu/vela/tosa/TosaGraph.py
+++ b/ethosu/vela/tosa/TosaGraph.py
@@ -33,7 +33,7 @@ class TosaGraph(object):
o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(4))
if o != 0:
x = self._tab.Indirect(o + self._tab.Pos)
- from tosa.Version import Version
+ from .Version import Version
obj = Version()
obj.Init(self._tab.Bytes, x)
return obj
@@ -46,7 +46,7 @@ class TosaGraph(object):
x = self._tab.Vector(o)
x += flatbuffers.number_types.UOffsetTFlags.py_type(j) * 4
x = self._tab.Indirect(x)
- from tosa.TosaRegion import TosaRegion
+ from .TosaRegion import TosaRegion
obj = TosaRegion()
obj.Init(self._tab.Bytes, x)
return obj
diff --git a/ethosu/vela/tosa/TosaRegion.py b/ethosu/vela/tosa/TosaRegion.py
index 7fd6e3c..b8a10e3 100644
--- a/ethosu/vela/tosa/TosaRegion.py
+++ b/ethosu/vela/tosa/TosaRegion.py
@@ -42,7 +42,7 @@ class TosaRegion(object):
x = self._tab.Vector(o)
x += flatbuffers.number_types.UOffsetTFlags.py_type(j) * 4
x = self._tab.Indirect(x)
- from tosa.TosaBasicBlock import TosaBasicBlock
+ from .TosaBasicBlock import TosaBasicBlock
obj = TosaBasicBlock()
obj.Init(self._tab.Bytes, x)
return obj
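The changes above to the three generated flatbuffers modules all swap absolute imports for package-relative ones. A minimal sketch of the difference, assuming the generated files are vendored under ethosu/vela/tosa/ and no standalone top-level tosa package is installed:

# The generated accessors import their element types lazily inside the methods, so the
# absolute form
#     from tosa.Version import Version   # only resolves if a top-level 'tosa' package is on sys.path
# fails at call time, while the package-relative form
#     from .Version import Version       # resolves against the ethosu.vela.tosa package
# works for the vendored copy.
from ethosu.vela.tosa.TosaGraph import TosaGraph  # importing the module itself is unaffected either way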
diff --git a/ethosu/vela/tosa_graph_optimiser.py b/ethosu/vela/tosa_graph_optimiser.py
index c068937..bcb4aac 100644
--- a/ethosu/vela/tosa_graph_optimiser.py
+++ b/ethosu/vela/tosa_graph_optimiser.py
@@ -387,6 +387,8 @@ def rewrite_rescale(op, arch, nng):
ifm.quantization.zero_point = input_zp
ofm.quantization.zero_point = output_zp
+ assert per_channel is False, "per_channel rescale not supported"
+
for s, m in zip(shift, multiplier):
# TODO these are the TOSA limitations
assert m >= 0
@@ -403,11 +405,7 @@ def rewrite_rescale(op, arch, nng):
# Generate Rescale behaviour attached to a compatible NOP
avgpool_op = replace_rescale_with_avg_pool(op)
avgpool_op.rounding_mode = rounding_mode
-
- if per_channel:
- assert False, "per_channel rescale not supported"
- else:
- avgpool_op.explicit_scaling = explicit_scaling
+ avgpool_op.explicit_scaling = explicit_scaling
return op
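With the per-channel case now rejected before any graph rewriting, the AvgPool NOP always receives the explicit scaling. A minimal sketch of how a RESCALE's multiplier/shift attributes end up as explicit scaling, assuming Vela's ExplicitScaling container uses the same field order as the ExplicitScaling(False, [0], [1]) call later in this patch (per_channel, shift, multiplier):

from collections import namedtuple

# stand-in with the assumed field order; the real type lives elsewhere in Vela
ExplicitScaling = namedtuple("ExplicitScaling", ["per_channel", "shift", "multiplier"])

multiplier, shift, per_channel = [1 << 30], [31], False   # taken from the RESCALE attributes
assert per_channel is False, "per_channel rescale not supported"
explicit_scaling = ExplicitScaling(per_channel, shift, multiplier)  # attached to the AvgPool NOP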
diff --git a/ethosu/vela/tosa_reader.py b/ethosu/vela/tosa_reader.py
index 2f37478..9ffda80 100644
--- a/ethosu/vela/tosa_reader.py
+++ b/ethosu/vela/tosa_reader.py
@@ -131,13 +131,65 @@ class TosaSubgraph:
# TODO Transpose_conv and conv3d
if op.type.is_depthwise_conv2d_op() or op.type.is_conv2d_op() or op.type == Op.FullyConnected:
- if inputs[1].values is not None:
- if op.type == Op.FullyConnected:
- inputs[1] = clone_and_reshape_tensor(inputs[1], (1, 0), False)
- elif op.type.is_conv2d_op():
- inputs[1] = clone_and_reshape_tensor(inputs[1], (1, 2, 3, 0), False)
- elif op.type.is_depthwise_conv2d_op():
- inputs[1] = clone_and_reshape_tensor(inputs[1], (1, 2, 0, 3), False)
+
+ def _remove_producing_identity_op(prod_op):
+ # find the producing op that is not an identity op and return it
+ while prod_op.type == Op.Identity:
+ prod_op = prod_op.inputs[0].ops[0] # get previous op
+ return prod_op
+
+ def _check_and_get_connection(prod_op, tens):
+ # check weight producing op can be connected to the weight tensor
+ assert len(prod_op.outputs) == 1
+ assert tens.shape == prod_op.outputs[0].shape
+ # only need to connect the current op connection as the tensor consuming connections haven't been
+ # initialised yet
+ return prod_op.outputs[0]
+
+ # remove identity ops directly connected to the weight input of conv like ops
+ weights_producer_op = _remove_producing_identity_op(inputs[1].ops[0])
+ inputs[1] = _check_and_get_connection(weights_producer_op, inputs[1]) # update connection
+
+ if weights_producer_op.type == Op.Transpose:
+ # remove transpose op such that the weight op will be a const op
+ transpose_op = weights_producer_op
+ # remove identity ops directly connected to the input of the transpose op
+ transpose_producer_op = _remove_producing_identity_op(transpose_op.inputs[0].ops[0])
+ transpose_op.inputs[0] = _check_and_get_connection(
+ transpose_producer_op, transpose_op.inputs[0]
+ ) # update connection
+
+ perms = transpose_op.attrs["perms"]
+ inputs[1] = clone_and_reshape_tensor(transpose_op.inputs[0], perms, False)
+
+ if weights_producer_op.type == Op.Reshape:
+ # remove reshape op such that the weight op will be a const op
+ reshape_op = weights_producer_op
+ # remove identity ops directly connected to the input of the reshape op
+ reshape_producer_op = _remove_producing_identity_op(reshape_op.inputs[0].ops[0])
+ reshape_op.inputs[0] = _check_and_get_connection(
+ reshape_producer_op, reshape_op.inputs[0]
+ ) # update connection
+
+ tens = reshape_op.inputs[0].clone("_reshape", False)
+ tens.values = np.reshape(tens.values, reshape_op.ofm.shape)
+ tens.shape = reshape_op.ofm.shape
+ tens._original_shape = tens.shape
+ tens.bandwidth_shape = tens.shape
+ tens.storage_shape = tens.shape
+
+ tmp_op = Operation(Op.Const, tens.name)
+ tmp_op.set_output_tensor(tens)
+ inputs[1] = tens
+
+ assert inputs[1].values is not None
+
+ if op.type == Op.FullyConnected:
+ inputs[1] = clone_and_reshape_tensor(inputs[1], (1, 0), False)
+ elif op.type.is_conv2d_op():
+ inputs[1] = clone_and_reshape_tensor(inputs[1], (1, 2, 3, 0), False)
+ elif op.type.is_depthwise_conv2d_op():
+ inputs[1] = clone_and_reshape_tensor(inputs[1], (1, 2, 0, 3), False)
if op.type.needs_bias() and len(inputs) <= op_type.info.indices.biases[0]:
# No Bias tensor
inputs.append(None)
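The reader changes above walk past Identity producers so a Const -> Identity -> Transpose (or Reshape) chain collapses into a constant weight tensor that the conv-like op can consume directly. A toy illustration of the identity-skipping step, using stand-in classes rather than Vela's Operation and Tensor types:

class ToyOp:
    def __init__(self, kind, producer=None):
        self.kind = kind
        self.producer = producer      # stands in for inputs[0].ops[0] in the real reader

def skip_identities(op):
    # walk back over Identity producers until a non-Identity producer is found
    while op.kind == "Identity":
        op = op.producer
    return op

const = ToyOp("Const")
weights_producer = skip_identities(ToyOp("Identity", ToyOp("Identity", const)))
assert weights_producer is const      # the chain collapses onto the constant weights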
@@ -146,10 +198,13 @@ class TosaSubgraph:
# a clone with a unique equivalence_id is needed
inputs[-1] = clone_and_reshape_tensor(inputs[-1], (0,), True)
+ op.explicit_scaling = ExplicitScaling(False, [0], [1]) # no scaling
+
if attr_serializer is not None:
op.attrs = attr_serializer.deserialize(op_data)
- if "padding" in op.attrs:
+ if "pad" in op.attrs:
+ op.attrs["padding"] = op.attrs["pad"] # attribute was renamed to padding
padding = op.attrs["padding"] # [top, bottom, left, right]
op.attrs["explicit_padding"] = (
padding[0],
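The second reader hunk accepts the renamed convolution padding attribute: the serialized value arrives under "pad" and is copied back to the "padding" key that the rest of the reader expects. A minimal sketch of that mapping, with illustrative values:

# illustrative values; the real attrs come from the TOSA attribute deserializer
attrs = {"pad": [1, 1, 2, 2]}        # [top, bottom, left, right]
if "pad" in attrs:
    attrs["padding"] = attrs["pad"]  # expose it under the name the rest of the reader uses
top, bottom, left, right = attrs["padding"]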
diff --git a/ethosu/vela/weight_compressor.py b/ethosu/vela/weight_compressor.py
index a580fb6..b87a2bf 100644
--- a/ethosu/vela/weight_compressor.py
+++ b/ethosu/vela/weight_compressor.py
@@ -280,23 +280,24 @@ def _prepare_scale_and_bias(arch, tens, explicit_scaling):
# If weight_scales is not already an iterable make it into a list
weight_scales = [weight_scales]
- # Convert scales to np.double (from np.float32) to conform to TensorFlow Lite which
- # uses double during scaling calculations
- # TensorFlow Lite casts the scales slightly differently for uint8 and int8 as well as
- # for FullyConnected operators
- if ifm_dtype == DataType.uint8 or first_consumer_op.original_type == Op.FullyConnected:
- scales = [np.double(ifm_scale * weight_scale) / np.double(ofm_scale) for weight_scale in weight_scales]
- elif ifm_dtype == DataType.int8 or ifm_dtype == DataType.int16:
- scales = [
- (np.double(ifm_scale) * np.double(weight_scale)) / np.double(ofm_scale) for weight_scale in weight_scales
- ]
- else:
- raise UnsupportedFeatureError(f"Compression of {ifm_dtype} is not implemented; Tensor: '{tens.name}'")
-
if explicit_scaling:
assert len(explicit_scaling.shift) == len(explicit_scaling.multiplier)
quantised_scales = [(int(m), int(s)) for s, m in zip(explicit_scaling.shift, explicit_scaling.multiplier)]
else:
+ # Convert scales to np.double (from np.float32) to conform to TensorFlow Lite which
+ # uses double during scaling calculations
+ # TensorFlow Lite casts the scales slightly differently for uint8 and int8 as well as
+ # for FullyConnected operators
+ if ifm_dtype == DataType.uint8 or first_consumer_op.original_type == Op.FullyConnected:
+ scales = [np.double(ifm_scale * weight_scale) / np.double(ofm_scale) for weight_scale in weight_scales]
+ elif ifm_dtype == DataType.int8 or ifm_dtype == DataType.int16:
+ scales = [
+ (np.double(ifm_scale) * np.double(weight_scale)) / np.double(ofm_scale)
+ for weight_scale in weight_scales
+ ]
+ else:
+ raise UnsupportedFeatureError(f"Compression of {ifm_dtype} is not implemented; Tensor: '{tens.name}'")
+
# quantise all of the weight scales into (scale_factor, shift)
if ifm_dtype == DataType.int16 and bias_tens.dtype == DataType.int64:
# Reference uses reduced scaling for int16 with int64 bias
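The weight_compressor change gives explicit scaling precedence: when (shift, multiplier) pairs are already provided they are reused directly, and the double-precision scale derivation plus quantisation only runs in the fallback branch. A simplified sketch of that ordering; quantise_scale here is a deliberately crude stand-in for Vela's scale quantisation, not its real implementation:

import numpy as np

def quantise_scale(scale: float) -> tuple:
    # crude stand-in: express scale as (multiplier, shift) with multiplier / 2**shift ~= scale
    shift = 31
    return int(round(scale * (1 << shift))), shift

def choose_quantised_scales(explicit_scaling, ifm_scale, weight_scales, ofm_scale):
    if explicit_scaling:
        # explicit scaling wins: reuse the given pairs, no floating-point derivation needed
        return [(int(m), int(s)) for s, m in zip(explicit_scaling.shift, explicit_scaling.multiplier)]
    # fallback: derive per-weight scales in double precision, then quantise each one
    scales = [np.double(ifm_scale) * np.double(w) / np.double(ofm_scale) for w in weight_scales]
    return [quantise_scale(s) for s in scales]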