diff options
author | Johan Alfven <johan.alfven@arm.com> | 2023-04-13 10:13:56 +0200 |
---|---|---|
committer | Johan Alfven <johan.alfven@arm.com> | 2023-04-24 12:56:44 +0200 |
commit | c4268bf407048c7899c8501dd1223a777f8c4963 (patch) | |
tree | eaadb0341c0989778542ccabcaf56fbddd592e8a /ethosu | |
parent | 301ca6046884ccacd6cb4d64bd4c4869ff66b4bf (diff) | |
download | ethos-u-vela-c4268bf407048c7899c8501dd1223a777f8c4963.tar.gz |
MLBEDSW-7501: Vela unnecessary adds reshaped weights tensors
- Weights are internally cloned and reshaped/transposed when
running on the NPU. This happens already in the reader. If
the op is passed through to the CPU there are code that writes
backs these clones but with another round of reshape/transpose.
This adds extra tensors in the optimized file compared to the
original file if the original tensors are subgraph inputs.
- If the op is passed trough to the CPU the clones should not
be written to the file. Solved this by setting the src_tensor
when making the clone.
Change-Id: I9f55d542c099882882920bffe8e15b43b2ca2c8d
Signed-off-by: Johan Alfven <johan.alfven@arm.com>
Diffstat (limited to 'ethosu')
-rw-r--r-- | ethosu/vela/tensor.py | 1 | ||||
-rw-r--r-- | ethosu/vela/tflite_writer.py | 21 |
2 files changed, 7 insertions, 15 deletions
diff --git a/ethosu/vela/tensor.py b/ethosu/vela/tensor.py index 9ba6ab77..6ba331c4 100644 --- a/ethosu/vela/tensor.py +++ b/ethosu/vela/tensor.py @@ -506,6 +506,7 @@ class Tensor: res.name = res.name + suffix res.ops = [] res.consumer_list = [] + res.src_tensor = self return res diff --git a/ethosu/vela/tflite_writer.py b/ethosu/vela/tflite_writer.py index 625cf7cc..c8250c6e 100644 --- a/ethosu/vela/tflite_writer.py +++ b/ethosu/vela/tflite_writer.py @@ -90,8 +90,6 @@ class TFLiteSerialiser: self.ops_to_ignore = (Op.Const, Op.Placeholder, Op.SubgraphInput) - self.tensors_to_reshape = {} - self.subgraphs_to_write = [sg for sg in self.nng.subgraphs if sg.placement == PassPlacement.Cpu] all_ops = [] @@ -102,14 +100,12 @@ class TFLiteSerialiser: # swap from nng input indexing to TensorFlow Lite input indexing self.align_nng_inputs_to_tflite(op) all_ops.append(op) - if op.type.is_conv2d_op() or op.type.is_depthwise_conv2d_op(): - # If values are None op has non-constant weights - if op.inputs[1].values is not None: - self.tensors_to_reshape[op.inputs[1]] = (3, 0, 1, 2) - if op.type == Op.FullyConnected: - # If values are None op has non-constant weights - if op.inputs[1].values is not None: - self.tensors_to_reshape[op.inputs[1]] = (1, 0) + if op.type.is_conv2d_op() or op.type.is_depthwise_conv2d_op() or op.type == Op.FullyConnected: + # Op is run on CPU, make sure original tensor are written back + # instead of the cloned/reshaped (see tflite_reader) + for idx, inp in enumerate(op.inputs): + if inp is not None and inp.src_tensor is not None: + op.inputs[idx] = inp.src_tensor # list of tuple(Op, string); the custom code is only used for 3rd party custom operators self.operator_codes = sorted(set((op.type, op.attrs.get("custom_code", "")) for op in all_ops)) @@ -259,11 +255,6 @@ class TFLiteSerialiser: tens_shape = tens.original_shape values = tens.values - if tens in self.tensors_to_reshape: - reorder = self.tensors_to_reshape[tens] - tens_shape = [tens_shape[idx] for idx in reorder] - values = values.transpose(reorder) - buf_id = self.buffer_map[tens] self.buffers_to_write[buf_id] = None if values is None else values.flatten().view(np.uint8) |