diff options
author | Fredrik Svedberg <fredrik.svedberg@arm.com> | 2023-04-11 22:35:04 +0200 |
---|---|---|
committer | Fredrik Svedberg <fredrik.svedberg@arm.com> | 2023-04-17 14:16:44 +0200 |
commit | 0ac0804e76e098695ee2b8a9e24e2f0a1efc324f (patch) | |
tree | 9ccb766221987a415244079ed6c596a47d693b20 /ethosu/vela/tflite_graph_optimiser.py | |
parent | c1ad80b3a581dd39b39a112d6c2026f6560207a4 (diff) | |
download | ethos-u-vela-0ac0804e76e098695ee2b8a9e24e2f0a1efc324f.tar.gz |
MLBEDSW-7196 Add LSTM support
Added int8 and int16 UNIDIRECTIONAL_SEQUENCE_LSTM support.
The implementation does not include support for:
* CIFG
* Peephole
* Projection
* Normalisation
This change also:
* Removed unused Op.BlockLSTM operation type.
* Removed the only-one-consumer limitation on putting the SplitSliceRead
on the tensor consumer(s), if all consumers fulfill the requirements
* Added Op.VariableTensorWrite as an Operation.memory_function to make
sure writes to variable tensors:
* Always use linear mode
* Are not moved to fast scratch
* Are not fused with other elementwise operation tensor ranges
Change-Id: Ief831738924ac3d1f2ba6d41f10bd6dc969911f3
Signed-off-by: Fredrik Svedberg <fredrik.svedberg@arm.com>
Diffstat (limited to 'ethosu/vela/tflite_graph_optimiser.py')
-rw-r--r-- | ethosu/vela/tflite_graph_optimiser.py | 50 |
1 file changed, 21 insertions, 29 deletions
diff --git a/ethosu/vela/tflite_graph_optimiser.py b/ethosu/vela/tflite_graph_optimiser.py index 077f4afa..478d0189 100644 --- a/ethosu/vela/tflite_graph_optimiser.py +++ b/ethosu/vela/tflite_graph_optimiser.py @@ -35,11 +35,13 @@ from .graph_optimiser_util import bypass_memory_only_ops from .graph_optimiser_util import calc_explicit_padding from .graph_optimiser_util import convert_depthwise_to_conv from .graph_optimiser_util import convert_to_lut +from .graph_optimiser_util import create_avg_pool_for_concat from .graph_optimiser_util import memory_only_ops from .graph_optimiser_util import move_splitsliceread_to_consumer from .graph_optimiser_util import needed_total_padding from .graph_optimiser_util import set_ifm_ofm_op_shapes from .graph_optimiser_util import set_tensor_equivalence +from .lstm import Lstm from .numeric_util import clamp_sigmoid from .numeric_util import full_shape from .numeric_util import round_away_zero @@ -69,23 +71,6 @@ from .tflite_mapping import optype_to_builtintype passthrough_nodes = (Op.Identity,) -def create_avg_pool_for_concat(concat_op, name, ifm, ifm_shape: Shape4D, write_offset: Shape4D): - """Creates an average pool for the given concat op/input feature map""" - ofm = concat_op.ofm - avgpool_op = create_avgpool_nop(name) - avgpool_op.inputs = [ifm] - avgpool_op.outputs = [ofm] - - avgpool_op.write_offset = write_offset - avgpool_op.write_shape = ifm_shape - ofm.ops.append(avgpool_op) - avgpool_op.ifm_shapes.append(ifm_shape) - avgpool_op.ofm_shapes.append(concat_op.ofm_shapes[0]) - avgpool_op.memory_function = Op.ConcatSliceWrite - DebugDatabase.add_optimised(concat_op, avgpool_op) - return avgpool_op - - def remove_passthrough_tensor(tens, arch, nng): if len(tens.ops) == 1 and tens.ops[0].type in passthrough_nodes: assert len(tens.ops[0].inputs) == 1 @@ -196,17 +181,15 @@ def rewrite_split_ops(tens, arch, nng): def remove_SplitSliceRead(op, arch): if op.type == Op.SplitSliceRead: - # Check if it is possible to put the 
SplitSliceRead on the tensor consumer, or if an avgpool need to be inserted - if ( - len(op.ofm.consumer_list) == 1 - and op.ofm.consumer_list[0] is not None - and op.ofm.consumer_list[0].run_on_npu - and op.ofm.consumer_list[0].type not in memory_only_ops - and op.ofm_shapes[0] == Shape4D.from_list(op.ofm.shape) + # Check if it is possible to put the SplitSliceRead on the tensor consumer(s), + # or if an avgpool need to be inserted + if op.ofm_shapes[0] == Shape4D.from_list(op.ofm.shape) and all( + consumer is not None and consumer.run_on_npu and consumer.type not in memory_only_ops + for consumer in op.ofm.consumer_list ): - # SplitSliceRead can be performed by tensor consumer - cons_op = op.ofm.consumer_list[0] - move_splitsliceread_to_consumer(op, cons_op) + # SplitSliceRead can be performed by tensor consumer(s) + for cons_op in list(op.ofm.consumer_list): + move_splitsliceread_to_consumer(op, cons_op) else: avgpool_op = create_avgpool_nop(op.name + "_avgpool") avgpool_op.add_input_tensor(op.ifm) @@ -801,8 +784,9 @@ def convert_nop_split_to_identity(op, arch, nng): def rewrite_fully_connected_input(op: Operation, arch, nng): - - if op.type == Op.FullyConnected: + # If the operation already have a read shape do not modify + # the ifm shape, since that will already be correct + if op.type == Op.FullyConnected and not op.read_shapes[0]: new_shape = op.ifm.get_shape_as_2d(op.weights.shape[-2]) assert new_shape is not None, "Tensor can not be reshaped to 2D" op.ifm_shapes[0] = new_shape @@ -1080,6 +1064,13 @@ def fixup_relus_with_differing_ifm_ofm_scaling(op, arch, nng): return op +def convert_lstm(op, arch, nng): + if op.type == Op.UnidirectionalSequenceLstm: + lstm = Lstm(op) + op = lstm.get_graph() + return op + + def convert_softmax(op, arch, nng): if op.type == Op.Softmax and op.run_on_npu: softmax = SoftMax(op) @@ -2144,6 +2135,7 @@ def tflite_optimise_graph(nng, arch, force_symmetric_int_weights): convert_mean_to_depthwise_conv_or_avgpool, 
convert_depthwise_to_conv, convert_conv_to_fc, + convert_lstm, convert_softmax, convert_prelu, convert_mul_max_to_abs_or_lrelu, |