diff options
author | Fredrik Svedberg <fredrik.svedberg@arm.com> | 2023-04-11 22:35:04 +0200 |
---|---|---|
committer | Fredrik Svedberg <fredrik.svedberg@arm.com> | 2023-04-17 14:16:44 +0200 |
commit | 0ac0804e76e098695ee2b8a9e24e2f0a1efc324f (patch) | |
tree | 9ccb766221987a415244079ed6c596a47d693b20 /ethosu/vela/tflite_graph_optimiser.py | |
parent | c1ad80b3a581dd39b39a112d6c2026f6560207a4 (diff) | |
download | ethos-u-vela-0ac0804e76e098695ee2b8a9e24e2f0a1efc324f.tar.gz |
MLBEDSW-7196 Add LSTM support
Added int8 and int16 UNIDIRECTIONAL_SEQUENCE_LSTM support.
The implementation does not include support for:
* CIFG
* Peephole
* Projection
* Normalisation
This change also:
* Removed unused Op.BlockLSTM operation type.
* Removed the only-one-consumer limitation on putting the SplitSliceRead
on the tensor consumer(s), if all consumers fulfill the requirements
* Added Op.VariableTensorWrite as an Operation.memory_function to make
sure writes to variable tensors:
* Always use linear mode
* Are not moved to fast scratch
* Are not fused with other elementwise operation tensor ranges
Change-Id: Ief831738924ac3d1f2ba6d41f10bd6dc969911f3
Signed-off-by: Fredrik Svedberg <fredrik.svedberg@arm.com>
Diffstat (limited to 'ethosu/vela/tflite_graph_optimiser.py')
-rw-r--r-- | ethosu/vela/tflite_graph_optimiser.py | 50 |
1 file changed, 21 insertions, 29 deletions
diff --git a/ethosu/vela/tflite_graph_optimiser.py b/ethosu/vela/tflite_graph_optimiser.py index 077f4afa..478d0189 100644 --- a/ethosu/vela/tflite_graph_optimiser.py +++ b/ethosu/vela/tflite_graph_optimiser.py @@ -35,11 +35,13 @@ from .graph_optimiser_util import bypass_memory_only_ops from .graph_optimiser_util import calc_explicit_padding from .graph_optimiser_util import convert_depthwise_to_conv from .graph_optimiser_util import convert_to_lut +from .graph_optimiser_util import create_avg_pool_for_concat from .graph_optimiser_util import memory_only_ops from .graph_optimiser_util import move_splitsliceread_to_consumer from .graph_optimiser_util import needed_total_padding from .graph_optimiser_util import set_ifm_ofm_op_shapes from .graph_optimiser_util import set_tensor_equivalence +from .lstm import Lstm from .numeric_util import clamp_sigmoid from .numeric_util import full_shape from .numeric_util import round_away_zero @@ -69,23 +71,6 @@ from .tflite_mapping import optype_to_builtintype passthrough_nodes = (Op.Identity,) -def create_avg_pool_for_concat(concat_op, name, ifm, ifm_shape: Shape4D, write_offset: Shape4D): - """Creates an average pool for the given concat op/input feature map""" - ofm = concat_op.ofm - avgpool_op = create_avgpool_nop(name) - avgpool_op.inputs = [ifm] - avgpool_op.outputs = [ofm] - - avgpool_op.write_offset = write_offset - avgpool_op.write_shape = ifm_shape - ofm.ops.append(avgpool_op) - avgpool_op.ifm_shapes.append(ifm_shape) - avgpool_op.ofm_shapes.append(concat_op.ofm_shapes[0]) - avgpool_op.memory_function = Op.ConcatSliceWrite - DebugDatabase.add_optimised(concat_op, avgpool_op) - return avgpool_op - - def remove_passthrough_tensor(tens, arch, nng): if len(tens.ops) == 1 and tens.ops[0].type in passthrough_nodes: assert len(tens.ops[0].inputs) == 1 @@ -196,17 +181,15 @@ def rewrite_split_ops(tens, arch, nng): def remove_SplitSliceRead(op, arch): if op.type == Op.SplitSliceRead: - # Check if it is possible to put the 
SplitSliceRead on the tensor consumer, or if an avgpool need to be inserted - if ( - len(op.ofm.consumer_list) == 1 - and op.ofm.consumer_list[0] is not None - and op.ofm.consumer_list[0].run_on_npu - and op.ofm.consumer_list[0].type not in memory_only_ops - and op.ofm_shapes[0] == Shape4D.from_list(op.ofm.shape) + # Check if it is possible to put the SplitSliceRead on the tensor consumer(s), + # or if an avgpool need to be inserted + if op.ofm_shapes[0] == Shape4D.from_list(op.ofm.shape) and all( + consumer is not None and consumer.run_on_npu and consumer.type not in memory_only_ops + for consumer in op.ofm.consumer_list ): - # SplitSliceRead can be performed by tensor consumer - cons_op = op.ofm.consumer_list[0] - move_splitsliceread_to_consumer(op, cons_op) + # SplitSliceRead can be performed by tensor consumer(s) + for cons_op in list(op.ofm.consumer_list): + move_splitsliceread_to_consumer(op, cons_op) else: avgpool_op = create_avgpool_nop(op.name + "_avgpool") avgpool_op.add_input_tensor(op.ifm) @@ -801,8 +784,9 @@ def convert_nop_split_to_identity(op, arch, nng): def rewrite_fully_connected_input(op: Operation, arch, nng): - - if op.type == Op.FullyConnected: + # If the operation already have a read shape do not modify + # the ifm shape, since that will already be correct + if op.type == Op.FullyConnected and not op.read_shapes[0]: new_shape = op.ifm.get_shape_as_2d(op.weights.shape[-2]) assert new_shape is not None, "Tensor can not be reshaped to 2D" op.ifm_shapes[0] = new_shape @@ -1080,6 +1064,13 @@ def fixup_relus_with_differing_ifm_ofm_scaling(op, arch, nng): return op +def convert_lstm(op, arch, nng): + if op.type == Op.UnidirectionalSequenceLstm: + lstm = Lstm(op) + op = lstm.get_graph() + return op + + def convert_softmax(op, arch, nng): if op.type == Op.Softmax and op.run_on_npu: softmax = SoftMax(op) @@ -2144,6 +2135,7 @@ def tflite_optimise_graph(nng, arch, force_symmetric_int_weights): convert_mean_to_depthwise_conv_or_avgpool, 
convert_depthwise_to_conv, convert_conv_to_fc, + convert_lstm, convert_softmax, convert_prelu, convert_mul_max_to_abs_or_lrelu, |