aboutsummaryrefslogtreecommitdiff
path: root/ethosu/vela/tflite_graph_optimiser.py
diff options
context:
space:
mode:
authorFredrik Svedberg <fredrik.svedberg@arm.com>2023-04-11 22:35:04 +0200
committerFredrik Svedberg <fredrik.svedberg@arm.com>2023-04-17 14:16:44 +0200
commit0ac0804e76e098695ee2b8a9e24e2f0a1efc324f (patch)
tree9ccb766221987a415244079ed6c596a47d693b20 /ethosu/vela/tflite_graph_optimiser.py
parentc1ad80b3a581dd39b39a112d6c2026f6560207a4 (diff)
downloadethos-u-vela-0ac0804e76e098695ee2b8a9e24e2f0a1efc324f.tar.gz
MLBEDSW-7196 Add LSTM support
Added int8 and int16 UNIDIRECTIONAL_SEQUENCE_LSTM support. The implementation does not include support for: * CIFG * Peephole * Projection * Normalisation This change also: * Removed unused Op.BlockLSTM operation type. * Removed the only-one-consumer limitation on putting the SplitSliceRead on the tensor consumer(s), if all consumers fulfill the requirements * Added Op.VariableTensorWrite as an Operation.memory_function to make sure writes to variable tensors: * Always use linear mode * Are not moved to fast scratch * Are not fused with other elementwise operation tensor ranges Change-Id: Ief831738924ac3d1f2ba6d41f10bd6dc969911f3 Signed-off-by: Fredrik Svedberg <fredrik.svedberg@arm.com>
Diffstat (limited to 'ethosu/vela/tflite_graph_optimiser.py')
-rw-r--r--ethosu/vela/tflite_graph_optimiser.py50
1 file changed, 21 insertions, 29 deletions
diff --git a/ethosu/vela/tflite_graph_optimiser.py b/ethosu/vela/tflite_graph_optimiser.py
index 077f4afa..478d0189 100644
--- a/ethosu/vela/tflite_graph_optimiser.py
+++ b/ethosu/vela/tflite_graph_optimiser.py
@@ -35,11 +35,13 @@ from .graph_optimiser_util import bypass_memory_only_ops
from .graph_optimiser_util import calc_explicit_padding
from .graph_optimiser_util import convert_depthwise_to_conv
from .graph_optimiser_util import convert_to_lut
+from .graph_optimiser_util import create_avg_pool_for_concat
from .graph_optimiser_util import memory_only_ops
from .graph_optimiser_util import move_splitsliceread_to_consumer
from .graph_optimiser_util import needed_total_padding
from .graph_optimiser_util import set_ifm_ofm_op_shapes
from .graph_optimiser_util import set_tensor_equivalence
+from .lstm import Lstm
from .numeric_util import clamp_sigmoid
from .numeric_util import full_shape
from .numeric_util import round_away_zero
@@ -69,23 +71,6 @@ from .tflite_mapping import optype_to_builtintype
passthrough_nodes = (Op.Identity,)
-def create_avg_pool_for_concat(concat_op, name, ifm, ifm_shape: Shape4D, write_offset: Shape4D):
- """Creates an average pool for the given concat op/input feature map"""
- ofm = concat_op.ofm
- avgpool_op = create_avgpool_nop(name)
- avgpool_op.inputs = [ifm]
- avgpool_op.outputs = [ofm]
-
- avgpool_op.write_offset = write_offset
- avgpool_op.write_shape = ifm_shape
- ofm.ops.append(avgpool_op)
- avgpool_op.ifm_shapes.append(ifm_shape)
- avgpool_op.ofm_shapes.append(concat_op.ofm_shapes[0])
- avgpool_op.memory_function = Op.ConcatSliceWrite
- DebugDatabase.add_optimised(concat_op, avgpool_op)
- return avgpool_op
-
-
def remove_passthrough_tensor(tens, arch, nng):
if len(tens.ops) == 1 and tens.ops[0].type in passthrough_nodes:
assert len(tens.ops[0].inputs) == 1
@@ -196,17 +181,15 @@ def rewrite_split_ops(tens, arch, nng):
def remove_SplitSliceRead(op, arch):
if op.type == Op.SplitSliceRead:
- # Check if it is possible to put the SplitSliceRead on the tensor consumer, or if an avgpool need to be inserted
- if (
- len(op.ofm.consumer_list) == 1
- and op.ofm.consumer_list[0] is not None
- and op.ofm.consumer_list[0].run_on_npu
- and op.ofm.consumer_list[0].type not in memory_only_ops
- and op.ofm_shapes[0] == Shape4D.from_list(op.ofm.shape)
+ # Check if it is possible to put the SplitSliceRead on the tensor consumer(s),
+ # or if an avgpool needs to be inserted
+ if op.ofm_shapes[0] == Shape4D.from_list(op.ofm.shape) and all(
+ consumer is not None and consumer.run_on_npu and consumer.type not in memory_only_ops
+ for consumer in op.ofm.consumer_list
):
- # SplitSliceRead can be performed by tensor consumer
- cons_op = op.ofm.consumer_list[0]
- move_splitsliceread_to_consumer(op, cons_op)
+ # SplitSliceRead can be performed by tensor consumer(s)
+ for cons_op in list(op.ofm.consumer_list):
+ move_splitsliceread_to_consumer(op, cons_op)
else:
avgpool_op = create_avgpool_nop(op.name + "_avgpool")
avgpool_op.add_input_tensor(op.ifm)
@@ -801,8 +784,9 @@ def convert_nop_split_to_identity(op, arch, nng):
def rewrite_fully_connected_input(op: Operation, arch, nng):
-
- if op.type == Op.FullyConnected:
+ # If the operation already has a read shape, do not modify
+ # the ifm shape, since that will already be correct
+ if op.type == Op.FullyConnected and not op.read_shapes[0]:
new_shape = op.ifm.get_shape_as_2d(op.weights.shape[-2])
assert new_shape is not None, "Tensor can not be reshaped to 2D"
op.ifm_shapes[0] = new_shape
@@ -1080,6 +1064,13 @@ def fixup_relus_with_differing_ifm_ofm_scaling(op, arch, nng):
return op
def convert_lstm(op, arch, nng):
    """Rewrite UNIDIRECTIONAL_SEQUENCE_LSTM as a graph of supported ops.

    Graph-optimiser rewrite pass: for Op.UnidirectionalSequenceLstm the op is
    replaced by the result of Lstm(op).get_graph() — presumably the root op of
    the generated LSTM subgraph (TODO confirm against lstm.py); every other op
    is returned unchanged. `arch` and `nng` are unused here but required by the
    rewrite-pass calling convention.
    """
    if op.type == Op.UnidirectionalSequenceLstm:
        lstm = Lstm(op)
        op = lstm.get_graph()
    return op
+
+
def convert_softmax(op, arch, nng):
if op.type == Op.Softmax and op.run_on_npu:
softmax = SoftMax(op)
@@ -2144,6 +2135,7 @@ def tflite_optimise_graph(nng, arch, force_symmetric_int_weights):
convert_mean_to_depthwise_conv_or_avgpool,
convert_depthwise_to_conv,
convert_conv_to_fc,
+ convert_lstm,
convert_softmax,
convert_prelu,
convert_mul_max_to_abs_or_lrelu,