From 8dbf8cfefa1feea6598f5f4864657ba6b6ad60ed Mon Sep 17 00:00:00 2001
From: Michael McGeagh
Date: Tue, 8 Sep 2020 11:09:48 +0100
Subject: MLBEDSW-2745 Support relus with differing scales

In the event we have a relu op with different input and output scales,
we need to fuse it with a nop avgpool.

Also refactor the existing avgpool nop code to a common function.

Signed-off-by: Michael McGeagh
Change-Id: Iedf4513e7595ee4ee1777ba0b1eb38a8df8aed5e
---
 ethosu/vela/graph_optimiser.py | 21 +++++++++++++++++++++
 ethosu/vela/operation.py       | 15 +++++++++++++++
 ethosu/vela/pass_packing.py    | 19 ++++---------------
 3 files changed, 40 insertions(+), 15 deletions(-)

diff --git a/ethosu/vela/graph_optimiser.py b/ethosu/vela/graph_optimiser.py
index 48684058..1a6aaf10 100644
--- a/ethosu/vela/graph_optimiser.py
+++ b/ethosu/vela/graph_optimiser.py
@@ -28,6 +28,7 @@ from .data_type import DataType
 from .errors import UnsupportedFeatureError
 from .ethos_u55_regs.ethos_u55_regs import resampling_mode
 from .numeric_util import full_shape
+from .operation import create_avgpool_nop
 from .operation import NpuBlockType
 from .operation import Operation
 from .softmax import SoftMax
@@ -563,6 +564,25 @@ def convert_conv_to_fc(op, arch):
     return op
 
 
+def fixup_relus_with_differing_ifm_ofm_scaling(op, arch):
+    if op.run_on_npu and op.type in relu_ops:
+        ifm = op.inputs[0]
+        ofm = op.outputs[0]
+        # Relu with differing IFM and OFM scaling cannot be fused with another primary op
+        # and requires its own to be inserted
+        if not ifm.is_scaling_equal(ofm):
+            # Override this op with its own primary op (avgpool)
+            relu_fused_op = create_avgpool_nop(op.name + "_avgpool")
+            # And fuse the original activation function to it
+            relu_fused_op.attrs["fused_activation_function"] = op.type
+            # Tidy up and assign the ifm and ofm to the new op
+            ifm.consumer_list.remove(op)
+            relu_fused_op.add_input_tensor(ifm)
+            relu_fused_op.set_output_tensor(ofm)
+            op = relu_fused_op
+    return op
+
+
 # Reorder activation op if it's after the memory only operations
 def fixup_act_reorder(op, arch):
     if op.type in activation_ops:
@@ -929,6 +949,7 @@ def optimise_graph_a(nng, arch, verbose_graph=False):
         fixup_fully_connected_input,
         fixup_pack_input,
         fixup_conv2d_backprop,
+        fixup_relus_with_differing_ifm_ofm_scaling,
         fixup_act_reorder,
         mark_npu_block_type,
         fixup_elementwise_with_scalars,
diff --git a/ethosu/vela/operation.py b/ethosu/vela/operation.py
index e7fd97c4..6bc5a32d 100644
--- a/ethosu/vela/operation.py
+++ b/ethosu/vela/operation.py
@@ -28,6 +28,21 @@ class NpuBlockType(enum.Enum):
     ReduceSum = 6
 
 
+def create_avgpool_nop(name):
+    op = Operation("AvgPool", name)
+    op.attrs["padding"] = b"VALID"
+    op.attrs["npu_block_type"] = NpuBlockType.Pooling
+    op.attrs["stride_w"] = 1
+    op.attrs["stride_h"] = 1
+    op.attrs["filter_width"] = 1
+    op.attrs["filter_height"] = 1
+    op.attrs["strides"] = [1, 1, 1, 1]
+    op.attrs["ksize"] = [1, 1, 1, 1]
+    op.attrs["skirt"] = [0, 0, 0, 0]
+    op.attrs["explicit_padding"] = [0, 0, 0, 0]
+    return op
+
+
 class Operation:
     """Class representing a Neural Network operation.
     Has a name, a type, input and output tensors, as well as an attribute dictionary."""
diff --git a/ethosu/vela/pass_packing.py b/ethosu/vela/pass_packing.py
index 9e36cd62..a1b03fe2 100644
--- a/ethosu/vela/pass_packing.py
+++ b/ethosu/vela/pass_packing.py
@@ -20,8 +20,8 @@ import enum
 
 from .nn_graph import Pass
 from .nn_graph import PassPlacement
+from .operation import create_avgpool_nop
 from .operation import NpuBlockType
-from .operation import Operation
 from .tensor import TensorPurpose
 
 
@@ -455,20 +455,9 @@ def pack_into_passes(nng, arch, verbose_packing=False):
             # Configure a 1x1 AvgPool and attach the op onto it
             op = op_list[0]
             inp = op.inputs[0]
-            avgpool_name = op.name + "_avgpool"
-            avgpool_op = Operation("AvgPool", avgpool_name)
-            avgpool_op.inputs = [inp]
-            avgpool_op.inputs[0].consumer_list.append(avgpool_op)
-            avgpool_op.attrs["padding"] = b"VALID"
-            avgpool_op.attrs["npu_block_type"] = NpuBlockType.Pooling
-            avgpool_op.attrs["stride_w"] = 1
-            avgpool_op.attrs["stride_h"] = 1
-            avgpool_op.attrs["filter_width"] = 1
-            avgpool_op.attrs["filter_height"] = 1
-            avgpool_op.attrs["strides"] = [1, 1, 1, 1]
-            avgpool_op.attrs["ksize"] = [1, 1, 1, 1]
-            avgpool_op.attrs["skirt"] = [0, 0, 0, 0]
-            avgpool_op.attrs["explicit_padding"] = [0, 0, 0, 0]
+
+            avgpool_op = create_avgpool_nop(op.name + "_avgpool")
+            avgpool_op.add_input_tensor(inp)
             avgpool_out = inp.clone("_avgpooled")
             avgpool_out.consumer_list.append(op)
             avgpool_op.set_output_tensor(avgpool_out)
--
cgit v1.2.1
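
For illustration, the sketch below replays the graph rewrite that the new
fixup_relus_with_differing_ifm_ofm_scaling pass performs, outside of Vela.
The Tensor and Operation classes here are hypothetical stand-ins that model
only the members the patch touches (consumer_list, attrs, add_input_tensor,
set_output_tensor, is_scaling_equal); quant_scale and the demo values are
likewise assumptions made for the example, not Vela's real quantisation API.

    # Hypothetical stand-ins for Vela's Tensor/Operation classes; only the
    # members the fixup touches are modelled, so this is a sketch of the
    # technique rather than the real API.
    class Tensor:
        def __init__(self, name, quant_scale):
            self.name = name
            self.quant_scale = quant_scale  # assumed per-tensor scale
            self.consumer_list = []
            self.ops = []  # producing op(s)

        def is_scaling_equal(self, other):
            return self.quant_scale == other.quant_scale


    class Operation:
        def __init__(self, op_type, name):
            self.type = op_type
            self.name = name
            self.attrs = {}
            self.inputs = []
            self.outputs = []
            self.run_on_npu = True

        def add_input_tensor(self, tens):
            self.inputs.append(tens)
            tens.consumer_list.append(self)

        def set_output_tensor(self, tens):
            tens.ops = [self]
            self.outputs = [tens]


    def create_avgpool_nop(name):
        # 1x1 kernel, stride 1, VALID padding: an identity on values, but it
        # gives the scale change a primary op to execute on (the skirt,
        # explicit_padding and npu_block_type attrs of the real helper are
        # omitted in this sketch)
        op = Operation("AvgPool", name)
        op.attrs["padding"] = b"VALID"
        op.attrs["stride_w"] = op.attrs["stride_h"] = 1
        op.attrs["filter_width"] = op.attrs["filter_height"] = 1
        op.attrs["strides"] = [1, 1, 1, 1]
        op.attrs["ksize"] = [1, 1, 1, 1]
        return op


    def fixup_relu_with_differing_scaling(op):
        ifm, ofm = op.inputs[0], op.outputs[0]
        if op.run_on_npu and op.type == "Relu" and not ifm.is_scaling_equal(ofm):
            # Wrap the activation around a nop avgpool so the IFM -> OFM
            # requantisation has a primary op to ride on
            fused = create_avgpool_nop(op.name + "_avgpool")
            fused.attrs["fused_activation_function"] = op.type
            ifm.consumer_list.remove(op)  # detach the original relu
            fused.add_input_tensor(ifm)
            fused.set_output_tensor(ofm)
            return fused
        return op


    if __name__ == "__main__":
        ifm = Tensor("ifm", quant_scale=0.5)
        ofm = Tensor("ofm", quant_scale=0.25)  # scales differ -> needs fixup
        relu = Operation("Relu", "relu1")
        relu.add_input_tensor(ifm)
        relu.set_output_tensor(ofm)

        op = fixup_relu_with_differing_scaling(relu)
        print(op.type, op.name, op.attrs["fused_activation_function"])
        # -> AvgPool relu1_avgpool Relu

The design point the patch relies on: a 1x1, stride-1, VALID-padded AvgPool
passes values through unchanged, so the only observable effect of the fused
op is the requantisation from the IFM scale to the OFM scale, with the
original relu applied as the fused activation function.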