author     Michael McGeagh <michael.mcgeagh@arm.com>   2020-09-08 11:09:48 +0100
committer  Michael McGeagh <michael.mcgeagh@arm.com>   2020-09-11 13:29:06 +0100
commit     8dbf8cfefa1feea6598f5f4864657ba6b6ad60ed (patch)
tree       e55debe4a80b01a79381a6aca378a9c6a7da5447
parent     fa4cb29996ffe1e64e39655c2195af6ff02e887a (diff)
MLBEDSW-2745 Support relus with differing scales
In the event we have a relu op with different input and output scales, we
need to fuse it with a nop avgpool. Also refactor the existing avgpool nop
code to a common function.

Signed-off-by: Michael McGeagh <michael.mcgeagh@arm.com>
Change-Id: Iedf4513e7595ee4ee1777ba0b1eb38a8df8aed5e
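For context, the problem being fixed: a quantised Relu whose input (IFM) and
output (OFM) tensors carry different quantisation scaling cannot simply be
fused into the op that precedes it, because the requantisation has to happen
somewhere. Below is a minimal sketch of the scaling comparison, assuming each
tensor's quantisation carries a scale and a zero point; the field names are
illustrative, not vela's actual is_scaling_equal implementation.

    # Illustrative sketch only: two tensors have equal scaling when both the
    # scale and the zero point of their quantisation parameters match. When
    # they differ, the Relu needs its own primary op to carry the rescale,
    # which this commit provides via a 1x1 "nop" AvgPool.
    def is_scaling_equal(ifm_quant, ofm_quant):
        return (
            ifm_quant.scale_f32 == ofm_quant.scale_f32
            and ifm_quant.zero_point == ofm_quant.zero_point
        )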
-rw-r--r--   ethosu/vela/graph_optimiser.py   21
-rw-r--r--   ethosu/vela/operation.py         15
-rw-r--r--   ethosu/vela/pass_packing.py      19
3 files changed, 40 insertions, 15 deletions
diff --git a/ethosu/vela/graph_optimiser.py b/ethosu/vela/graph_optimiser.py
index 48684058..1a6aaf10 100644
--- a/ethosu/vela/graph_optimiser.py
+++ b/ethosu/vela/graph_optimiser.py
@@ -28,6 +28,7 @@ from .data_type import DataType
 from .errors import UnsupportedFeatureError
 from .ethos_u55_regs.ethos_u55_regs import resampling_mode
 from .numeric_util import full_shape
+from .operation import create_avgpool_nop
 from .operation import NpuBlockType
 from .operation import Operation
 from .softmax import SoftMax
@@ -563,6 +564,25 @@ def convert_conv_to_fc(op, arch):
     return op
 
 
+def fixup_relus_with_differing_ifm_ofm_scaling(op, arch):
+    if op.run_on_npu and op.type in relu_ops:
+        ifm = op.inputs[0]
+        ofm = op.outputs[0]
+        # Relu with differing IFM and OFM scaling cannot be fused with another primary op
+        # and requires its own to be inserted
+        if not ifm.is_scaling_equal(ofm):
+            # Override this op with its own primary op (avgpool)
+            relu_fused_op = create_avgpool_nop(op.name + "_avgpool")
+            # And fuse the original activation function to it
+            relu_fused_op.attrs["fused_activation_function"] = op.type
+            # Tidy up and assign the ifm and ofm to the new op
+            ifm.consumer_list.remove(op)
+            relu_fused_op.add_input_tensor(ifm)
+            relu_fused_op.set_output_tensor(ofm)
+            op = relu_fused_op
+    return op
+
+
 # Reorder activation op if it's after the memory only operations
 def fixup_act_reorder(op, arch):
     if op.type in activation_ops:
@@ -929,6 +949,7 @@ def optimise_graph_a(nng, arch, verbose_graph=False):
         fixup_fully_connected_input,
         fixup_pack_input,
         fixup_conv2d_backprop,
+        fixup_relus_with_differing_ifm_ofm_scaling,
         fixup_act_reorder,
         mark_npu_block_type,
         fixup_elementwise_with_scalars,
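A rough sketch of how this rewrite list is consumed, assuming each entry is a
function taking (op, arch) and returning the op or a replacement for it; the
real optimise_graph_a walks nng's subgraphs through a rewrite helper, so the
loop below is a simplification, not vela's actual driver code.

    # Simplified sketch: apply every rewrite function to every op, in order.
    # A rewrite either returns the op unchanged or returns a replacement op,
    # as fixup_relus_with_differing_ifm_ofm_scaling does above.
    def apply_rewrites(ops, op_rewrite_list, arch):
        rewritten = []
        for op in ops:
            for rewrite in op_rewrite_list:
                op = rewrite(op, arch)
            rewritten.append(op)
        return rewritten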
diff --git a/ethosu/vela/operation.py b/ethosu/vela/operation.py
index e7fd97c4..6bc5a32d 100644
--- a/ethosu/vela/operation.py
+++ b/ethosu/vela/operation.py
@@ -28,6 +28,21 @@ class NpuBlockType(enum.Enum):
     ReduceSum = 6
 
 
+def create_avgpool_nop(name):
+    op = Operation("AvgPool", name)
+    op.attrs["padding"] = b"VALID"
+    op.attrs["npu_block_type"] = NpuBlockType.Pooling
+    op.attrs["stride_w"] = 1
+    op.attrs["stride_h"] = 1
+    op.attrs["filter_width"] = 1
+    op.attrs["filter_height"] = 1
+    op.attrs["strides"] = [1, 1, 1, 1]
+    op.attrs["ksize"] = [1, 1, 1, 1]
+    op.attrs["skirt"] = [0, 0, 0, 0]
+    op.attrs["explicit_padding"] = [0, 0, 0, 0]
+    return op
+
+
 class Operation:
     """Class representing a Neural Network operation. Has a name, a type,
     input and output tensors, as well as an attribute dictionary."""
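A short usage sketch of the new helper: the attributes above describe a 1x1,
stride-1, VALID-padded AvgPool, i.e. a pooling op that passes data through
unchanged while still giving the compiler a primary op to attach rescaling or
a fused activation to. In the snippet, ifm and ofm are assumed to be existing
Tensor objects; add_input_tensor and set_output_tensor are the Operation
methods this commit uses in graph_optimiser.py.

    # Build the identity AvgPool and wire it up, mirroring what
    # fixup_relus_with_differing_ifm_ofm_scaling does above.
    nop = create_avgpool_nop("relu1_avgpool")
    nop.attrs["fused_activation_function"] = "Relu"  # the original op's type
    nop.add_input_tensor(ifm)   # ifm: assumed pre-existing input Tensor
    nop.set_output_tensor(ofm)  # ofm: assumed pre-existing output Tensor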
diff --git a/ethosu/vela/pass_packing.py b/ethosu/vela/pass_packing.py
index 9e36cd62..a1b03fe2 100644
--- a/ethosu/vela/pass_packing.py
+++ b/ethosu/vela/pass_packing.py
@@ -20,8 +20,8 @@ import enum
 from .nn_graph import Pass
 from .nn_graph import PassPlacement
+from .operation import create_avgpool_nop
 from .operation import NpuBlockType
-from .operation import Operation
 from .tensor import TensorPurpose
@@ -455,20 +455,9 @@ def pack_into_passes(nng, arch, verbose_packing=False):
             # Configure a 1x1 AvgPool and attach the op onto it
             op = op_list[0]
             inp = op.inputs[0]
-            avgpool_name = op.name + "_avgpool"
-            avgpool_op = Operation("AvgPool", avgpool_name)
-            avgpool_op.inputs = [inp]
-            avgpool_op.inputs[0].consumer_list.append(avgpool_op)
-            avgpool_op.attrs["padding"] = b"VALID"
-            avgpool_op.attrs["npu_block_type"] = NpuBlockType.Pooling
-            avgpool_op.attrs["stride_w"] = 1
-            avgpool_op.attrs["stride_h"] = 1
-            avgpool_op.attrs["filter_width"] = 1
-            avgpool_op.attrs["filter_height"] = 1
-            avgpool_op.attrs["strides"] = [1, 1, 1, 1]
-            avgpool_op.attrs["ksize"] = [1, 1, 1, 1]
-            avgpool_op.attrs["skirt"] = [0, 0, 0, 0]
-            avgpool_op.attrs["explicit_padding"] = [0, 0, 0, 0]
+
+            avgpool_op = create_avgpool_nop(op.name + "_avgpool")
+            avgpool_op.add_input_tensor(inp)
             avgpool_out = inp.clone("_avgpooled")
             avgpool_out.consumer_list.append(op)
             avgpool_op.set_output_tensor(avgpool_out)
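Note that the refactor also replaces the manual input bookkeeping (assigning
avgpool_op.inputs = [inp] and appending to the tensor's consumer_list) with a
single add_input_tensor call. Presumably that method encapsulates exactly
those two steps, along the lines of the sketch below; this is an assumption
for illustration, not vela's actual implementation.

    # Sketch of the bookkeeping the removed lines performed by hand:
    # register the tensor as an input of the op, and the op as a consumer
    # of the tensor, keeping the graph's two-way links consistent.
    def add_input_tensor(self, tensor):
        self.inputs.append(tensor)
        tensor.consumer_list.append(self)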