aboutsummaryrefslogtreecommitdiff
path: root/ethosu
diff options
context:
space:
mode:
author Rickard Bolin <rickard.bolin@arm.com> 2024-01-31 12:05:11 +0000
committer Rickard Bolin <rickard.bolin@arm.com> 2024-05-16 14:08:21 +0000
commit be78a053a57da7bdae240690c933824c0861f55b (patch)
tree e6eabce902b42fcbdc7ef4cf7cfbc8136e11246d /ethosu
parent 891468561ecfc61d27adcdc92b41ec216eaa1b08 (diff)
download ethos-u-vela-main.tar.gz
MLBEDSW-8561: Striding support in H/W for StridedSlice (refs: HEAD, 3.12.0.rc1, main)
Change-Id: Ie6f39d9c4125f7c16d27621de47cd76143c2e636 Signed-off-by: Rickard Bolin <rickard.bolin@arm.com>
Diffstat (limited to 'ethosu')
-rw-r--r-- ethosu/vela/high_level_command_to_npu_op.py 17
-rw-r--r-- ethosu/vela/operation.py 12
-rw-r--r-- ethosu/vela/test/test_tflite_supported_operators.py 6
-rw-r--r-- ethosu/vela/tflite_graph_optimiser.py 38
-rw-r--r-- ethosu/vela/tflite_supported_operators.py 7
5 files changed, 54 insertions, 26 deletions
diff --git a/ethosu/vela/high_level_command_to_npu_op.py b/ethosu/vela/high_level_command_to_npu_op.py
index 52d07187..71181d05 100644
--- a/ethosu/vela/high_level_command_to_npu_op.py
+++ b/ethosu/vela/high_level_command_to_npu_op.py
@@ -410,16 +410,20 @@ def create_feature_map(
assert strides is not None
+ multiplied_strides = strides.copy()
if stride_multiplier and stride_multiplier != [1, 1, 1]:
assert (
tens.format == TensorFormat.NHWC
), "Only default stride multiplier ([1, 1, 1]) supported for NHCWB16 format"
# Multiply strides for C/H/W (in that order) with corresponding stride factor
for i, stride_factor in enumerate(stride_multiplier, start=1):
- strides[i] *= stride_factor
+ multiplied_strides[i] *= stride_factor
+
+ # Stride multiplier only affects tiles and addresses for OFM
+ _strides = multiplied_strides if is_ofm else strides
height_0, height_1, width_0, addresses = tens.addresses_for_rolling_buffer(
- box.start_coord, box.end_coord, strides, op_shape4D
+ box.start_coord, box.end_coord, _strides, op_shape4D
)
for idx, offset in enumerate(tile_base_offsets):
@@ -427,7 +431,9 @@ def create_feature_map(
fm.tiles = NpuTileBox(
height_0=height_0, height_1=height_1, width_0=width_0, addresses=[int(addr) for addr in addresses]
)
- fm.strides = NpuShape3D(height=int(strides[2]), width=int(strides[3]), depth=int(strides[1]))
+ fm.strides = NpuShape3D(
+ height=int(multiplied_strides[2]), width=int(multiplied_strides[3]), depth=int(multiplied_strides[1])
+ )
fm.name = tens.name
return fm
@@ -518,8 +524,9 @@ def set_common_op_fields(npu_op: NpuBlockOperation, cmd: NpuStripe, arch: Archit
ifm_height = cmd.ifm_box.get_block().height
ifm_width = cmd.ifm_box.get_block().width
ifm_depth = get_ifm_depth(op.type.npu_block_type, cmd.ifm_box, cmd.ofm_box)
-
- npu_op.ifm = create_feature_map(cmd.ifm_tensor, cmd.ifm_box, arch, ps.ifm_shapes[0], op.tile_base_offsets_ifm[0])
+ npu_op.ifm = create_feature_map(
+ cmd.ifm_tensor, cmd.ifm_box, arch, ps.ifm_shapes[0], op.tile_base_offsets_ifm[0], op.ifm_stride_multiplier[0]
+ )
npu_op.ifm.shape = NpuShape3D(height=ifm_height, width=ifm_width, depth=ifm_depth)
npu_op.ifm.quantization = get_ifm_or_ifm2_quantization(ps, cmd.ifm_tensor)
diff --git a/ethosu/vela/operation.py b/ethosu/vela/operation.py
index a831537b..9a917f22 100644
--- a/ethosu/vela/operation.py
+++ b/ethosu/vela/operation.py
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright 2020-2023 Arm Limited and/or its affiliates <open-source-office@arm.com>
+# SPDX-FileCopyrightText: Copyright 2020-2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
#
# SPDX-License-Identifier: Apache-2.0
#
@@ -511,6 +511,7 @@ class Operation:
"tile_base_offsets_ifm",
"tile_base_offsets_ofm",
"ofm_stride_multiplier",
+ "ifm_stride_multiplier",
)
def __init__(self, op_type: Op, name: str):
@@ -554,8 +555,9 @@ class Operation:
self.tile_base_offsets_ifm: List[List[int]] = [[0, 0, 0, 0], [0, 0, 0, 0]]
# ofm (nhwc)
self.tile_base_offsets_ofm: List[int] = [0, 0, 0, 0]
- # For interleaved/sparse outputs - stride is multiplied with the stride factor of the corresponding axis
- # Order is [C, H, W] - default is no multiplication
+ # Stride is multiplied with the ifm/ofm stride factor of the corresponding axis
+ # Order is [C, H, W]
+ self.ifm_stride_multiplier: List[List[int]] = [[1, 1, 1], [1, 1, 1]]
self.ofm_stride_multiplier: List[int] = [1, 1, 1]
def clone(self, suffix="_clone"):
@@ -585,6 +587,7 @@ class Operation:
res.ifm_resampling_mode = self.ifm_resampling_mode
res.tile_base_offsets_ifm = [_ifm.copy() for _ifm in self.tile_base_offsets_ifm]
res.tile_base_offsets_ofm = self.tile_base_offsets_ofm.copy()
+ res.ifm_stride_multiplier = [_ifm.copy() for _ifm in self.ifm_stride_multiplier]
res.ofm_stride_multiplier = self.ofm_stride_multiplier.copy()
return res
@@ -763,6 +766,7 @@ class Operation:
offset_start = None
offset_end = None
axis = None
+ strides_tens = None
if self.type == Op.Split:
num_splits = self.attrs.get("num_splits")
axis_tens = self.inputs[0]
@@ -831,7 +835,7 @@ class Operation:
else:
assert False
- return input_tens, outputs, axis, offset_start, offset_end
+ return input_tens, outputs, axis, offset_start, offset_end, strides_tens
def set_activation_lut(self, lut_tensor):
self.activation = ActivationFunction(Op.LUT)
diff --git a/ethosu/vela/test/test_tflite_supported_operators.py b/ethosu/vela/test/test_tflite_supported_operators.py
index e65717a8..3b15b318 100644
--- a/ethosu/vela/test/test_tflite_supported_operators.py
+++ b/ethosu/vela/test/test_tflite_supported_operators.py
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright 2020-2023 Arm Limited and/or its affiliates <open-source-office@arm.com>
+# SPDX-FileCopyrightText: Copyright 2020-2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
#
# SPDX-License-Identifier: Apache-2.0
#
@@ -542,7 +542,9 @@ def create_strided_slice():
def test_constraint_stridedslice_stride_values():
# Unsupported strides
op = create_strided_slice()
- op.inputs[3].values = [1, 1, 2, 1]
+ op.inputs[3].values = [1, 2, 2, 1]
+ assert support.is_operator_supported(op)
+ op.inputs[3].values = [2, 1, 1, 1]
assert not support.is_operator_supported(op)
diff --git a/ethosu/vela/tflite_graph_optimiser.py b/ethosu/vela/tflite_graph_optimiser.py
index 3af8588c..ccbb1f28 100644
--- a/ethosu/vela/tflite_graph_optimiser.py
+++ b/ethosu/vela/tflite_graph_optimiser.py
@@ -141,7 +141,7 @@ def rewrite_split_ops(tens, arch, nng):
if not split_op.run_on_npu:
return tens
- inp, outputs, axis, offset_start, offset_end = split_op.get_split_inputs_axis()
+ inp, outputs, axis, offset_start, offset_end, strides_tens = split_op.get_split_inputs_axis()
tens.ops = []
new_op = Operation(Op.SplitSliceRead, split_op.name)
@@ -150,8 +150,10 @@ def rewrite_split_ops(tens, arch, nng):
if None in (offset_end, offset_start):
read_shape = None
else:
- # the read shape is relative to each start offset
- read_shape = Shape4D([oe - os for oe, os in zip(offset_end, offset_start)])
+ # The read shape is relative to each start offset
+ # Limit read shape to the size of the IFM - offset is not necessarily limited
+ ifm_dims = split_op.ifm_shapes[0].as_list()
+ read_shape = Shape4D([min(oe, ifm_dim) - os for oe, os, ifm_dim in zip(offset_end, offset_start, ifm_dims)])
# For Split the offset cannot be extracted from the tensor so it has to
# be calculated from the index of the output tensor
@@ -182,6 +184,9 @@ def rewrite_split_ops(tens, arch, nng):
new_op.set_output_tensor(tens)
new_op.ifm_shapes.append(Shape4D(inp.shape))
new_op.ofm_shapes.append(split_op.ofm_shapes[ofm_shape_idx])
+ # Set stride multiplier in H/W if a stride tensor is provided
+ s_h, s_w = (strides_tens.values[-3], strides_tens.values[-2]) if strides_tens else (1, 1)
+ new_op.ifm_stride_multiplier[0] = [1, s_h, s_w] # C/H/W
DebugDatabase.add_optimised(split_op, new_op)
return tens
@@ -193,18 +198,24 @@ def remove_SplitSliceRead(op, arch):
# Check if it is possible to put the SplitSliceRead on the tensor consumer(s),
# or if an avgpool need to be inserted
# Not possible to move:
+ # - if ifm stride multiplier is larger than one in any dimension
# - if consumer is a Transpose op since ifm shape has been reshaped and can not be changed
# - if consumer is elementwise and ifm needs to be broadcasted
- if op.ofm_shapes[0] == Shape4D.from_list(op.ofm.shape) and all(
- consumer is not None
- and consumer.run_on_npu
- and consumer.type not in memory_only_ops
- and consumer.original_type != Op.Transpose
- and check_splitsliceread_to_consumer_shape(op, consumer)
- and not (
- consumer.type.is_binary_elementwise_op() and Shape4D.from_list(consumer.ofm.shape) != op.ofm_shapes[0]
+ if (
+ op.ofm_shapes[0] == Shape4D.from_list(op.ofm.shape)
+ and all(s_mul == 1 for s_mul in op.ifm_stride_multiplier[0])
+ and all(
+ consumer is not None
+ and consumer.run_on_npu
+ and consumer.type not in memory_only_ops
+ and consumer.original_type != Op.Transpose
+ and check_splitsliceread_to_consumer_shape(op, consumer)
+ and not (
+ consumer.type.is_binary_elementwise_op()
+ and Shape4D.from_list(consumer.ofm.shape) != op.ofm_shapes[0]
+ )
+ for consumer in op.ofm.consumer_list
)
- for consumer in op.ofm.consumer_list
):
# SplitSliceRead can be performed by tensor consumer(s)
for cons_op in list(op.ofm.consumer_list):
@@ -219,6 +230,9 @@ def remove_SplitSliceRead(op, arch):
avgpool_op.ofm_shapes.append(op.ofm_shapes[0])
avgpool_op.read_offsets[0] = op.read_offsets[0]
avgpool_op.read_shapes[0] = op.read_shapes[0]
+ if any(s_mul != 1 for s_mul in op.ifm_stride_multiplier[0]):
+ avgpool_op.ifm_stride_multiplier[0] = op.ifm_stride_multiplier[0].copy()
+ avgpool_op.ifm.force_linear_format = True
op.ifm.consumer_list.remove(op)
DebugDatabase.add_optimised(op, avgpool_op)
diff --git a/ethosu/vela/tflite_supported_operators.py b/ethosu/vela/tflite_supported_operators.py
index 91a3ee83..b293a2ef 100644
--- a/ethosu/vela/tflite_supported_operators.py
+++ b/ethosu/vela/tflite_supported_operators.py
@@ -841,10 +841,11 @@ class TFLiteSupportedOperators:
@staticmethod
def constraint_stridedslice_stride_values(op):
- "All Strides values must be 1"
+ "Batch and channel stride values must be 1"
strides = op.inputs[3]
- valid = all(stride == 1 for stride in strides.values)
- return valid, f"Op has strides values {strides.values}"
+ s_c = strides.values[-1]
+ s_n = strides.values[0] if len(strides.values) > 3 else 1
+ return s_n == s_c == 1, f"Op has strides values {strides.values}"
@staticmethod
def constraint_stridedslice_offset_false(op):