aboutsummaryrefslogtreecommitdiff
path: root/ethosu/vela/high_level_command_to_npu_op.py
diff options
context:
space:
mode:
authorJohan Alfven <johan.alfven@arm.com>2023-10-28 16:04:46 +0200
committerJohan Alfven <johan.alfven@arm.com>2023-11-09 11:59:58 +0100
commita8fda88bced0d11441467b6798885101d41ac8e9 (patch)
tree807de7fa4eee48720255fbed4a605218f8612f6a /ethosu/vela/high_level_command_to_npu_op.py
parent4bf0cdf58416edc030ae7507ace95224785e4aa8 (diff)
downloadethos-u-vela-a8fda88bced0d11441467b6798885101d41ac8e9.tar.gz
MLBEDSW-8290: MLCE: Add TRANSPOSE support3.10.0.rc1
- Added graph optimiser function to convert TRANSPOSE op into an AvgPool op with swapped stride for height and width - Added TRANSPOSE supported op check - Added unit tests for TRANSPOSE supported op check - Updated SUPPORTED_OPS.md - Fixed problem in pass packing when optimizing the pass list. Old problem, but now seen when moving TRANSPOSE from cpu. Change-Id: I0a0ef420b0fb8241090c2e2434622881105cde15 Signed-off-by: Johan Alfven <johan.alfven@arm.com>
Diffstat (limited to 'ethosu/vela/high_level_command_to_npu_op.py')
-rw-r--r--ethosu/vela/high_level_command_to_npu_op.py20
1 files changed, 18 insertions, 2 deletions
diff --git a/ethosu/vela/high_level_command_to_npu_op.py b/ethosu/vela/high_level_command_to_npu_op.py
index 5e9dffac..53df0966 100644
--- a/ethosu/vela/high_level_command_to_npu_op.py
+++ b/ethosu/vela/high_level_command_to_npu_op.py
@@ -383,6 +383,7 @@ def create_feature_map(
op_shape4D: Shape4D,
tile_base_offsets: List[int],
stride_multiplier: Optional[List[int]] = None,
+ is_ofm: bool = False,
) -> NpuFeatureMap:
"""Creates feature map with common fields populated"""
fm = NpuFeatureMap()
@@ -395,7 +396,16 @@ def create_feature_map(
else:
assert 0, "Incorrect tensor format"
- strides = tens.get_strides(op_shape4D)
+ if is_ofm and tens.ops[0] is not None and tens.ops[0].original_type == Op.Transpose:
+ # op_shape4D has ifm shape, see fixup_transpose. Stride calculations needs to be
+ # based on the correct ofm shape.
+ op_shape4D_ofm_shape = Shape4D([op_shape4D.batch, op_shape4D.width, op_shape4D.height, op_shape4D.depth])
+ strides = tens.get_strides(op_shape4D_ofm_shape)
+ # Swap h and w strides which will cause the transpose to happen
+ strides[-3], strides[-2] = strides[-2], strides[-3]
+ else:
+ strides = tens.get_strides(op_shape4D)
+
assert strides is not None
if stride_multiplier and stride_multiplier != [1, 1, 1]:
@@ -513,7 +523,13 @@ def set_common_op_fields(npu_op: NpuBlockOperation, cmd: NpuStripe, arch: Archit
out_block = cmd.ofm_box.get_block()
npu_op.ofm = create_feature_map(
- cmd.ofm_tensor, cmd.ofm_box, arch, ps.ofm_shapes[0], op.tile_base_offsets_ofm, op.ofm_stride_multiplier
+ cmd.ofm_tensor,
+ cmd.ofm_box,
+ arch,
+ ps.ofm_shapes[0],
+ op.tile_base_offsets_ofm,
+ op.ofm_stride_multiplier,
+ is_ofm=True,
)
npu_op.ofm.shape = NpuShape3D(height=out_block.height, width=out_block.width, depth=out_block.depth)
npu_op.ofm.quantization = get_ofm_quantization(ps, cmd.ofm_tensor)