From 199e8e66ba3d959fd0f584683e5b1c1fda77ce6b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Davidsson?= Date: Tue, 10 Oct 2023 11:22:59 +0200 Subject: MLBEDSW-8117: Incorrect stride check for IFM/IFM2 and OFM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The constraint check for the IFM/IFM2/OFM strides was coded according to an incorrect version of the specification. Changed the check to verify that the strides are a multiple of 16 bytes. Also changed the wording in the exception message to clarify whether it is a stride or value violating the constraint. Test case had two stride settings violating the constraint; after this change one of them still fails the check, so no change to tests, except in comments clarifying what is being tested. Change-Id: I93815d8bb08303b5f747c947c0bbd461b12895e3 Signed-off-by: Björn Davidsson --- ethosu/vela/register_command_stream_generator.py | 6 +++--- ethosu/vela/register_command_stream_util.py | 20 ++++++++++++++------ .../test/extapi/test_extapi_generate_commands.py | 4 ++-- 3 files changed, 19 insertions(+), 11 deletions(-) diff --git a/ethosu/vela/register_command_stream_generator.py b/ethosu/vela/register_command_stream_generator.py index 56aae73..42ae99d 100644 --- a/ethosu/vela/register_command_stream_generator.py +++ b/ethosu/vela/register_command_stream_generator.py @@ -81,7 +81,7 @@ from .register_command_stream_util import calc_blockdep from .register_command_stream_util import check_addresses from .register_command_stream_util import check_alignment from .register_command_stream_util import check_dma_op -from .register_command_stream_util import check_size +from .register_command_stream_util import check_length from .register_command_stream_util import check_strides from .register_command_stream_util import get_dma_memory_accesses from .register_command_stream_util import get_op_memory_accesses @@ -526,7 +526,7 @@ def generate_weights(emit: CommandStreamEmitter, weights: 
List[NpuAddressRange], ): if core < len(weights): check_alignment(weights[core].address, 16) - check_size(weights[core].length, 16) + check_length(weights[core].length, 16) emit.cmd1_with_address(addr, weights[core].address) emit.cmd1_with_offset(length, weights[core].length) elif core < arch.ncores: @@ -546,7 +546,7 @@ def generate_biases(emit: CommandStreamEmitter, biases: List[NpuAddressRange], a ): if core < len(biases): emit.cmd1_with_address(addr, biases[core].address) - check_size(biases[core].length, 16) + check_length(biases[core].length, 16) emit.cmd1_with_offset(length, biases[core].length) elif core < arch.ncores: emit.cmd1_with_address(addr, biases[0].address) diff --git a/ethosu/vela/register_command_stream_util.py b/ethosu/vela/register_command_stream_util.py index c7050a3..74c4f90 100644 --- a/ethosu/vela/register_command_stream_util.py +++ b/ethosu/vela/register_command_stream_util.py @@ -60,10 +60,18 @@ def check_alignment(payload, required_alignment): raise ByteAlignmentError(f"Cmd1 payload of size: {payload} Bytes is not {required_alignment}-byte aligned") -def check_size(payload, required_multiple): +def check_size(payload, required_multiple, value_type): # assuming payload is defined in bytes if payload % required_multiple != 0: - raise ByteSizeError(f"Cmd1 payload of size: {payload} Bytes is not a multiple of {required_multiple}") + raise ByteSizeError(f"Cmd1 {value_type} of size: {payload} Bytes is not a multiple of {required_multiple}") + + +def check_stride(stride, required_multiple): + check_size(stride, required_multiple, "stride") + + +def check_length(length, required_multiple): + check_size(length, required_multiple, "length") def to_npu_kernel(kernel: Kernel) -> NpuKernel: @@ -263,12 +271,12 @@ def check_strides(fm: NpuFeatureMap, strides: NpuShape3D): if fm.layout == NpuLayout.NHCWB16: strides_to_check = [strides.depth, strides.height] - required_multiple = 16 * element_size_in_bytes + required_multiple = 16 else: strides_to_check = 
[strides.height, strides.width] required_multiple = element_size_in_bytes for stride in strides_to_check: - check_size(stride, required_multiple) + check_stride(stride, required_multiple) def check_addresses(addresses: List[int], layout: NpuLayout, element_size, arch: ArchitectureFeatures): @@ -384,11 +392,11 @@ def check_dma_op(dma_op: NpuDmaOperation, arch: ArchitectureFeatures): check_alignment(dma_op.src.address, 16) if dma_op.dest.region == BASE_PTR_INDEX_MEM2MEM: check_alignment(dma_op.dest.address, 16) - check_size(dma_op.src.length, 16) + check_length(dma_op.src.length, 16) else: check_alignment(dma_op.src.address, 16) check_alignment(dma_op.dest.address, 16) - check_size(dma_op.src.length, 16) + check_length(dma_op.src.length, 16) # ------------------------------------------------------------------- diff --git a/ethosu/vela/test/extapi/test_extapi_generate_commands.py b/ethosu/vela/test/extapi/test_extapi_generate_commands.py index b21aae3..92f6c79 100644 --- a/ethosu/vela/test/extapi/test_extapi_generate_commands.py +++ b/ethosu/vela/test/extapi/test_extapi_generate_commands.py @@ -861,11 +861,11 @@ def test_cmd1_payload_legality(): op.block_traversal = NpuBlockTraversal.PART_KERNEL_FIRST op.block_config = NpuShape3D(height=16, width=4, depth=16) - # NHWC depth stride not a multiple of 32 passes + # NHWC height stride not a multiple of 16 passes op.ifm.strides = NpuShape3D(depth=16, height=2, width=16) npu_generate_register_command_stream([op], NpuAccelerator.Ethos_U65_256) - # Same depth stride fails for NHCWB16 + # Same height stride fails for NHCWB16 op.ifm = create_feature_map( ifm_shape, 1, -- cgit v1.2.1