From 399c4a2d77df791e5d988c51d7fb1824ac4f266f Mon Sep 17 00:00:00 2001 From: James Ward Date: Wed, 20 Oct 2021 11:04:46 +0100 Subject: MLBEDSW-5383 npu_find_block_configs() differs between v2.1.1 and v3.1.0 * 1D optimised block_config was incorrectly being set to the ArchitectureBlockConfig in try_block_config() * Write external API test for the reduced block height case (on H256) Signed-off-by: James Ward Change-Id: I9ced7eb31b23730e4423aabbaf769bc72fac8fc9 --- ethosu/vela/architecture_allocator.py | 6 ++-- .../test/extapi/test_extapi_find_block_configs.py | 36 ++++++++++++++++++++++ 2 files changed, 39 insertions(+), 3 deletions(-) diff --git a/ethosu/vela/architecture_allocator.py b/ethosu/vela/architecture_allocator.py index 30e1c872..65a684c5 100644 --- a/ethosu/vela/architecture_allocator.py +++ b/ethosu/vela/architecture_allocator.py @@ -47,7 +47,7 @@ class ArchitectureBlockConfig: def __init__(self): self.layout = SHRAMLayout() self.ifm_block = Shape4D() - self.ofm_block = Shape4D() + self.ofm_block = Shape4D() # non-1D-optimised block self.acc_type = SHRAMElements.Acc32 self.is_partkernel = False self.bank_size = 0 @@ -414,10 +414,10 @@ def try_block_config( ifm_block = ifm_block.with_depth(ifm_blockdepth) # 256/512 Conv1D optimisation (ratio of IFM:Accumulators changes) - block_config = fit_block_for_ofm(arch, ofm_shape, kernel, block_config) + block_config_opt = fit_block_for_ofm(arch, ofm_shape, kernel, block_config) layout = _try_block_config( - arch.shram, ew_usage, block_config, ifm_block, ifm_bits, ifm_granule, acc_bits, acc_granule, lut_banks + arch.shram, ew_usage, block_config_opt, ifm_block, ifm_bits, ifm_granule, acc_bits, acc_granule, lut_banks ) if layout is None: return None diff --git a/ethosu/vela/test/extapi/test_extapi_find_block_configs.py b/ethosu/vela/test/extapi/test_extapi_find_block_configs.py index 07cb9cb4..a768f18d 100644 --- a/ethosu/vela/test/extapi/test_extapi_find_block_configs.py +++ 
b/ethosu/vela/test/extapi/test_extapi_find_block_configs.py @@ -61,3 +61,39 @@ def test_find_block_configs(): check_cmd0(cmds, cmd0.NPU_SET_OFM_BLK_HEIGHT_M1, op.block_config.height - 1) check_cmd0(cmds, cmd0.NPU_SET_OFM_BLK_WIDTH_M1, op.block_config.width - 1) check_cmd0(cmds, cmd0.NPU_SET_OFM_BLK_DEPTH_M1, op.block_config.depth - 1) + + +def test_conv2d_block_height_1(): + """Test npu_find_block_configs returns valid config in the special case of reduced ublock height (H256).""" + # Create a Conv2D operation + op = NpuConv2DOperation() + op.ifm = create_feature_map( + NpuShape3D(height=1, width=1, depth=1024), + 1, + 512, + quant=NpuQuantization(scale_f32=0.023528477177023888, zero_point=0), + ) + op.ofm = create_feature_map( + NpuShape3D(height=1, width=1, depth=1001), + 1, + 0x14E40, + quant=NpuQuantization(scale_f32=0.16609922051429749, zero_point=66), + ) + op.kernel = NpuKernel(1, 1, 1, 1, 1, 1) + op.padding = NpuPadding(top=0, left=0, right=0, bottom=0) + op.block_traversal = NpuBlockTraversal.PART_KERNEL_FIRST + + # Find valid block configs + accelerator = NpuAccelerator.Ethos_U55_256 + block_configs = npu_find_block_configs(op, accelerator) + # Select the last one + op.block_config = block_configs[-1] + # Note: the weights should be encoded with op.block_config.depth (not shown here) + op.weights = [NpuAddressRange(region=0, address=0, length=7696)] + + # Check that generating register commands succeeds + cmds = npu_generate_register_command_stream([op], accelerator) + # Check that the selected block config was used + check_cmd0(cmds, cmd0.NPU_SET_OFM_BLK_HEIGHT_M1, op.block_config.height - 1) + check_cmd0(cmds, cmd0.NPU_SET_OFM_BLK_WIDTH_M1, op.block_config.width - 1) + check_cmd0(cmds, cmd0.NPU_SET_OFM_BLK_DEPTH_M1, op.block_config.depth - 1) -- cgit v1.2.1