aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author James Ward <james.ward@arm.com> 2021-10-20 11:04:46 +0100
committer patrik.gustavsson <patrik.gustavsson@arm.com> 2021-11-12 13:34:40 +0000
commit 399c4a2d77df791e5d988c51d7fb1824ac4f266f (patch)
tree 89a935e167cc088655c7cc6729dc027e5dd56ae6
parent e6607c563f3ea480fdc25770fdd7434ec7ab776b (diff)
download ethos-u-vela-399c4a2d77df791e5d988c51d7fb1824ac4f266f.tar.gz
MLBEDSW-5383 npu_find_block_configs() differs between v2.1.1 and v3.1.0 (tag: 3.2.0.rc1)
* 1D optimised block_config was incorrectly being set to the ArchitectureBlockConfig in try_block_config() * Write external API test for the reduced block height case (on H256) Signed-off-by: James Ward <james.ward@arm.com> Change-Id: I9ced7eb31b23730e4423aabbaf769bc72fac8fc9
-rw-r--r-- ethosu/vela/architecture_allocator.py 6
-rw-r--r-- ethosu/vela/test/extapi/test_extapi_find_block_configs.py 36
2 files changed, 39 insertions, 3 deletions
diff --git a/ethosu/vela/architecture_allocator.py b/ethosu/vela/architecture_allocator.py
index 30e1c87..65a684c 100644
--- a/ethosu/vela/architecture_allocator.py
+++ b/ethosu/vela/architecture_allocator.py
@@ -47,7 +47,7 @@ class ArchitectureBlockConfig:
def __init__(self):
self.layout = SHRAMLayout()
self.ifm_block = Shape4D()
- self.ofm_block = Shape4D()
+ self.ofm_block = Shape4D() # non-1D-optimised block
self.acc_type = SHRAMElements.Acc32
self.is_partkernel = False
self.bank_size = 0
@@ -414,10 +414,10 @@ def try_block_config(
ifm_block = ifm_block.with_depth(ifm_blockdepth)
# 256/512 Conv1D optimisation (ratio of IFM:Accumulators changes)
- block_config = fit_block_for_ofm(arch, ofm_shape, kernel, block_config)
+ block_config_opt = fit_block_for_ofm(arch, ofm_shape, kernel, block_config)
layout = _try_block_config(
- arch.shram, ew_usage, block_config, ifm_block, ifm_bits, ifm_granule, acc_bits, acc_granule, lut_banks
+ arch.shram, ew_usage, block_config_opt, ifm_block, ifm_bits, ifm_granule, acc_bits, acc_granule, lut_banks
)
if layout is None:
return None
diff --git a/ethosu/vela/test/extapi/test_extapi_find_block_configs.py b/ethosu/vela/test/extapi/test_extapi_find_block_configs.py
index 07cb9cb..a768f18 100644
--- a/ethosu/vela/test/extapi/test_extapi_find_block_configs.py
+++ b/ethosu/vela/test/extapi/test_extapi_find_block_configs.py
@@ -61,3 +61,39 @@ def test_find_block_configs():
check_cmd0(cmds, cmd0.NPU_SET_OFM_BLK_HEIGHT_M1, op.block_config.height - 1)
check_cmd0(cmds, cmd0.NPU_SET_OFM_BLK_WIDTH_M1, op.block_config.width - 1)
check_cmd0(cmds, cmd0.NPU_SET_OFM_BLK_DEPTH_M1, op.block_config.depth - 1)
+
+
+def test_conv2d_block_height_1():
+ """Test npu_find_block_configs returns valid config in the special case of reduced ublock height (H256)."""
+ # Create a Conv2D operation
+ op = NpuConv2DOperation()
+ op.ifm = create_feature_map(
+ NpuShape3D(height=1, width=1, depth=1024),
+ 1,
+ 512,
+ quant=NpuQuantization(scale_f32=0.023528477177023888, zero_point=0),
+ )
+ op.ofm = create_feature_map(
+ NpuShape3D(height=1, width=1, depth=1001),
+ 1,
+ 0x14E40,
+ quant=NpuQuantization(scale_f32=0.16609922051429749, zero_point=66),
+ )
+ op.kernel = NpuKernel(1, 1, 1, 1, 1, 1)
+ op.padding = NpuPadding(top=0, left=0, right=0, bottom=0)
+ op.block_traversal = NpuBlockTraversal.PART_KERNEL_FIRST
+
+ # Find valid block configs
+ accelerator = NpuAccelerator.Ethos_U55_256
+ block_configs = npu_find_block_configs(op, accelerator)
+ # Select the last one
+ op.block_config = block_configs[-1]
+ # Note: the weights should be encoded with op.block_config.depth (not shown here)
+ op.weights = [NpuAddressRange(region=0, address=0, length=7696)]
+
+ # Check that generating register commands succeeds
+ cmds = npu_generate_register_command_stream([op], accelerator)
+ # Check that the selected block config was used
+ check_cmd0(cmds, cmd0.NPU_SET_OFM_BLK_HEIGHT_M1, op.block_config.height - 1)
+ check_cmd0(cmds, cmd0.NPU_SET_OFM_BLK_WIDTH_M1, op.block_config.width - 1)
+ check_cmd0(cmds, cmd0.NPU_SET_OFM_BLK_DEPTH_M1, op.block_config.depth - 1)