Block config optimisation for 256/512 configurations

- 256 and 512 configuration variants execute 1D convolutions in an optimised manner compared to their 2x2 microblock dimensions. This commit takes this into account to improve Conv1D throughput on these configurations.

Signed-off-by: Tim Hall <tim.hall@arm.com>
Change-Id: I6ecdf6e4a219e356327b22f8393f50ee8817af23
author: Tim Hall <tim.hall@arm.com> 2021-06-17 17:03:49 +0100
committer: Tim Hall <tim.hall@arm.com> 2021-06-17 17:03:49 +0100
commit: 3016157e5099a50075d1a8b54d1b2cac2ee3899e (patch)
tree: e51fc057e07362720d6082bbff7ff20957b49bd7
parent: 789e6f3acd1a377dfba80aa18d513579fd33fc93 (diff)
download: ethos-u-vela-3016157e5099a50075d1a8b54d1b2cac2ee3899e.tar.gz
3 files changed, 17 insertions, 1 deletion
diff --git a/ethosu/vela/architecture_allocator.py b/ethosu/vela/architecture_allocator.py
index e43b841d..86410cfd 100644
--- a/ethosu/vela/architecture_allocator.py
+++ b/ethosu/vela/architecture_allocator.py
@@ -196,6 +196,15 @@ def _get_ifm_blocksize(
     return Shape4D(1, height, width, ofm_block.depth)
 
 
+def fit_block_for_ofm(arch: ArchitectureFeatures, ofm_shape: Shape4D, kernel: Kernel, block: Shape4D):
+    # 256/512 Conv1D optimisation (ratio of IFM:Accumulators changes). This is a specific
+    # interpretation of a more general constraint, which can't be applied as-is because
+    # find_block_config must return block configs that are applicable to any OFM shape.
+    if (ofm_shape.height == 1) and (kernel.height == 1) and (arch.ofm_ublock.height == 2):
+        return Shape4D(1, min(block.height, ofm_shape.height), block.width, block.depth)
+    return block
+
+
 def find_block_config(
     arch: ArchitectureFeatures,
     npu_op_type: NpuBlockType,
@@ -274,6 +283,7 @@ def find_block_config(
                     ifm_block = ifm_block.with_depth(ifm_blockdepth)
 
                 # Test if the IFM/OFM blocks fit into SHRAM
+                ofm_block = fit_block_for_ofm(arch, ofm_shape, kernel, ofm_block)
                 layout = _try_block_config(
                     arch.shram, ew_usage, ofm_block, ifm_block, ifm_bits, ifm_granule, acc_bits, acc_granule, lut_banks
                 )
@@ -304,7 +314,7 @@ def find_block_config(
                         config.layout = layout
                         config.bank_size = arch.shram_bank_size
                         config.ifm_block = ifm_block
-                        config.ofm_block = ofm_block
+                        config.ofm_block = Shape4D(1, height, width, depth)
                 else:
                     wont_fit[(width, height)] = True
 
@@ -322,6 +332,7 @@ def try_block_config(
     block_config: Block,
     arch: ArchitectureFeatures,
     npu_op_type: NpuBlockType,
+    ofm_shape: Block,
     ifm_shape: Block,
     ifm2_shape: Optional[Block],
     uses_scalar: bool,
@@ -374,6 +385,9 @@ def try_block_config(
     if not is_equal_depth_op:
         ifm_block = ifm_block.with_depth(ifm_blockdepth)
 
+    # 256/512 Conv1D optimisation (ratio of IFM:Accumulators changes)
+    block_config = fit_block_for_ofm(arch, ofm_shape, kernel, block_config)
+
     layout = _try_block_config(
         arch.shram, ew_usage, block_config, ifm_block, ifm_bits, ifm_granule, acc_bits, acc_granule, lut_banks
     )
diff --git a/ethosu/vela/register_command_stream_generator.py b/ethosu/vela/register_command_stream_generator.py
index 20431273..b4a633e9 100644
--- a/ethosu/vela/register_command_stream_generator.py
+++ b/ethosu/vela/register_command_stream_generator.py
@@ -583,6 +583,7 @@ def get_arch_block_config(
         block_config,
         arch,
         block_type,
+        npu_op.ofm.shape,
         ifm_shape,
         ifm2_shape,
         uses_scalar,
diff --git a/ethosu/vela/test/test_architecture_allocator.py b/ethosu/vela/test/test_architecture_allocator.py
index 94768fc1..a199e9d8 100644
--- a/ethosu/vela/test/test_architecture_allocator.py
+++ b/ethosu/vela/test/test_architecture_allocator.py
@@ -106,6 +106,7 @@ def test_allocate(test_data):
         Block.from_shape(config.ofm_block.as_list()),
         arch,
         block_type,
+        ofm_shape,
         ifm_shape,
         ifm2_shape,
         is_partkernel=config.is_partkernel,
author	Tim Hall <tim.hall@arm.com>	2021-06-17 17:03:49 +0100
committer	Tim Hall <tim.hall@arm.com>	2021-06-17 17:03:49 +0100
commit	3016157e5099a50075d1a8b54d1b2cac2ee3899e (patch)
tree	e51fc057e07362720d6082bbff7ff20957b49bd7
parent	789e6f3acd1a377dfba80aa18d513579fd33fc93 (diff)
download	ethos-u-vela-3016157e5099a50075d1a8b54d1b2cac2ee3899e.tar.gz