aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTim Hall <tim.hall@arm.com>2021-06-17 17:03:49 +0100
committerTim Hall <tim.hall@arm.com>2021-06-17 17:03:49 +0100
commit3016157e5099a50075d1a8b54d1b2cac2ee3899e (patch)
treee51fc057e07362720d6082bbff7ff20957b49bd7
parent789e6f3acd1a377dfba80aa18d513579fd33fc93 (diff)
downloadethos-u-vela-3016157e5099a50075d1a8b54d1b2cac2ee3899e.tar.gz
Block config optimisation for 256/512 configurations
- The 256 and 512 configuration variants execute 1D convolutions more efficiently than their 2x2 microblock dimensions would otherwise imply. This commit takes that into account when choosing block configurations, improving Conv1D throughput on these configurations. Signed-off-by: Tim Hall <tim.hall@arm.com> Change-Id: I6ecdf6e4a219e356327b22f8393f50ee8817af23
-rw-r--r--ethosu/vela/architecture_allocator.py16
-rw-r--r--ethosu/vela/register_command_stream_generator.py1
-rw-r--r--ethosu/vela/test/test_architecture_allocator.py1
3 files changed, 17 insertions, 1 deletions
diff --git a/ethosu/vela/architecture_allocator.py b/ethosu/vela/architecture_allocator.py
index e43b841d..86410cfd 100644
--- a/ethosu/vela/architecture_allocator.py
+++ b/ethosu/vela/architecture_allocator.py
@@ -196,6 +196,15 @@ def _get_ifm_blocksize(
return Shape4D(1, height, width, ofm_block.depth)
+def fit_block_for_ofm(arch: ArchitectureFeatures, ofm_shape: Shape4D, kernel: Kernel, block: Shape4D):
+ # 256/512 Conv1D optimisation (ratio of IFM:Accumulators changes): when the OFM and the kernel both have a height of 1 and the OFM microblock height is 2, clamp the block height to the OFM height; otherwise return the block unchanged.
+ # This is a specific interpretation of a more general constraint that can't be applied because the
+ # find_block_config function must return block configs that can be applied to any OFM shape.
+ if (ofm_shape.height == 1) and (kernel.height == 1) and (arch.ofm_ublock.height == 2):
+ return Shape4D(1, min(block.height, ofm_shape.height), block.width, block.depth)
+ return block
+
+
def find_block_config(
arch: ArchitectureFeatures,
npu_op_type: NpuBlockType,
@@ -274,6 +283,7 @@ def find_block_config(
ifm_block = ifm_block.with_depth(ifm_blockdepth)
# Test if the IFM/OFM blocks fit into SHRAM
+ ofm_block = fit_block_for_ofm(arch, ofm_shape, kernel, ofm_block)
layout = _try_block_config(
arch.shram, ew_usage, ofm_block, ifm_block, ifm_bits, ifm_granule, acc_bits, acc_granule, lut_banks
)
@@ -304,7 +314,7 @@ def find_block_config(
config.layout = layout
config.bank_size = arch.shram_bank_size
config.ifm_block = ifm_block
- config.ofm_block = ofm_block
+ config.ofm_block = Shape4D(1, height, width, depth)
else:
wont_fit[(width, height)] = True
@@ -322,6 +332,7 @@ def try_block_config(
block_config: Block,
arch: ArchitectureFeatures,
npu_op_type: NpuBlockType,
+ ofm_shape: Block,
ifm_shape: Block,
ifm2_shape: Optional[Block],
uses_scalar: bool,
@@ -374,6 +385,9 @@ def try_block_config(
if not is_equal_depth_op:
ifm_block = ifm_block.with_depth(ifm_blockdepth)
+ # 256/512 Conv1D optimisation (ratio of IFM:Accumulators changes)
+ block_config = fit_block_for_ofm(arch, ofm_shape, kernel, block_config)
+
layout = _try_block_config(
arch.shram, ew_usage, block_config, ifm_block, ifm_bits, ifm_granule, acc_bits, acc_granule, lut_banks
)
diff --git a/ethosu/vela/register_command_stream_generator.py b/ethosu/vela/register_command_stream_generator.py
index 20431273..b4a633e9 100644
--- a/ethosu/vela/register_command_stream_generator.py
+++ b/ethosu/vela/register_command_stream_generator.py
@@ -583,6 +583,7 @@ def get_arch_block_config(
block_config,
arch,
block_type,
+ npu_op.ofm.shape,
ifm_shape,
ifm2_shape,
uses_scalar,
diff --git a/ethosu/vela/test/test_architecture_allocator.py b/ethosu/vela/test/test_architecture_allocator.py
index 94768fc1..a199e9d8 100644
--- a/ethosu/vela/test/test_architecture_allocator.py
+++ b/ethosu/vela/test/test_architecture_allocator.py
@@ -106,6 +106,7 @@ def test_allocate(test_data):
Block.from_shape(config.ofm_block.as_list()),
arch,
block_type,
+ ofm_shape,
ifm_shape,
ifm2_shape,
is_partkernel=config.is_partkernel,