aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--ethosu/vela/architecture_allocator.py16
-rw-r--r--ethosu/vela/register_command_stream_generator.py1
-rw-r--r--ethosu/vela/test/test_architecture_allocator.py1
3 files changed, 17 insertions, 1 deletions
diff --git a/ethosu/vela/architecture_allocator.py b/ethosu/vela/architecture_allocator.py
index e43b841d..86410cfd 100644
--- a/ethosu/vela/architecture_allocator.py
+++ b/ethosu/vela/architecture_allocator.py
@@ -196,6 +196,15 @@ def _get_ifm_blocksize(
return Shape4D(1, height, width, ofm_block.depth)
+def fit_block_for_ofm(arch: ArchitectureFeatures, ofm_shape: Shape4D, kernel: Kernel, block: Shape4D):
+ # 256/512 Conv1D optimisation (the ratio of IFM:Accumulators changes). When the OFM height and the kernel height
+ # are both 1 and the OFM ublock height is 2, limit the block height to the OFM height; otherwise return the block
+ # unchanged. A more general constraint can't be applied: find_block_config must return configs usable for any OFM shape.
+ if (ofm_shape.height == 1) and (kernel.height == 1) and (arch.ofm_ublock.height == 2):
+ return Shape4D(1, min(block.height, ofm_shape.height), block.width, block.depth)
+ return block
+
+
def find_block_config(
arch: ArchitectureFeatures,
npu_op_type: NpuBlockType,
@@ -274,6 +283,7 @@ def find_block_config(
ifm_block = ifm_block.with_depth(ifm_blockdepth)
# Test if the IFM/OFM blocks fit into SHRAM
+ ofm_block = fit_block_for_ofm(arch, ofm_shape, kernel, ofm_block)
layout = _try_block_config(
arch.shram, ew_usage, ofm_block, ifm_block, ifm_bits, ifm_granule, acc_bits, acc_granule, lut_banks
)
@@ -304,7 +314,7 @@ def find_block_config(
config.layout = layout
config.bank_size = arch.shram_bank_size
config.ifm_block = ifm_block
- config.ofm_block = ofm_block
+ config.ofm_block = Shape4D(1, height, width, depth)
else:
wont_fit[(width, height)] = True
@@ -322,6 +332,7 @@ def try_block_config(
block_config: Block,
arch: ArchitectureFeatures,
npu_op_type: NpuBlockType,
+ ofm_shape: Block,
ifm_shape: Block,
ifm2_shape: Optional[Block],
uses_scalar: bool,
@@ -374,6 +385,9 @@ def try_block_config(
if not is_equal_depth_op:
ifm_block = ifm_block.with_depth(ifm_blockdepth)
+ # 256/512 Conv1D optimisation (ratio of IFM:Accumulators changes)
+ block_config = fit_block_for_ofm(arch, ofm_shape, kernel, block_config)
+
layout = _try_block_config(
arch.shram, ew_usage, block_config, ifm_block, ifm_bits, ifm_granule, acc_bits, acc_granule, lut_banks
)
diff --git a/ethosu/vela/register_command_stream_generator.py b/ethosu/vela/register_command_stream_generator.py
index 20431273..b4a633e9 100644
--- a/ethosu/vela/register_command_stream_generator.py
+++ b/ethosu/vela/register_command_stream_generator.py
@@ -583,6 +583,7 @@ def get_arch_block_config(
block_config,
arch,
block_type,
+ npu_op.ofm.shape,
ifm_shape,
ifm2_shape,
uses_scalar,
diff --git a/ethosu/vela/test/test_architecture_allocator.py b/ethosu/vela/test/test_architecture_allocator.py
index 94768fc1..a199e9d8 100644
--- a/ethosu/vela/test/test_architecture_allocator.py
+++ b/ethosu/vela/test/test_architecture_allocator.py
@@ -106,6 +106,7 @@ def test_allocate(test_data):
Block.from_shape(config.ofm_block.as_list()),
arch,
block_type,
+ ofm_shape,
ifm_shape,
ifm2_shape,
is_partkernel=config.is_partkernel,