diff options
-rw-r--r-- | ethosu/vela/architecture_allocator.py | 16 | ||||
-rw-r--r-- | ethosu/vela/register_command_stream_generator.py | 1 | ||||
-rw-r--r-- | ethosu/vela/test/test_architecture_allocator.py | 1 |
3 files changed, 17 insertions, 1 deletions
diff --git a/ethosu/vela/architecture_allocator.py b/ethosu/vela/architecture_allocator.py
index e43b841d..86410cfd 100644
--- a/ethosu/vela/architecture_allocator.py
+++ b/ethosu/vela/architecture_allocator.py
@@ -196,6 +196,15 @@ def _get_ifm_blocksize(
     return Shape4D(1, height, width, ofm_block.depth)
 
 
+def fit_block_for_ofm(arch: ArchitectureFeatures, ofm_shape: Shape4D, kernel: Kernel, block: Shape4D):
+    # 256/512 Conv1D optimisation (ratio of IFM:Accumulators changes) This is a specific
+    # interpretation of a more general constraint that can't be applied because the
+    # find_block_config function must return block configs that can be applied to any OFM shape.
+    if (ofm_shape.height == 1) and (kernel.height == 1) and (arch.ofm_ublock.height == 2):
+        return Shape4D(1, min(block.height, ofm_shape.height), block.width, block.depth)
+    return block
+
+
 def find_block_config(
     arch: ArchitectureFeatures,
     npu_op_type: NpuBlockType,
@@ -274,6 +283,7 @@ def find_block_config(
                     ifm_block = ifm_block.with_depth(ifm_blockdepth)
 
                 # Test if the IFM/OFM blocks fit into SHRAM
+                ofm_block = fit_block_for_ofm(arch, ofm_shape, kernel, ofm_block)
                 layout = _try_block_config(
                     arch.shram, ew_usage, ofm_block, ifm_block, ifm_bits, ifm_granule, acc_bits, acc_granule, lut_banks
                 )
@@ -304,7 +314,7 @@ def find_block_config(
                         config.layout = layout
                         config.bank_size = arch.shram_bank_size
                         config.ifm_block = ifm_block
-                        config.ofm_block = ofm_block
+                        config.ofm_block = Shape4D(1, height, width, depth)
                 else:
                     wont_fit[(width, height)] = True
 
@@ -322,6 +332,7 @@ def try_block_config(
     block_config: Block,
     arch: ArchitectureFeatures,
     npu_op_type: NpuBlockType,
+    ofm_shape: Block,
     ifm_shape: Block,
     ifm2_shape: Optional[Block],
     uses_scalar: bool,
@@ -374,6 +385,9 @@ def try_block_config(
     if not is_equal_depth_op:
         ifm_block = ifm_block.with_depth(ifm_blockdepth)
 
+    # 256/512 Conv1D optimisation (ratio of IFM:Accumulators changes)
+    block_config = fit_block_for_ofm(arch, ofm_shape, kernel, block_config)
+
     layout = _try_block_config(
         arch.shram, ew_usage, block_config, ifm_block, ifm_bits, ifm_granule, acc_bits, acc_granule, lut_banks
     )
diff --git a/ethosu/vela/register_command_stream_generator.py b/ethosu/vela/register_command_stream_generator.py
index 20431273..b4a633e9 100644
--- a/ethosu/vela/register_command_stream_generator.py
+++ b/ethosu/vela/register_command_stream_generator.py
@@ -583,6 +583,7 @@ def get_arch_block_config(
         block_config,
         arch,
         block_type,
+        npu_op.ofm.shape,
         ifm_shape,
         ifm2_shape,
         uses_scalar,
diff --git a/ethosu/vela/test/test_architecture_allocator.py b/ethosu/vela/test/test_architecture_allocator.py
index 94768fc1..a199e9d8 100644
--- a/ethosu/vela/test/test_architecture_allocator.py
+++ b/ethosu/vela/test/test_architecture_allocator.py
@@ -106,6 +106,7 @@ def test_allocate(test_data):
             Block.from_shape(config.ofm_block.as_list()),
             arch,
             block_type,
+            ofm_shape,
             ifm_shape,
             ifm2_shape,
             is_partkernel=config.is_partkernel,