diff options
author | Jacob Bohlin <jacob.bohlin@arm.com> | 2021-08-09 12:22:51 +0100 |
---|---|---|
committer | Jacob Bohlin <jacob.bohlin@arm.com> | 2021-08-16 17:40:20 +0200 |
commit | b8060f51dcd6c128629e34855fdf46865ec165f5 (patch) | |
tree | 64cf72fd162115dd15d16899fbcfe04fdb655c8f | |
parent | 8f78ac2ff735b7c0be7787d6423eb96a0d8b5983 (diff) | |
download | ethos-u-vela-b8060f51dcd6c128629e34855fdf46865ec165f5.tar.gz |
MLBEDSW-4738 Reinstate find_block_configs from v2.1
Reinstated the v2.1.0 functionality of find_block_configs(). The function is
used exclusively by the external API.
Signed-off-by: Jacob Bohlin <jacob.bohlin@arm.com>
Change-Id: I6977f13866957edb083769658cc8c57c2b3556fb
-rw-r--r-- | ethosu/vela/api.py | 84 | ||||
-rw-r--r-- | ethosu/vela/architecture_allocator.py | 1 | ||||
-rw-r--r-- | ethosu/vela/register_command_stream_generator.py | 17 |
3 files changed, 82 insertions, 20 deletions
diff --git a/ethosu/vela/api.py b/ethosu/vela/api.py index e91c0bdb..e31c373a 100644 --- a/ethosu/vela/api.py +++ b/ethosu/vela/api.py @@ -447,9 +447,87 @@ def npu_find_block_configs(npu_op: NpuOperation, accelerator: NpuAccelerator) -> This function can be used to find a valid value for npu_op.block_config. The block config is the unit of work in which the NPU generates the OFM. """ - from . import register_command_stream_generator - - return register_command_stream_generator.find_block_configs(npu_op, accelerator) + from .architecture_features import Accelerator + from .architecture_features import ArchitectureFeatures + from .architecture_features import Block + from .architecture_features import create_default_arch + from .architecture_allocator import try_block_config + from .register_command_stream_generator import resampling_mode_map + from .register_command_stream_util import to_kernel + from .operation import NpuBlockType + + is_partkernel = False + if isinstance(npu_op, NpuConv2DOperation): + block_type = NpuBlockType.ConvolutionMxN + is_partkernel = npu_op.block_traversal == NpuBlockTraversal.PART_KERNEL_FIRST + elif isinstance(npu_op, NpuConvDepthWiseOperation): + block_type = NpuBlockType.ConvolutionDepthWise + elif isinstance(npu_op, NpuPoolingOperation): + block_type = NpuBlockType.ReduceSum if npu_op.sub_op_type == NpuPoolingOp.REDUCE_SUM else NpuBlockType.Pooling + elif isinstance(npu_op, NpuElementWiseOperation): + block_type = NpuBlockType.ElementWise + else: + assert 0, "Unsupported operation" + + ifm_shape = Block(npu_op.ifm.shape.width, npu_op.ifm.shape.height, npu_op.ifm.shape.depth) + ifm2_shape = None + if npu_op.ifm2: + ifm2_shape = Block(npu_op.ifm2.shape.width, npu_op.ifm2.shape.height, npu_op.ifm2.shape.depth) + ofm_shape = Block(npu_op.ofm.shape.width, npu_op.ofm.shape.height, npu_op.ofm.shape.depth) + + ifm_resampling_mode = resampling_mode_map[npu_op.ifm_upscale] + ifm_bits = npu_op.ifm.data_type.size_in_bits() + kernel = 
to_kernel(npu_op.kernel) + lut_banks = 0 + if npu_op.activation: + lut_banks = 2 if npu_op.activation.op_type == NpuActivationOp.TABLE_LOOKUP else 0 + + has_scaling = True + for tensor in [npu_op.ifm, npu_op.ifm2, npu_op.ofm]: + if tensor and tensor.quantization is None: + has_scaling = False + break + + arch = create_default_arch(Accelerator.from_npu_accelerator(accelerator)) + + max_block_width = min(arch.ofm_block_max.width, ofm_shape.width) + max_block_height = min(arch.ofm_block_max.height, ofm_shape.height) + max_block_depth = min(arch.ofm_block_max.depth, ofm_shape.depth) + + min_block_height = max(arch.ofm_ublock.height, 2 if ifm_resampling_mode != NpuResamplingMode.NONE else 1) + min_block_width = max(arch.ofm_ublock.width, 2 if ifm_resampling_mode != NpuResamplingMode.NONE else 1) + + valid_block_configs = [] + for w in range(min_block_width, max_block_width + min_block_width, min_block_width): + for h in range(min_block_height, max_block_height + min_block_height, min_block_height): + # Try valid OFM block depths + for c in range(arch.ofm_ublock.depth, max_block_depth + arch.ofm_ublock.depth, arch.ofm_ublock.depth): + # OFM block depth has the constraint that if it causes the OFM to be + # split, it must be a multiple of the OFM split size + if (c >= max_block_depth) or (c < max_block_depth and (c % ArchitectureFeatures.OFMSplitDepth) == 0): + block = Block(w, h, c) + config = try_block_config( + block, + arch, + block_type, + ofm_shape, + ifm_shape, + ifm2_shape, + npu_op.ifm2_scalar is not None, + ifm_bits, + is_partkernel, + kernel, + lut_banks, + has_scaling, + ifm_resampling_mode, + ) + + if config: + ofm_block = config.ofm_block + valid_block_configs.append(NpuShape3D(ofm_block.height, ofm_block.width, ofm_block.depth)) + + assert len(valid_block_configs) > 0 + return valid_block_configs def npu_generate_register_command_stream(npu_op_list: List[NpuOperation], accelerator: NpuAccelerator) -> List[int]: diff --git 
a/ethosu/vela/architecture_allocator.py b/ethosu/vela/architecture_allocator.py index 3c49eb13..32502e33 100644 --- a/ethosu/vela/architecture_allocator.py +++ b/ethosu/vela/architecture_allocator.py @@ -412,4 +412,5 @@ def try_block_config( config.layout = layout config.bank_size = arch.shram_bank_size config.ifm_block = ifm_block + config.ofm_block = block_config return config diff --git a/ethosu/vela/register_command_stream_generator.py b/ethosu/vela/register_command_stream_generator.py index b4a633e9..d61e5717 100644 --- a/ethosu/vela/register_command_stream_generator.py +++ b/ethosu/vela/register_command_stream_generator.py @@ -1025,23 +1025,6 @@ def generate_command_stream( return res -# ------------------------------------------------------------------- -# EXTERNAL API -# ------------------------------------------------------------------- - - -def find_block_configs(npu_op: NpuOperation, npu_accelerator: NpuAccelerator) -> List[NpuShape3D]: - """ - Internal implementation of the public facing API for finding block configs. - """ - if isinstance(npu_op, NpuBlockOperation): - # TODO: implement this function - arch = create_default_arch(Accelerator.from_npu_accelerator(npu_accelerator)) - block = arch.ofm_ublock - return [NpuShape3D(height=block.height, width=block.width, depth=block.depth)] - return [] - - def generate_register_command_stream(npu_op_list: List[NpuOperation], npu_accelerator: NpuAccelerator) -> List[int]: """ Internal implementation of the public facing API for generating an Ethos-U register command stream. |