aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Jacob Bohlin <jacob.bohlin@arm.com> 2021-08-09 12:22:51 +0100
committer Jacob Bohlin <jacob.bohlin@arm.com> 2021-08-16 17:40:20 +0200
commit b8060f51dcd6c128629e34855fdf46865ec165f5 (patch)
tree 64cf72fd162115dd15d16899fbcfe04fdb655c8f
parent 8f78ac2ff735b7c0be7787d6423eb96a0d8b5983 (diff)
download ethos-u-vela-b8060f51dcd6c128629e34855fdf46865ec165f5.tar.gz
MLBEDSW-4738 Reinstate find_block_configs from v2.1
Reinstated the v2.1.0 functionality for find_block_configs(). This is used exclusively by the external API.

Signed-off-by: Jacob Bohlin <jacob.bohlin@arm.com>
Change-Id: I6977f13866957edb083769658cc8c57c2b3556fb
-rw-r--r-- ethosu/vela/api.py 84
-rw-r--r-- ethosu/vela/architecture_allocator.py 1
-rw-r--r-- ethosu/vela/register_command_stream_generator.py 17
3 files changed, 82 insertions, 20 deletions
diff --git a/ethosu/vela/api.py b/ethosu/vela/api.py
index e91c0bdb..e31c373a 100644
--- a/ethosu/vela/api.py
+++ b/ethosu/vela/api.py
@@ -447,9 +447,87 @@ def npu_find_block_configs(npu_op: NpuOperation, accelerator: NpuAccelerator) ->
This function can be used to find a valid value for npu_op.block_config.
The block config is the unit of work in which the NPU generates the OFM.
"""
- from . import register_command_stream_generator
-
- return register_command_stream_generator.find_block_configs(npu_op, accelerator)
+ from .architecture_features import Accelerator
+ from .architecture_features import ArchitectureFeatures
+ from .architecture_features import Block
+ from .architecture_features import create_default_arch
+ from .architecture_allocator import try_block_config
+ from .register_command_stream_generator import resampling_mode_map
+ from .register_command_stream_util import to_kernel
+ from .operation import NpuBlockType
+
+ is_partkernel = False
+ if isinstance(npu_op, NpuConv2DOperation):
+ block_type = NpuBlockType.ConvolutionMxN
+ is_partkernel = npu_op.block_traversal == NpuBlockTraversal.PART_KERNEL_FIRST
+ elif isinstance(npu_op, NpuConvDepthWiseOperation):
+ block_type = NpuBlockType.ConvolutionDepthWise
+ elif isinstance(npu_op, NpuPoolingOperation):
+ block_type = NpuBlockType.ReduceSum if npu_op.sub_op_type == NpuPoolingOp.REDUCE_SUM else NpuBlockType.Pooling
+ elif isinstance(npu_op, NpuElementWiseOperation):
+ block_type = NpuBlockType.ElementWise
+ else:
+ assert 0, "Unsupported operation"
+
+ ifm_shape = Block(npu_op.ifm.shape.width, npu_op.ifm.shape.height, npu_op.ifm.shape.depth)
+ ifm2_shape = None
+ if npu_op.ifm2:
+ ifm2_shape = Block(npu_op.ifm2.shape.width, npu_op.ifm2.shape.height, npu_op.ifm2.shape.depth)
+ ofm_shape = Block(npu_op.ofm.shape.width, npu_op.ofm.shape.height, npu_op.ofm.shape.depth)
+
+ ifm_resampling_mode = resampling_mode_map[npu_op.ifm_upscale]
+ ifm_bits = npu_op.ifm.data_type.size_in_bits()
+ kernel = to_kernel(npu_op.kernel)
+ lut_banks = 0
+ if npu_op.activation:
+ lut_banks = 2 if npu_op.activation.op_type == NpuActivationOp.TABLE_LOOKUP else 0
+
+ has_scaling = True
+ for tensor in [npu_op.ifm, npu_op.ifm2, npu_op.ofm]:
+ if tensor and tensor.quantization is None:
+ has_scaling = False
+ break
+
+ arch = create_default_arch(Accelerator.from_npu_accelerator(accelerator))
+
+ max_block_width = min(arch.ofm_block_max.width, ofm_shape.width)
+ max_block_height = min(arch.ofm_block_max.height, ofm_shape.height)
+ max_block_depth = min(arch.ofm_block_max.depth, ofm_shape.depth)
+
+ min_block_height = max(arch.ofm_ublock.height, 2 if ifm_resampling_mode != NpuResamplingMode.NONE else 1)
+ min_block_width = max(arch.ofm_ublock.width, 2 if ifm_resampling_mode != NpuResamplingMode.NONE else 1)
+
+ valid_block_configs = []
+ for w in range(min_block_width, max_block_width + min_block_width, min_block_width):
+ for h in range(min_block_height, max_block_height + min_block_height, min_block_height):
+ # Try valid OFM block depths
+ for c in range(arch.ofm_ublock.depth, max_block_depth + arch.ofm_ublock.depth, arch.ofm_ublock.depth):
+ # OFM block depth has the constraint that if it causes the OFM to be
+ # split, it must be a multiple of the OFM split size
+ if (c >= max_block_depth) or (c < max_block_depth and (c % ArchitectureFeatures.OFMSplitDepth) == 0):
+ block = Block(w, h, c)
+ config = try_block_config(
+ block,
+ arch,
+ block_type,
+ ofm_shape,
+ ifm_shape,
+ ifm2_shape,
+ npu_op.ifm2_scalar is not None,
+ ifm_bits,
+ is_partkernel,
+ kernel,
+ lut_banks,
+ has_scaling,
+ ifm_resampling_mode,
+ )
+
+ if config:
+ ofm_block = config.ofm_block
+ valid_block_configs.append(NpuShape3D(ofm_block.height, ofm_block.width, ofm_block.depth))
+
+ assert len(valid_block_configs) > 0
+ return valid_block_configs
def npu_generate_register_command_stream(npu_op_list: List[NpuOperation], accelerator: NpuAccelerator) -> List[int]:
diff --git a/ethosu/vela/architecture_allocator.py b/ethosu/vela/architecture_allocator.py
index 3c49eb13..32502e33 100644
--- a/ethosu/vela/architecture_allocator.py
+++ b/ethosu/vela/architecture_allocator.py
@@ -412,4 +412,5 @@ def try_block_config(
config.layout = layout
config.bank_size = arch.shram_bank_size
config.ifm_block = ifm_block
+ config.ofm_block = block_config
return config
diff --git a/ethosu/vela/register_command_stream_generator.py b/ethosu/vela/register_command_stream_generator.py
index b4a633e9..d61e5717 100644
--- a/ethosu/vela/register_command_stream_generator.py
+++ b/ethosu/vela/register_command_stream_generator.py
@@ -1025,23 +1025,6 @@ def generate_command_stream(
return res
-# -------------------------------------------------------------------
-# EXTERNAL API
-# -------------------------------------------------------------------
-
-
-def find_block_configs(npu_op: NpuOperation, npu_accelerator: NpuAccelerator) -> List[NpuShape3D]:
- """
- Internal implementation of the public facing API for finding block configs.
- """
- if isinstance(npu_op, NpuBlockOperation):
- # TODO: implement this function
- arch = create_default_arch(Accelerator.from_npu_accelerator(npu_accelerator))
- block = arch.ofm_ublock
- return [NpuShape3D(height=block.height, width=block.width, depth=block.depth)]
- return []
-
-
def generate_register_command_stream(npu_op_list: List[NpuOperation], npu_accelerator: NpuAccelerator) -> List[int]:
"""
Internal implementation of the public facing API for generating an Ethos-U register command stream.