diff options
author | Patrik Gustavsson <patrik.gustavsson@arm.com> | 2020-12-16 13:08:06 +0100 |
---|---|---|
committer | Patrik Gustavsson <patrik.gustavsson@arm.com> | 2020-12-21 07:34:05 +0100 |
commit | bf31d647dc5df47410ee577b12427ddf076d816b (patch) | |
tree | 85ddd620916565aa8565d072b764ca4918b405a1 /ethosu/vela/shared_buffer_allocation.py | |
parent | 2349d429d926e258e9a61d34c7fd97660ab9fb98 (diff) | |
download | ethos-u-vela-bf31d647dc5df47410ee577b12427ddf076d816b.tar.gz |
MLBEDSW-3645 4D class for op ifm/ofm shapes
Add a 4D shape class for op IFM/OFM shapes.
Signed-off-by: Patrik Gustavsson <patrik.gustavsson@arm.com>
Change-Id: Ic0a98da9d2f9d085605e39a9ab5a26bad6e702a3
Diffstat (limited to 'ethosu/vela/shared_buffer_allocation.py')
-rw-r--r-- | ethosu/vela/shared_buffer_allocation.py | 20 |
1 file changed, 8 insertions, 12 deletions
diff --git a/ethosu/vela/shared_buffer_allocation.py b/ethosu/vela/shared_buffer_allocation.py index 1f027d60..d8faf369 100644 --- a/ethosu/vela/shared_buffer_allocation.py +++ b/ethosu/vela/shared_buffer_allocation.py @@ -32,6 +32,7 @@ from .operation import Kernel from .operation import NpuBlockType from .range_set import MemoryRangeSet from .register_command_stream_util import to_kernel +from .shape4d import Shape4D from .tensor import MemArea @@ -195,14 +196,14 @@ def shared_buffer_allocation_for_pass(arch, ps) -> SharedBufferAllocation: ifm_bits = ifm_tensor.dtype.size_in_bits() ifm_shape = ps.primary_op.ifm_shapes[0] - if ifm_shape != []: - ifm_depth = ifm_shape[-1] + if ifm_tensor.shape != []: + ifm_depth = ifm_shape.depth if is_elementwise: ifm_count = 2 if ifm_tensor.shape == []: # Scalar in ifm1 assert ifm2_tensor - ifm_depth = ps.primary_op.ifm_shapes[1][-1] + ifm_depth = ps.primary_op.ifm_shapes[1].depth ifm_count = 1 elif not ifm2_tensor or ifm2_tensor.shape == []: # Scalar in ifm2 ifm_count = 1 @@ -251,7 +252,7 @@ def shared_buffer_allocation_for_npu_op( ifm_bits=ifm_bits, ifm_depth=ifm_depth, ifm_count=ifm_count, - ofm_shape=ofm_shape, + ofm_shape=Shape4D(ofm_shape), ) @@ -265,14 +266,9 @@ def find_suitable_block_configs(arch, alloc: SharedBufferAllocation) -> List[Tup # Constrain the search space if the OFM is smaller than the max block size # - Add other block search constraints here if required - if len(alloc.ofm_shape) <= 2: - max_block_height = max_block_width = alloc.ofm_shape[0] - else: - max_block_width = alloc.ofm_shape[-2] - max_block_height = alloc.ofm_shape[-3] - - # Common block depth - max_block_depth = alloc.ofm_shape[-1] + max_block_width = alloc.ofm_shape.width + max_block_height = alloc.ofm_shape.height + max_block_depth = alloc.ofm_shape.depth # Constrain to valid ranges before search max_block_width = min(arch.ofm_block_max.width, max_block_width) |