about | summary | refs | log | tree | commit | diff
path: root/ethosu/vela/shared_buffer_allocation.py
diff options
context:
space:
mode:
Diffstat (limited to 'ethosu/vela/shared_buffer_allocation.py')
-rw-r--r-- ethosu/vela/shared_buffer_allocation.py | 20
1 files changed, 8 insertions, 12 deletions
diff --git a/ethosu/vela/shared_buffer_allocation.py b/ethosu/vela/shared_buffer_allocation.py
index 1f027d60..d8faf369 100644
--- a/ethosu/vela/shared_buffer_allocation.py
+++ b/ethosu/vela/shared_buffer_allocation.py
@@ -32,6 +32,7 @@ from .operation import Kernel
from .operation import NpuBlockType
from .range_set import MemoryRangeSet
from .register_command_stream_util import to_kernel
+from .shape4d import Shape4D
from .tensor import MemArea
@@ -195,14 +196,14 @@ def shared_buffer_allocation_for_pass(arch, ps) -> SharedBufferAllocation:
ifm_bits = ifm_tensor.dtype.size_in_bits()
ifm_shape = ps.primary_op.ifm_shapes[0]
- if ifm_shape != []:
- ifm_depth = ifm_shape[-1]
+ if ifm_tensor.shape != []:
+ ifm_depth = ifm_shape.depth
if is_elementwise:
ifm_count = 2
if ifm_tensor.shape == []: # Scalar in ifm1
assert ifm2_tensor
- ifm_depth = ps.primary_op.ifm_shapes[1][-1]
+ ifm_depth = ps.primary_op.ifm_shapes[1].depth
ifm_count = 1
elif not ifm2_tensor or ifm2_tensor.shape == []: # Scalar in ifm2
ifm_count = 1
@@ -251,7 +252,7 @@ def shared_buffer_allocation_for_npu_op(
ifm_bits=ifm_bits,
ifm_depth=ifm_depth,
ifm_count=ifm_count,
- ofm_shape=ofm_shape,
+ ofm_shape=Shape4D(ofm_shape),
)
@@ -265,14 +266,9 @@ def find_suitable_block_configs(arch, alloc: SharedBufferAllocation) -> List[Tup
# Constrain the search space if the OFM is smaller than the max block size
# - Add other block search constraints here if required
- if len(alloc.ofm_shape) <= 2:
- max_block_height = max_block_width = alloc.ofm_shape[0]
- else:
- max_block_width = alloc.ofm_shape[-2]
- max_block_height = alloc.ofm_shape[-3]
-
- # Common block depth
- max_block_depth = alloc.ofm_shape[-1]
+ max_block_width = alloc.ofm_shape.width
+ max_block_height = alloc.ofm_shape.height
+ max_block_depth = alloc.ofm_shape.depth
# Constrain to valid ranges before search
max_block_width = min(arch.ofm_block_max.width, max_block_width)