diff options
author | Jacob Bohlin <jacob.bohlin@arm.com> | 2020-08-28 13:25:14 +0200 |
---|---|---|
committer | Jacob Bohlin <jacob.bohlin@arm.com> | 2020-09-03 10:50:10 +0200 |
commit | 0628a8c0136eebf3af8db7fd40b7aed94ff5d670 (patch) | |
tree | 65e7a49264f2df1f969122d8d7fb812773c0d4db /ethosu/vela/tensor_allocation.py | |
parent | d2e3355813a33ccefaf112750f86c4f04d6ea12c (diff) | |
download | ethos-u-vela-0628a8c0136eebf3af8db7fd40b7aed94ff5d670.tar.gz |
MLBEDSW-2567: CLI option to specify allocation alignment
Added the CLI option. It only applies to CPU tensors. Added an
AllocationError, which is raised when allocation fails.
Signed-off-by: Jacob Bohlin <jacob.bohlin@arm.com>
Change-Id: I89164dea3ac7b7add7bc40aec2ce8fe50600105d
Diffstat (limited to 'ethosu/vela/tensor_allocation.py')
-rw-r--r-- | ethosu/vela/tensor_allocation.py | 20 |
1 file changed, 17 insertions, 3 deletions
diff --git a/ethosu/vela/tensor_allocation.py b/ethosu/vela/tensor_allocation.py index bb91145e..2d464eec 100644 --- a/ethosu/vela/tensor_allocation.py +++ b/ethosu/vela/tensor_allocation.py @@ -22,14 +22,16 @@ import numpy as np from . import live_range from . import numeric_util +from .errors import AllocationError from .greedy_allocation import allocate_live_ranges as greedy_allocate_live_ranges from .nn_graph import TensorAllocator from .tensor import MemArea from .tensor import MemType +from .tensor import Tensor from .tensor import TensorPurpose -def linear_allocate_live_ranges(live_ranges, alloc_granularity=16): +def linear_allocate_live_ranges(live_ranges, alloc_granularity=Tensor.AllocationQuantum): # Allocates using increasing addresses. Duplicate constant tensors will be allocated to the same address total_sz = 0 allocated_tensors = [] @@ -55,9 +57,19 @@ def linear_allocate_live_ranges(live_ranges, alloc_granularity=16): if address == total_sz: total_sz += numeric_util.round_up(int(math.ceil(lr.size)), alloc_granularity) + verify_alignment(live_ranges, alloc_granularity) return total_sz +def verify_alignment(live_ranges, alignment): + for lr in live_ranges.ranges.values(): + for tens in lr.tensors: + if not all(op and op.run_on_npu for op in tens.ops + tens.consumer_list): + # This is a CPU tensor, verify alignment + if tens.address % alignment != 0: + raise AllocationError("Tensor {} not aligned to {} bytes".format(tens.name, alignment)) + + def mark_sram_used_for_cascaded_passes(sg, lrs): end_pos = max(ps.time for ps in sg.cascaded_passes) + 2 mem_usage = np.zeros(end_pos, dtype=np.int64) @@ -113,6 +125,7 @@ def allocate_tensors( verbose_allocation=False, show_minimum_possible_allocation=False, lr_graph=None, + allocation_alignment=Tensor.AllocationQuantum, ): ignore_subgraph_input_output_tensors = False lrs = live_range.extract_live_ranges_from_cascaded_passes( @@ -123,14 +136,15 @@ def allocate_tensors( use_ifm_ofm_overlap=use_ifm_ofm_overlap, 
ignore_subgraph_input_output_tensors=ignore_subgraph_input_output_tensors, lr_graph=lr_graph, + allocation_alignment=allocation_alignment, ) if lrs.ranges: tens_alloc = tensor_allocator if tens_alloc == TensorAllocator.Greedy: - total_sz = greedy_allocate_live_ranges(sg, arch, lrs, mem_area, verbose_allocation) + total_sz = greedy_allocate_live_ranges(sg, arch, lrs, mem_area, allocation_alignment, verbose_allocation) elif tens_alloc == TensorAllocator.LinearAlloc: - total_sz = linear_allocate_live_ranges(lrs, 16) + total_sz = linear_allocate_live_ranges(lrs, allocation_alignment) else: assert 0 |