diff options
author | Tim Hall <tim.hall@arm.com> | 2022-05-19 12:36:58 +0100 |
---|---|---|
committer | tim.hall <tim.hall@arm.com> | 2022-05-19 15:56:19 +0000 |
commit | cda4fcb0fd3e9766a161cf3e5aa7c3283e7f7c9e (patch) | |
tree | 2ca560bcf290bf88ab7a0058098df794486ab528 /ethosu/vela/tensor_allocation.py | |
parent | 8bc7a652607a771e234fda6b05275542ff0fc072 (diff) | |
download | ethos-u-vela-cda4fcb0fd3e9766a161cf3e5aa7c3283e7f7c9e.tar.gz |
MLBEDSW-6563: networks failing with memory area exceeded in vela3.4.0.rc2
- For allocations that have a hard memory limit, the Hill Climb allocator
should be given more attempts to find a solution that fits
- The fix is to use a memory limit when there is a hard constraint, and
a minimum iteration count, reset on every improvement, when there is a soft
constraint
- Added a maximum number of iterations CLI option
Signed-off-by: Tim Hall <tim.hall@arm.com>
Change-Id: I19ff53a0b68412de280263626778a3102cbe52fa
Diffstat (limited to 'ethosu/vela/tensor_allocation.py')
-rw-r--r-- | ethosu/vela/tensor_allocation.py | 22 |
1 files changed, 16 insertions, 6 deletions
```diff
diff --git a/ethosu/vela/tensor_allocation.py b/ethosu/vela/tensor_allocation.py
index ab65740e..1ffae4c4 100644
--- a/ethosu/vela/tensor_allocation.py
+++ b/ethosu/vela/tensor_allocation.py
@@ -66,9 +66,11 @@ def linear_allocate_live_ranges(live_ranges, alloc_granularity=Tensor.Allocation
     return total_sz
 
 
-def hillclimb_allocate_live_ranges(live_ranges: LiveRangeGraph, alloc_granularity: int) -> int:
+def hillclimb_allocate_live_ranges(
+    live_ranges: LiveRangeGraph, alloc_granularity: int, max_iterations: int, mem_limit: int
+) -> int:
     # Allocates using the hill climb allocator
-    addresses = hillclimb_allocation.allocate_live_ranges(live_ranges.lrs)
+    addresses = hillclimb_allocation.allocate_live_ranges(live_ranges.lrs, max_iterations, mem_limit)
     # The result is a list containing the allocated addresses
     total_sz = 0
     for lr, address in zip(live_ranges.lrs, addresses):
@@ -144,7 +146,10 @@ def print_allocation(lrs, mem_area, mem_type_set, tensor_allocator, sg, actual_m
     memory_hist = memory_usage_histogram(lrs.lrs)
     min_mem_usage_for_alloc = max(memory_hist)
 
-    print("Start Time - End Time: Start Addr - End Addr: Tensor Size: Memory Usage: Tensor Purpose: Tensor Name")
+    print(
+        f"{'Start Time':>10s} - {'End Time':>10s}: {'Start Addr':>10s} - {'End Addr':>10s}: {'Tensor Size':>11s}:"
+        f" {'Memory Usage':>12s}: {'Purpose':12s}: Name"
+    )
     for start_time, end_time, size, start_addr, end_addr, purpose, name in sorted(
         (
             lr.start_time,
@@ -159,7 +164,7 @@ def print_allocation(lrs, mem_area, mem_type_set, tensor_allocator, sg, actual_m
     ):
         print(
             f"{start_time:10d} - {end_time:10d}: {start_addr:#10x} - {end_addr:#10x}: {size:11d}:"
-            f" {memory_hist[start_time]:12d}: {purpose.display_name():15s}: {name:s}"
+            f" {memory_hist[start_time]:12d}: {purpose.display_name():12s}: {name:s}"
         )
 
     alloc_overhead_fraction = (actual_mem_usage_for_alloc - min_mem_usage_for_alloc) / min_mem_usage_for_alloc
@@ -194,6 +199,7 @@ def allocate(
     tensor_allocator=TensorAllocator.Greedy,
     lr_graph=None,
     cpu_tensor_alignment=Tensor.AllocationQuantum,
+    hillclimb_max_iterations=None,
 ):
     # Allocates addresses to tensors, returns False if tensors could not be fit within max_size
     lrs = live_range.extract_live_ranges_from_cascaded_passes(
@@ -207,12 +213,14 @@ def allocate(
     if lrs.ranges:
         tens_alloc = tensor_allocator
         if tens_alloc == TensorAllocator.Greedy:
-            total_sz = greedy_allocate_live_ranges(sg, arch, lrs, mem_area, cpu_tensor_alignment)
+            total_sz = greedy_allocate_live_ranges(lrs, cpu_tensor_alignment)
             verify_allocation(lrs, cpu_tensor_alignment)
         elif tens_alloc == TensorAllocator.LinearAlloc:
             total_sz = linear_allocate_live_ranges(lrs, cpu_tensor_alignment)
         elif tens_alloc == TensorAllocator.HillClimb:
-            total_sz = hillclimb_allocate_live_ranges(lrs, cpu_tensor_alignment)
+            mem_type = MemType.Scratch_fast if MemType.Scratch_fast in mem_type_set else list(mem_type_set)[0]
+            mem_size = arch.mem_type_size(mem_type)
+            total_sz = hillclimb_allocate_live_ranges(lrs, cpu_tensor_alignment, hillclimb_max_iterations, mem_size)
         else:
             assert 0
     return lrs, total_sz
@@ -228,6 +236,7 @@ def allocate_tensors(
     verbose_allocation=False,
     lr_graph=None,
     cpu_tensor_alignment=Tensor.AllocationQuantum,
+    hillclimb_max_iterations=None,
     max_size=None,
     dry_test=False,
 ):
@@ -240,6 +249,7 @@ def allocate_tensors(
         tensor_allocator=tensor_allocator,
         lr_graph=lr_graph,
         cpu_tensor_alignment=cpu_tensor_alignment,
+        hillclimb_max_iterations=hillclimb_max_iterations,
     )
 
     if lrs.ranges:
```