diff options
author | Tim Hall <tim.hall@arm.com> | 2021-05-27 18:49:40 +0100 |
---|---|---|
committer | Tim Hall <tim.hall@arm.com> | 2021-05-27 18:57:39 +0100 |
commit | d8339a75c9b655c0507e34238078fdad068b4023 (patch) | |
tree | 36a14726b30760169a83c0356803b480992fade8 /ethosu/vela/tensor_allocation.py | |
parent | 64556f32ff7bfca6036a6598034464b13b64a4ef (diff) | |
download | ethos-u-vela-d8339a75c9b655c0507e34238078fdad068b4023.tar.gz |
MLBEDSW-4034: New Scheduler Size or Performance Optimisation
- Merged dev/scheduler at 83639f90e8c828f70de6e29142355a940224959b
Signed-off-by: Tim Hall <tim.hall@arm.com>
Change-Id: I0050529d4b42da93768c7264296434dd877fb5b4
Diffstat (limited to 'ethosu/vela/tensor_allocation.py')
-rw-r--r-- | ethosu/vela/tensor_allocation.py | 39 |
1 file changed, 32 insertions(+), 7 deletions(-)
diff --git a/ethosu/vela/tensor_allocation.py b/ethosu/vela/tensor_allocation.py index 724c7c0d..d3e2a037 100644 --- a/ethosu/vela/tensor_allocation.py +++ b/ethosu/vela/tensor_allocation.py @@ -106,6 +106,8 @@ def verify_allocation(live_ranges: LiveRangeGraph, alignment: int): def mark_sram_used_for_cascaded_passes(sg, lrs): + if len(sg.cascaded_passes) < 1: + return end_pos = max(ps.time for ps in sg.cascaded_passes) + 2 mem_usage = np.zeros(end_pos, dtype=np.int64) @@ -169,18 +171,14 @@ def memory_usage_histogram(lrs: List[LiveRange]): return histogram -def allocate_tensors( - nng, +def allocate( sg, arch, mem_area, mem_type_set, tensor_allocator=TensorAllocator.Greedy, - verbose_allocation=False, lr_graph=None, cpu_tensor_alignment=Tensor.AllocationQuantum, - max_size=None, - dry_test=False, ): # Allocates addresses to tensors, returns False if tensors could not be fit within max_size ignore_subgraph_input_output_tensors = False @@ -192,7 +190,7 @@ def allocate_tensors( lr_graph=lr_graph, cpu_tensor_alignment=cpu_tensor_alignment, ) - + total_sz = 0 if lrs.ranges: tens_alloc = tensor_allocator if tens_alloc == TensorAllocator.Greedy: @@ -204,6 +202,34 @@ def allocate_tensors( total_sz = hillclimb_allocate_live_ranges(lrs, cpu_tensor_alignment) else: assert 0 + return lrs, total_sz + + +def allocate_tensors( + nng, + sg, + arch, + mem_area, + mem_type_set, + tensor_allocator=TensorAllocator.Greedy, + verbose_allocation=False, + lr_graph=None, + cpu_tensor_alignment=Tensor.AllocationQuantum, + max_size=None, + dry_test=False, +): + # Allocates addresses to tensors, returns False if tensors could not be fit within max_size + lrs, total_sz = allocate( + sg, + arch, + mem_area, + mem_type_set, + tensor_allocator=tensor_allocator, + lr_graph=lr_graph, + cpu_tensor_alignment=cpu_tensor_alignment, + ) + + if lrs.ranges: alloc_ok = max_size is None or total_sz <= max_size if dry_test or not alloc_ok: # Dry test or allocation failed; undo allocation @@ -233,5 +259,4 @@ 
def allocate_tensors( if sg == nng.get_root_subgraph(): nng.memory_used = sg.memory_used - return True