diff options
author | Patrik Gustavsson <patrik.gustavsson@arm.com> | 2020-05-27 09:15:11 +0200 |
---|---|---|
committer | Patrik Gustavsson <patrik.gustavsson@arm.com> | 2020-06-25 11:42:56 +0200 |
commit | eca2e95e1fea150d8a942f8b5f0a4d9d7aefebc1 (patch) | |
tree | 438b385f1ded3c18c3b84d2204a57c39be6be34a /ethosu/vela/tensor_allocation.py | |
parent | eec4e50e19cb5522640eae5fd4566917dc2a7b9d (diff) | |
download | ethos-u-vela-eca2e95e1fea150d8a942f8b5f0a4d9d7aefebc1.tar.gz |
MLBEDSW-2306 Added more supported mem-cfgs
Additional supported memory configurations:
- Permanent_storage = DRAM
- Tensor arena either in DRAM or SRAM
Signed-off-by: Patrik Gustavsson <patrik.gustavsson@arm.com>
Change-Id: I20beb7151e306bfdba540e7c0b2a7b478b4d94e1
Diffstat (limited to 'ethosu/vela/tensor_allocation.py')
-rw-r--r-- | ethosu/vela/tensor_allocation.py | 28 |
1 file changed, 21 insertions(+), 7 deletions(-)
diff --git a/ethosu/vela/tensor_allocation.py b/ethosu/vela/tensor_allocation.py
index e3952df3..f29296d1 100644
--- a/ethosu/vela/tensor_allocation.py
+++ b/ethosu/vela/tensor_allocation.py
@@ -25,6 +25,7 @@ from . import numeric_util
 from .greedy_allocation import allocate_live_ranges as greedy_allocate_live_ranges
 from .nn_graph import TensorAllocator
 from .tensor import MemArea
+from .tensor import MemType


 def linear_allocate_live_ranges(live_ranges, alloc_granularity=16):
@@ -66,12 +67,13 @@ def mark_sram_used_for_cascaded_passes(sg, lrs):
         ps.sram_used = sram_used


-def print_allocation(lrs, mem_area, sg, verbose_allocation, show_minimum_possible_allocation):
+def print_allocation(lrs, mem_area, mem_type_set, sg, verbose_allocation, show_minimum_possible_allocation):
     if verbose_allocation:
-        if mem_area == MemArea.Sram:
-            print("allocation for", mem_area, "- non-constant tensors in Cpu and Npu subgraphs")
-        else:
+        if mem_type_set == set((MemType.Permanent_NPU,)) or mem_type_set == set((MemType.Permanent_CPU,)):
             print("allocation for", mem_area, "- constant tensors in", sg.placement.name, "subgraph(s)")
+        else:
+            print("allocation for", mem_area, "- non-constant tensors in Cpu and Npu subgraphs")
+
         for start_time, start, end, name, end_time in sorted(
             (
                 lr.start_time,
@@ -99,6 +101,7 @@ def allocate_tensors(
     sg,
     arch,
     mem_area,
+    mem_type_set,
     use_ifm_ofm_overlap=True,
     tensor_allocator=TensorAllocator.Greedy,
     verbose_allocation=False,
@@ -109,6 +112,7 @@ def allocate_tensors(
     lrs = live_range.extract_live_ranges_from_cascaded_passes(
         sg,
         mem_area,
+        mem_type_set,
         mark_output_tensors_overlapping_with_input_tensors=False,
         use_ifm_ofm_overlap=use_ifm_ofm_overlap,
         ignore_subgraph_input_output_tensors=ignore_subgraph_input_output_tensors,
@@ -120,16 +124,26 @@ def allocate_tensors(
     if tens_alloc == TensorAllocator.Greedy:
         total_sz = greedy_allocate_live_ranges(sg, arch, lrs, mem_area, verbose_allocation)
     elif tens_alloc == TensorAllocator.LinearAlloc:
-        total_sz = linear_allocate_live_ranges(lrs)
+        total_sz = linear_allocate_live_ranges(lrs, 16)
     else:
         assert 0

-    sg.memory_used[mem_area] = total_sz
+    if sg.memory_used.get(mem_area, 0) == 0:
+        sg.memory_used[mem_area] = total_sz
+    else:
+        sg.memory_used[mem_area] += total_sz
+
+    # Keep track of how much should be used for scratch or permanent storage for NPU
+    for mem_type in mem_type_set:
+        if sg.memory_used_per_type.get(mem_type, 0) == 0:
+            sg.memory_used_per_type[mem_type] = total_sz
+        else:
+            sg.memory_used_per_type[mem_type] += total_sz

     nng.total_size[mem_area] = nng.total_size.get(mem_area, 0) + sum(tens.storage_size() for tens in lrs.ranges)
     nng.total_elements[mem_area] = nng.total_elements.get(mem_area, 0) + sum(tens.elements() for tens in lrs.ranges)

-    print_allocation(lrs, mem_area, sg, verbose_allocation, show_minimum_possible_allocation)
+    print_allocation(lrs, mem_area, mem_type_set, sg, verbose_allocation, show_minimum_possible_allocation)

     if mem_area == MemArea.Sram:
         # Mark Sram usage for all subgraphs