author     Patrik Gustavsson <patrik.gustavsson@arm.com>  2020-05-27 09:15:11 +0200
committer  Patrik Gustavsson <patrik.gustavsson@arm.com>  2020-06-25 11:42:56 +0200
commit     eca2e95e1fea150d8a942f8b5f0a4d9d7aefebc1 (patch)
tree       438b385f1ded3c18c3b84d2204a57c39be6be34a /ethosu/vela/tensor_allocation.py
parent     eec4e50e19cb5522640eae5fd4566917dc2a7b9d (diff)
download   ethos-u-vela-eca2e95e1fea150d8a942f8b5f0a4d9d7aefebc1.tar.gz
MLBEDSW-2306 Added more supported mem-cfgs
Additional supported memory configurations:
- Permanent_storage = DRAM
- Tensor arena either in DRAM or SRAM

Signed-off-by: Patrik Gustavsson <patrik.gustavsson@arm.com>
Change-Id: I20beb7151e306bfdba540e7c0b2a7b478b4d94e1
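The two configurations can then be driven as separate allocation passes over the same graph: constant tensors go to permanent storage in DRAM, and the tensor arena to either DRAM or SRAM. A minimal call sketch, assuming the allocate_tensors signature introduced in the diff below; the leading nng argument and the MemType.Scratch member are assumptions not shown in this change:

    # Permanent storage (constant tensors) in DRAM
    allocate_tensors(nng, sg, arch, MemArea.Dram, set((MemType.Permanent_NPU,)))
    # Tensor arena (non-constant tensors) in SRAM; pass MemArea.Dram instead
    # for a DRAM-only configuration (MemType.Scratch is an assumed member name)
    allocate_tensors(nng, sg, arch, MemArea.Sram, set((MemType.Scratch,)))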
Diffstat (limited to 'ethosu/vela/tensor_allocation.py')
-rw-r--r--  ethosu/vela/tensor_allocation.py  28
1 file changed, 21 insertions(+), 7 deletions(-)
diff --git a/ethosu/vela/tensor_allocation.py b/ethosu/vela/tensor_allocation.py
index e3952df3..f29296d1 100644
--- a/ethosu/vela/tensor_allocation.py
+++ b/ethosu/vela/tensor_allocation.py
@@ -25,6 +25,7 @@ from . import numeric_util
 from .greedy_allocation import allocate_live_ranges as greedy_allocate_live_ranges
 from .nn_graph import TensorAllocator
 from .tensor import MemArea
+from .tensor import MemType


 def linear_allocate_live_ranges(live_ranges, alloc_granularity=16):
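For reference, MemType is imported from the same module as MemArea. Only two members appear in this change; a hedged sketch of the enum's shape (the base class and any scratch member are assumptions):

    import enum

    class MemType(enum.Enum):
        # Members referenced in this diff; the real enum presumably also
        # defines a scratch/arena type for non-constant tensors.
        Permanent_NPU = enum.auto()
        Permanent_CPU = enum.auto()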
@@ -66,12 +67,13 @@ def mark_sram_used_for_cascaded_passes(sg, lrs):
             ps.sram_used = sram_used


-def print_allocation(lrs, mem_area, sg, verbose_allocation, show_minimum_possible_allocation):
+def print_allocation(lrs, mem_area, mem_type_set, sg, verbose_allocation, show_minimum_possible_allocation):
     if verbose_allocation:
-        if mem_area == MemArea.Sram:
-            print("allocation for", mem_area, "- non-constant tensors in Cpu and Npu subgraphs")
-        else:
+        if mem_type_set == set((MemType.Permanent_NPU,)) or mem_type_set == set((MemType.Permanent_CPU,)):
             print("allocation for", mem_area, "- constant tensors in", sg.placement.name, "subgraph(s)")
+        else:
+            print("allocation for", mem_area, "- non-constant tensors in Cpu and Npu subgraphs")
+
         for start_time, start, end, name, end_time in sorted(
             (
                 lr.start_time,
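The report now branches on memory type rather than memory area, which matters once a single area (e.g. DRAM) can hold both constant and non-constant tensors. A small illustration of the new test, using the MemType members from this diff:

    mem_type_set = set((MemType.Permanent_NPU,))
    # True for either permanent-only set; any other set (e.g. scratch types)
    # falls through to the non-constant branch
    is_constant_alloc = mem_type_set in (set((MemType.Permanent_NPU,)), set((MemType.Permanent_CPU,)))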
@@ -99,6 +101,7 @@ def allocate_tensors(
     sg,
     arch,
     mem_area,
+    mem_type_set,
     use_ifm_ofm_overlap=True,
     tensor_allocator=TensorAllocator.Greedy,
     verbose_allocation=False,
@@ -109,6 +112,7 @@
     lrs = live_range.extract_live_ranges_from_cascaded_passes(
         sg,
         mem_area,
+        mem_type_set,
         mark_output_tensors_overlapping_with_input_tensors=False,
         use_ifm_ofm_overlap=use_ifm_ofm_overlap,
         ignore_subgraph_input_output_tensors=ignore_subgraph_input_output_tensors,
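Threading mem_type_set into live range extraction presumably lets two passes over the same mem_area (e.g. DRAM holding both permanent and scratch tensors) pick up disjoint tensor sets. A hedged sketch of the kind of predicate this enables; the helper name and the tens.mem_type attribute are assumptions, not the real implementation:

    def lr_should_be_extracted(tens, mem_area, mem_type_set):
        # A tensor belongs to this allocation pass only if it sits in the
        # requested memory area and has one of the requested memory types
        return tens.mem_area == mem_area and tens.mem_type in mem_type_set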
@@ -120,16 +124,26 @@
         if tens_alloc == TensorAllocator.Greedy:
             total_sz = greedy_allocate_live_ranges(sg, arch, lrs, mem_area, verbose_allocation)
         elif tens_alloc == TensorAllocator.LinearAlloc:
-            total_sz = linear_allocate_live_ranges(lrs)
+            total_sz = linear_allocate_live_ranges(lrs, 16)
         else:
             assert 0
-        sg.memory_used[mem_area] = total_sz
+        if sg.memory_used.get(mem_area, 0) == 0:
+            sg.memory_used[mem_area] = total_sz
+        else:
+            sg.memory_used[mem_area] += total_sz
+
+        # Keep track of how much should be used for scratch or permanent storage for NPU
+        for mem_type in mem_type_set:
+            if sg.memory_used_per_type.get(mem_type, 0) == 0:
+                sg.memory_used_per_type[mem_type] = total_sz
+            else:
+                sg.memory_used_per_type[mem_type] += total_sz

         nng.total_size[mem_area] = nng.total_size.get(mem_area, 0) + sum(tens.storage_size() for tens in lrs.ranges)
         nng.total_elements[mem_area] = nng.total_elements.get(mem_area, 0) + sum(tens.elements() for tens in lrs.ranges)

-        print_allocation(lrs, mem_area, sg, verbose_allocation, show_minimum_possible_allocation)
+        print_allocation(lrs, mem_area, mem_type_set, sg, verbose_allocation, show_minimum_possible_allocation)

         if mem_area == MemArea.Sram:
             # Mark Sram usage for all subgraphs
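A note on the accumulation pattern introduced above: because dict.get(key, 0) already returns 0 for a missing key, each if/else pair is equivalent to a single idiomatic update, e.g.:

    sg.memory_used[mem_area] = sg.memory_used.get(mem_area, 0) + total_sz
    for mem_type in mem_type_set:
        sg.memory_used_per_type[mem_type] = sg.memory_used_per_type.get(mem_type, 0) + total_sz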