aboutsummaryrefslogtreecommitdiff
path: root/ethosu/vela/compiler_driver.py
diff options
context:
space:
mode:
authorPatrik Gustavsson <patrik.gustavsson@arm.com>2020-05-27 09:15:11 +0200
committerPatrik Gustavsson <patrik.gustavsson@arm.com>2020-06-25 11:42:56 +0200
commiteca2e95e1fea150d8a942f8b5f0a4d9d7aefebc1 (patch)
tree438b385f1ded3c18c3b84d2204a57c39be6be34a /ethosu/vela/compiler_driver.py
parenteec4e50e19cb5522640eae5fd4566917dc2a7b9d (diff)
downloadethos-u-vela-eca2e95e1fea150d8a942f8b5f0a4d9d7aefebc1.tar.gz
MLBEDSW-2306 Added more supported mem-cfgs
Additional supported memory configurations: (1) Permanent_storage = DRAM; (2) Tensor arena either in DRAM or SRAM. Signed-off-by: Patrik Gustavsson <patrik.gustavsson@arm.com> Change-Id: I20beb7151e306bfdba540e7c0b2a7b478b4d94e1
Diffstat (limited to 'ethosu/vela/compiler_driver.py')
-rw-r--r--ethosu/vela/compiler_driver.py55
1 files changed, 37 insertions, 18 deletions
diff --git a/ethosu/vela/compiler_driver.py b/ethosu/vela/compiler_driver.py
index 9c345dba..e495f1ce 100644
--- a/ethosu/vela/compiler_driver.py
+++ b/ethosu/vela/compiler_driver.py
@@ -33,7 +33,7 @@ from . import weight_compressor
from .nn_graph import PassPlacement
from .nn_graph import TensorAllocator
from .rewrite_graph import verify_graph_health
-from .tensor import MemArea
+from .tensor import MemType
class CompilerOptions:
@@ -120,9 +120,6 @@ def compiler_driver(nng, arch, options, scheduler_options):
# block config, and calc and pack the scales and biases
weight_compressor.update_pass_weight_and_scale_tensors(nng, arch)
- # Memory area for all non-constant tensors (Cpu and Npu)
- non_const_mem_area = MemArea.Sram
-
# LiveRanges for constant tensors for all Npu subgraphs
permanent_storage = arch.permanent_storage_mem_area
lr_graph_flash = live_range.LiveRangeGraph()
@@ -135,7 +132,11 @@ def compiler_driver(nng, arch, options, scheduler_options):
for sg in nng.subgraphs:
if sg.placement == PassPlacement.Npu:
lr_graph_flash = live_range.extract_live_ranges_from_cascaded_passes(
- sg, permanent_storage, ignore_subgraph_input_output_tensors=True, lr_graph=lr_graph_flash
+ sg,
+ permanent_storage,
+ MemType.Permanent_NPU,
+ ignore_subgraph_input_output_tensors=True,
+ lr_graph=lr_graph_flash,
)
if len(nng.subgraphs) > 1:
@@ -143,12 +144,12 @@ def compiler_driver(nng, arch, options, scheduler_options):
# processed first during serialization into tensors
first_npu_sg = nng.subgraphs[1]
assert first_npu_sg.placement == PassPlacement.Npu
- # Use the linear allocator for constant tensors
tensor_allocation.allocate_tensors(
nng,
first_npu_sg,
arch,
permanent_storage,
+ set((MemType.Permanent_NPU,)),
scheduler_options.use_ifm_ofm_overlap,
TensorAllocator.LinearAlloc,
options.verbose_allocation,
@@ -159,19 +160,36 @@ def compiler_driver(nng, arch, options, scheduler_options):
# Allocate all non-constant tensors to the root, i.e. Cpu, subgraph. This step
# will start at the root subgraph's input and traverse from top to bottom. When
# it comes across an Npu-op it will extract live ranges for it's corresponding
- # Npu subgraph and add them to the root's live range graph. Finally, all of the
- # non-constant tensors are allocated together
+ # Npu subgraph and add them to the root's live range graph.
+ # The non-constant tensors are stored either in arch.feature_map_storage_mem_area or
+ # arch.fast_storage_mem_area.
+ # When these memory areas are the same, all non-constant tensors are allocated together.
+ # Otherwise they are allocated separately.
+
root_sg = nng.get_root_subgraph()
- tensor_allocation.allocate_tensors(
- nng,
- root_sg,
- arch,
- non_const_mem_area,
- scheduler_options.use_ifm_ofm_overlap,
- options.tensor_allocator,
- options.verbose_allocation,
- options.show_minimum_possible_allocation,
- )
+
+ alloc_list = []
+ if arch.feature_map_storage_mem_area == arch.fast_storage_mem_area:
+ mem_alloc_scratch = (arch.feature_map_storage_mem_area, set((MemType.Scratch, MemType.Scratch_fast)))
+ alloc_list.append(mem_alloc_scratch)
+ else:
+ mem_alloc_scratch = (arch.feature_map_storage_mem_area, set((MemType.Scratch,)))
+ mem_alloc_scratch_fast = (arch.fast_storage_mem_area, set((MemType.Scratch_fast,)))
+ alloc_list.append(mem_alloc_scratch)
+ alloc_list.append(mem_alloc_scratch_fast)
+
+ for alloc in alloc_list:
+ tensor_allocation.allocate_tensors(
+ nng,
+ root_sg,
+ arch,
+ alloc[0],
+ alloc[1],
+ scheduler_options.use_ifm_ofm_overlap,
+ options.tensor_allocator,
+ options.verbose_allocation,
+ options.show_minimum_possible_allocation,
+ )
# Generate command streams and serialise Npu-ops into tensors
for sg in nng.subgraphs:
@@ -194,6 +212,7 @@ def compiler_driver(nng, arch, options, scheduler_options):
root_sg,
arch,
permanent_storage,
+ set((MemType.Permanent_CPU,)),
scheduler_options.use_ifm_ofm_overlap,
TensorAllocator.LinearAlloc,
options.verbose_allocation,