aboutsummaryrefslogtreecommitdiff
path: root/ethosu/vela/compiler_driver.py
diff options
context:
space:
mode:
authorPatrik Gustavsson <patrik.gustavsson@arm.com>2020-06-29 17:36:55 +0200
committertim.hall <tim.hall@arm.com>2020-07-07 15:23:20 +0000
commit3ab9452881a15d88710f4b5d7c14ba5069e74948 (patch)
tree5f942504d52094f728d974c5bb66ae67aa83eedb /ethosu/vela/compiler_driver.py
parente843d3311b8945baa32654af0dccb229b6861438 (diff)
downloadethos-u-vela-3ab9452881a15d88710f4b5d7c14ba5069e74948.tar.gz
MLBEDSW-2551 Add support for more mem-cfgs
Added support for one more memory configuration. Change-Id: Iac19992386e3e9b80bd519acb1b0a399c47d736f Signed-off-by: Patrik Gustavsson <patrik.gustavsson@arm.com>
Diffstat (limited to 'ethosu/vela/compiler_driver.py')
-rw-r--r--ethosu/vela/compiler_driver.py9
1 file changed, 7 insertions, 2 deletions
diff --git a/ethosu/vela/compiler_driver.py b/ethosu/vela/compiler_driver.py
index e495f1ce..b5a6c42d 100644
--- a/ethosu/vela/compiler_driver.py
+++ b/ethosu/vela/compiler_driver.py
@@ -126,6 +126,7 @@ def compiler_driver(nng, arch, options, scheduler_options):
# Placeholders for scratch and flash tensors that are common for all Npu subgraphs
scratch_tens = None
+ scratch_fast_tens = None
flash_tens = None
# Calculate live ranges for all constant Npu tensors, in permanent storage
@@ -199,12 +200,16 @@ def compiler_driver(nng, arch, options, scheduler_options):
register_command_stream_generator.generate_register_command_stream(
nng, sg, arch, options.verbose_register_command_stream
)
- scratch_tens, flash_tens = npu_serialisation.serialise_npu_subgraph_into_tensors(
- nng, sg, arch, scratch_tens, flash_tens
+ scratch_tens, scratch_fast_tens, flash_tens = npu_serialisation.serialise_npu_subgraph_into_tensors(
+ nng, sg, arch, scratch_tens, scratch_fast_tens, flash_tens
)
npu_serialisation.rewrite_npu_call_ops(nng, root_sg, arch)
+ if root_sg is not None and (arch.feature_map_storage_mem_area != arch.fast_storage_mem_area):
+ if root_sg.memory_used_per_type.get(MemType.Scratch_fast, 0) > arch.sram_size:
+ print("Warning: Sram limit has been exceeded, by the scratch fast tensor")
+
# Allocate all Cpu constant tensors, this is done last because the Npu-ops
# have to be serialized into flash and scratch tensors first
tensor_allocation.allocate_tensors(