diff options
Diffstat (limited to 'ethosu/vela/compiler_driver.py')
-rw-r--r-- | ethosu/vela/compiler_driver.py | 9 |
1 files changed, 7 insertions, 2 deletions
diff --git a/ethosu/vela/compiler_driver.py b/ethosu/vela/compiler_driver.py
index e495f1ce..b5a6c42d 100644
--- a/ethosu/vela/compiler_driver.py
+++ b/ethosu/vela/compiler_driver.py
@@ -126,6 +126,7 @@ def compiler_driver(nng, arch, options, scheduler_options):
 
     # Placeholders for scratch and flash tensors that are common for all Npu subgraphs
     scratch_tens = None
+    scratch_fast_tens = None
     flash_tens = None
 
     # Calculate live ranges for all constant Npu tensors, in permanent storage
@@ -199,12 +200,16 @@ def compiler_driver(nng, arch, options, scheduler_options):
         register_command_stream_generator.generate_register_command_stream(
             nng, sg, arch, options.verbose_register_command_stream
         )
-        scratch_tens, flash_tens = npu_serialisation.serialise_npu_subgraph_into_tensors(
-            nng, sg, arch, scratch_tens, flash_tens
+        scratch_tens, scratch_fast_tens, flash_tens = npu_serialisation.serialise_npu_subgraph_into_tensors(
+            nng, sg, arch, scratch_tens, scratch_fast_tens, flash_tens
         )
 
     npu_serialisation.rewrite_npu_call_ops(nng, root_sg, arch)
 
+    if root_sg is not None and (arch.feature_map_storage_mem_area != arch.fast_storage_mem_area):
+        if root_sg.memory_used_per_type.get(MemType.Scratch_fast, 0) > arch.sram_size:
+            print("Warning: Sram limit has been exceeded, by the scratch fast tensor")
+
     # Allocate all Cpu constant tensors, this is done last because the Npu-ops
     # have to be serialized into flash and scratch tensors first
     tensor_allocation.allocate_tensors(