From c0bb8993268863c63ececf8d076a450c6a4f4dd5 Mon Sep 17 00:00:00 2001 From: Patrik Gustavsson Date: Tue, 11 Aug 2020 16:45:35 +0200 Subject: MLBEDSW-2696 Fix Sram exceeded for Sram spilling Avoid concat op as predecessor in ifm streaming, when Sram spilling is to be applied. Signed-off-by: Patrik Gustavsson Change-Id: I2ba6283a7561a12d54a06552a15e122bb082b7a1 --- ethosu/vela/compiler_driver.py | 7 ++++++- ethosu/vela/scheduler.py | 16 ++++++++++++++-- 2 files changed, 20 insertions(+), 3 deletions(-) (limited to 'ethosu') diff --git a/ethosu/vela/compiler_driver.py b/ethosu/vela/compiler_driver.py index b5a6c42d..f407fdc4 100644 --- a/ethosu/vela/compiler_driver.py +++ b/ethosu/vela/compiler_driver.py @@ -30,6 +30,7 @@ from . import register_command_stream_generator from . import scheduler from . import tensor_allocation from . import weight_compressor +from .errors import VelaError from .nn_graph import PassPlacement from .nn_graph import TensorAllocator from .rewrite_graph import verify_graph_health @@ -208,7 +209,11 @@ def compiler_driver(nng, arch, options, scheduler_options): if root_sg is not None and (arch.feature_map_storage_mem_area != arch.fast_storage_mem_area): if root_sg.memory_used_per_type.get(MemType.Scratch_fast, 0) > arch.sram_size: - print("Warning: Sram limit has been exceeded, by the scratch fast tensor") + raise VelaError( + "Sram limit {} bytes, has been exceeded by the scratch fast tensor {} bytes".format( + arch.sram_size, root_sg.memory_used_per_type.get(MemType.Scratch_fast, 0) + ) + ) # Allocate all Cpu constant tensors, this is done last because the Npu-ops # have to be serialized into flash and scratch tensors first diff --git a/ethosu/vela/scheduler.py b/ethosu/vela/scheduler.py index 36bb3c27..cc9278fd 100644 --- a/ethosu/vela/scheduler.py +++ b/ethosu/vela/scheduler.py @@ -640,6 +640,16 @@ class DynamicProgrammingScheduler: res = self.filter_pareto_frontier(res, remove_equally_good_candidates=True) return res + def avoid_for_spilling(self, pred_candidate): + if self.arch.feature_map_storage_mem_area == self.arch.fast_storage_mem_area: + return False + + # For SRAM spilling, concat op is avoided as predecessor + for op in pred_candidate.ops: + if op.type == "ConcatSliceWrite": + return True + return False + def search_ifm_streaming_partial(self, ps, block_config): if ps.placement != PassPlacement.Npu: return ABORT_SEARCH @@ -664,8 +674,10 @@ class DynamicProgrammingScheduler: # and it only has one successor, namely us if pred_candidate.placement == PassPlacement.Npu: if pred_candidate.npu_block_type in self.ifm_stream_npu_blocks: - # and it is on the Npu and fusable - it's a candidate - pred_pass_list.append(pred_candidate) + # and it is on the Npu + if not self.avoid_for_spilling(pred_candidate): + # and fusable - it's a candidate + pred_pass_list.append(pred_candidate) if not pred_pass_list: return ABORT_SEARCH -- cgit v1.2.1