aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Patrik Gustavsson <patrik.gustavsson@arm.com> 2020-08-11 16:45:35 +0200
committer Fredrik Knutsson <fredrik.knutsson.hunnebo@gmail.com> 2020-08-12 12:43:36 +0000
commit c0bb8993268863c63ececf8d076a450c6a4f4dd5 (patch)
tree 63af9a6dc3613349c5ed1c3c388aa98371ef61c8
parent 89a6bbffaa85d72cd79fd73f457c01486b9258bb (diff)
download ethos-u-vela-c0bb8993268863c63ececf8d076a450c6a4f4dd5.tar.gz
MLBEDSW-2696 Fix Sram exceeded for Sram spilling
Avoid concat op as predecessor in ifm streaming, when Sram spilling is to be applied.

Signed-off-by: Patrik Gustavsson <patrik.gustavsson@arm.com>
Change-Id: I2ba6283a7561a12d54a06552a15e122bb082b7a1
-rw-r--r-- ethosu/vela/compiler_driver.py 7
-rw-r--r-- ethosu/vela/scheduler.py 16
2 files changed, 20 insertions, 3 deletions
diff --git a/ethosu/vela/compiler_driver.py b/ethosu/vela/compiler_driver.py
index b5a6c42d..f407fdc4 100644
--- a/ethosu/vela/compiler_driver.py
+++ b/ethosu/vela/compiler_driver.py
@@ -30,6 +30,7 @@ from . import register_command_stream_generator
from . import scheduler
from . import tensor_allocation
from . import weight_compressor
+from .errors import VelaError
from .nn_graph import PassPlacement
from .nn_graph import TensorAllocator
from .rewrite_graph import verify_graph_health
@@ -208,7 +209,11 @@ def compiler_driver(nng, arch, options, scheduler_options):
if root_sg is not None and (arch.feature_map_storage_mem_area != arch.fast_storage_mem_area):
if root_sg.memory_used_per_type.get(MemType.Scratch_fast, 0) > arch.sram_size:
- print("Warning: Sram limit has been exceeded, by the scratch fast tensor")
+ raise VelaError(
+ "Sram limit {} bytes, has been exceeded by the scratch fast tensor {} bytes".format(
+ arch.sram_size, root_sg.memory_used_per_type.get(MemType.Scratch_fast, 0)
+ )
+ )
# Allocate all Cpu constant tensors, this is done last because the Npu-ops
# have to be serialized into flash and scratch tensors first
diff --git a/ethosu/vela/scheduler.py b/ethosu/vela/scheduler.py
index 36bb3c27..cc9278fd 100644
--- a/ethosu/vela/scheduler.py
+++ b/ethosu/vela/scheduler.py
@@ -640,6 +640,16 @@ class DynamicProgrammingScheduler:
res = self.filter_pareto_frontier(res, remove_equally_good_candidates=True)
return res
+ def avoid_for_spilling(self, pred_candidate):
+ if self.arch.feature_map_storage_mem_area == self.arch.fast_storage_mem_area:
+ return False
+
+ # For SRAM spilling, concat op is avoided as predecessor
+ for op in pred_candidate.ops:
+ if op.type == "ConcatSliceWrite":
+ return True
+ return False
+
def search_ifm_streaming_partial(self, ps, block_config):
if ps.placement != PassPlacement.Npu:
return ABORT_SEARCH
@@ -664,8 +674,10 @@ class DynamicProgrammingScheduler:
# and it only has one successor, namely us
if pred_candidate.placement == PassPlacement.Npu:
if pred_candidate.npu_block_type in self.ifm_stream_npu_blocks:
- # and it is on the Npu and fusable - it's a candidate
- pred_pass_list.append(pred_candidate)
+ # and it is on the Npu
+ if not self.avoid_for_spilling(pred_candidate):
+ # and fusable - it's a candidate
+ pred_pass_list.append(pred_candidate)
if not pred_pass_list:
return ABORT_SEARCH