author     Andreas Nevalainen <andreas.nevalainen@arm.com>  2020-11-17 09:16:11 +0100
committer  patrik.gustavsson <patrik.gustavsson@arm.com>    2020-11-18 11:14:20 +0000
commit     ed67b88e4b0ce21c42dabe5b2a199afd8e95b17f (patch)
tree       459e051892d3e0e9bb017453fa051c963ec2c06f
parent     b0ca274e904ca9080f2baa22fdb538c58b20aab2 (diff)
download   ethos-u-vela-ed67b88e4b0ce21c42dabe5b2a199afd8e95b17f.tar.gz
MLBEDSW-3468: Update scale tensors SRAM size calculation
Updated SRAM size calculation for scale tensors.

Change-Id: Idaecc3bf0c83d58ea70163bfd194c594295b66db
Signed-off-by: Andreas Nevalainen <andreas.nevalainen@arm.com>
-rw-r--r--  ethosu/vela/scheduler.py | 49
1 file changed, 26 insertions(+), 23 deletions(-)
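What changed, in short: the SRAM-limit check used to be made against the original scale tensor's storage size, before the tensor was cloned into fast storage. It is now made after cloning, against the clone's size, with element_size_bytes set to 10 (per the diff; presumably the packed bias+scale+shift entry size used in fast storage), and cp.sram_used is incremented by that same figure. Below is a minimal, self-contained sketch of the difference; FakeTensor and all numbers are illustrative only, not Vela's actual Tensor class:

# Illustrative sketch only: FakeTensor mimics just the attributes this
# commit touches; it is not Vela's Tensor class.
class FakeTensor:
    def __init__(self, elements, element_size_bytes):
        self.elements = elements
        self.element_size_bytes = element_size_bytes

    def storage_size(self):
        # Vela's real storage_size() also handles alignment/rounding; omitted.
        return self.elements * self.element_size_bytes

    def clone_into_fast_storage(self):
        return FakeTensor(self.elements, self.element_size_bytes)


sram_limit = 1024  # hypothetical numbers, chosen to expose the difference
sram_used = 900
scale_tensor = FakeTensor(elements=16, element_size_bytes=4)

# Before this commit: the limit check used the original tensor's size.
# 900 + 16 * 4 = 964 <= 1024, so the move to fast storage proceeded.
old_ok = (sram_used + scale_tensor.storage_size()) <= sram_limit

# After this commit: clone first, then size the clone with 10-byte
# elements, i.e. the size the copy actually occupies in fast storage.
# 900 + 16 * 10 = 1060 > 1024, so the move is now correctly skipped.
new_tens = scale_tensor.clone_into_fast_storage()
new_tens.element_size_bytes = 10
new_ok = (sram_used + new_tens.storage_size()) <= sram_limit

print(old_ok, new_ok)  # True False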
diff --git a/ethosu/vela/scheduler.py b/ethosu/vela/scheduler.py
index 526cc0e..4af83a1 100644
--- a/ethosu/vela/scheduler.py
+++ b/ethosu/vela/scheduler.py
@@ -1030,7 +1030,7 @@ class DynamicProgrammingScheduler:
         for cp in sg.cascaded_passes:
             if cp.strategy == SchedulingStrategy.IfmStream:
                 for ps in cp.passes:
-                    if ps.scale_tensor and (cp.sram_used + ps.scale_tensor.storage_size()) <= self.sram_limit:
+                    if ps.scale_tensor:
                         tens = ps.scale_tensor
 
                         # Find op using scale tensor
@@ -1041,28 +1041,31 @@ class DynamicProgrammingScheduler:
                         new_tens = tens.clone_into_fast_storage(arch)
                         new_tens.consumer_list = tens.consumer_list.copy()
                         new_tens.purpose = TensorPurpose.FSBias
-
-                        # Create DMA cmd
-                        dma_cmd = Operation(Op.DMA, tens.ops[0].name + "_dma")
-                        dma_cmd.inputs = [tens]
-                        dma_cmd.set_output_tensor(new_tens)
-                        dma_cmd.attrs["source"] = tens.mem_area
-                        dma_cmd.attrs["destination"] = new_tens.mem_area
-                        dma_cmd.run_on_npu = True
-
-                        tens.consumer_list.clear()
-                        tens.consumer_list.append(dma_cmd)
-
-                        # Replace tensor and op
-                        idx = op.inputs.index(tens)
-                        op.inputs[idx] = new_tens
-
-                        ps.ops.insert(0, dma_cmd)
-                        ps.scale_tensor = new_tens
-                        ps.intermediates.append(new_tens)
-                        ps.cascade.intermediates.append(new_tens)
-
-                        cp.sram_used += tens.storage_size()
+                        new_tens.element_size_bytes = 10
+                        new_tens_size = new_tens.storage_size()
+
+                        if (cp.sram_used + new_tens_size) <= self.sram_limit:
+                            # Create DMA cmd
+                            dma_cmd = Operation(Op.DMA, tens.ops[0].name + "_dma")
+                            dma_cmd.inputs = [tens]
+                            dma_cmd.set_output_tensor(new_tens)
+                            dma_cmd.attrs["source"] = tens.mem_area
+                            dma_cmd.attrs["destination"] = new_tens.mem_area
+                            dma_cmd.run_on_npu = True
+
+                            tens.consumer_list.clear()
+                            tens.consumer_list.append(dma_cmd)
+
+                            # Replace tensor and op
+                            idx = op.inputs.index(tens)
+                            op.inputs[idx] = new_tens
+
+                            ps.ops.insert(0, dma_cmd)
+                            ps.scale_tensor = new_tens
+                            ps.intermediates.append(new_tens)
+                            ps.cascade.intermediates.append(new_tens)
+
+                            cp.sram_used += new_tens_size
 
 
 def schedule_passes(nng, arch, options: SchedulerOptions):
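Design note: the final line of the second hunk now increments cp.sram_used by new_tens_size instead of tens.storage_size(), so the running SRAM accounting uses the same figure that was checked against self.sram_limit and that the fast-storage clone actually occupies.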