path: root/ethosu/vela/scheduler.py
Diffstat (limited to 'ethosu/vela/scheduler.py')
-rw-r--r--  ethosu/vela/scheduler.py  56
1 file changed, 8 insertions(+), 48 deletions(-)
diff --git a/ethosu/vela/scheduler.py b/ethosu/vela/scheduler.py
index a50f262e..16531c2c 100644
--- a/ethosu/vela/scheduler.py
+++ b/ethosu/vela/scheduler.py
@@ -60,7 +60,6 @@ from .nn_graph import Subgraph
from .live_range import ofm_can_reuse_ifm
from .numeric_util import round_down
from .numeric_util import round_up
-from .operation import NpuBlockType
from .operation import Op
from .shape4d import Shape4D
from .tensor import MemArea
@@ -213,6 +212,14 @@ class SchedulerOperation:
ps.ofm_tensor.format,
)
+ # LUT tensors must be placed in the SHRAM area. The copy is done by a DMA
+ # command generated by the high-level command stream generator.
+ for idx, tens in enumerate(self.parent_op.inputs):
+ if tens.purpose == TensorPurpose.LUT:
+ new_tens = tens.clone_into_shram(self.arch)
+ new_tens.consumer_list.append(self.parent_op)
+ self.parent_op.inputs[idx] = new_tens
+
# Input volume width and height required to produce the smallest possible stripe
self.min_stripe_input_w, self.min_stripe_input_h = self._calculate_min_stripe_input()
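Note: the clone_into_shram() helper called in the hunk above is not part of this diff; it presumably lives alongside the other Tensor clone helpers in ethosu/vela/tensor.py. A minimal sketch of what such a helper might look like, modelled on the clone_into_fast_storage() pattern visible in the removed code below (the exact fields set here are assumptions, not taken from this patch):

    # Sketch only: MemArea/MemType are the enums from ethosu/vela/tensor.py;
    # the field assignments are assumed, not confirmed by this diff.
    def clone_into_shram(self, arch) -> "Tensor":
        res = self.clone(suffix="_shram")      # copy of the LUT tensor
        res.mem_area = MemArea.Shram           # place the copy in SHRAM
        res.mem_type = MemType.Scratch_fast    # assumed: treated as fast scratch memory
        res.src_tensor = self                  # assumed: source reference used when the DMA copy is generated
        return res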
@@ -1379,52 +1386,6 @@ class Scheduler:
)
assert max(max_mem_usage) <= staging_limit, "Allocation exceeds staging limit"
- def move_constant_data(self):
- """Determine if data can be moved from permanent storage to another memory area. A move will generate a DMA
- command in the high-level command stream"""
- for sched_op in self.sched_ops:
- parent_op = sched_op.parent_op
- is_lut_used = any(inp.purpose == TensorPurpose.LUT for inp in parent_op.inputs)
- max_ifm_shram_avail = (
- (self.arch.available_shram_banks(is_lut_used) - self.arch.shram_reserved_output_banks)
- * self.arch.shram_bank_size
- // 2
- )
-
- for idx, tens in enumerate(parent_op.inputs):
- if tens.mem_type not in (MemType.Scratch, MemType.Scratch_fast):
- # Tensor is in permanent storage
- # Only when permanent storage differs from feature map storage, there is a point moving the data
- if (
- tens.mem_area in self.arch.permanent_storage_mem_area
- and self.arch.permanent_storage_mem_area != self.arch.feature_map_storage_mem_area
- ) or tens.purpose == TensorPurpose.LUT:
- if tens.purpose == TensorPurpose.LUT or (
- # For elementwise broadcast
- tens.purpose == TensorPurpose.FeatureMap
- and sched_op.op_type.is_binary_elementwise_op()
- and tens.shape != []
- and sched_op.ifm.shape != sched_op.ofm.shape
- and parent_op.write_shape is None
- and tens.storage_size() > max_ifm_shram_avail
- ):
- only_vector_product_consumers = all(
- oper and oper.type.npu_block_type == NpuBlockType.VectorProduct
- for oper in tens.consumers()
- )
-
- if (not only_vector_product_consumers) or tens.purpose == TensorPurpose.LUT:
- new_tens = tens.clone_into_fast_storage(self.arch)
- if tens.purpose == TensorPurpose.LUT:
- new_tens.mem_area = MemArea.Shram
-
- new_tens.consumer_list.append(parent_op)
- parent_op.inputs[idx] = new_tens
- # If the index is out of range, IFM and IFM2 are the same tensor
- # and pass inputs don't have duplicates
- if idx < len(sched_op.parent_ps.inputs):
- sched_op.parent_ps.inputs[idx] = new_tens
-
def print_schedule(self, schedule: Schedule):
print(f"Schedule: '{schedule.name}'")
for sched_op in self.sched_ops:
@@ -1634,7 +1595,6 @@ def schedule_passes(nng: Graph, arch: ArchitectureFeatures, options, scheduler_o
scheduler.create_scheduler_representation(arch)
sg.sched_ops = scheduler.sched_ops
- scheduler.move_constant_data()
# Create the Max schedule template
max_schedule_template = scheduler.create_initial_schedule()
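Taken together, the patch replaces the scheduler-wide move_constant_data() pass with per-operation LUT handling performed when each SchedulerOperation is constructed. A before/after sketch of the LUT path, using only calls that appear in this diff:

    # Old path (removed from Scheduler.move_constant_data):
    new_tens = tens.clone_into_fast_storage(self.arch)
    if tens.purpose == TensorPurpose.LUT:
        new_tens.mem_area = MemArea.Shram
    # New path (added to SchedulerOperation.__init__):
    new_tens = tens.clone_into_shram(self.arch)

In both versions the clone is appended to the tensor's consumer_list and substituted into parent_op.inputs; the new code targets SHRAM directly, while the non-LUT branch of move_constant_data() (the elementwise-broadcast feature-map move) does not reappear in this file.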