Diffstat (limited to 'ethosu/vela/scheduler.py')
-rw-r--r-- | ethosu/vela/scheduler.py | 56
1 file changed, 8 insertions(+), 48 deletions(-)
diff --git a/ethosu/vela/scheduler.py b/ethosu/vela/scheduler.py
index a50f262e..16531c2c 100644
--- a/ethosu/vela/scheduler.py
+++ b/ethosu/vela/scheduler.py
@@ -60,7 +60,6 @@ from .nn_graph import Subgraph
 from .live_range import ofm_can_reuse_ifm
 from .numeric_util import round_down
 from .numeric_util import round_up
-from .operation import NpuBlockType
 from .operation import Op
 from .shape4d import Shape4D
 from .tensor import MemArea
@@ -213,6 +212,14 @@ class SchedulerOperation:
             ps.ofm_tensor.format,
         )

+        # LUT must be placed in shram area. The copy is done by DMA
+        # generated by the high level command stream generator.
+        for idx, tens in enumerate(self.parent_op.inputs):
+            if tens.purpose == TensorPurpose.LUT:
+                new_tens = tens.clone_into_shram(self.arch)
+                new_tens.consumer_list.append(self.parent_op)
+                self.parent_op.inputs[idx] = new_tens
+
         # Input volume width and height required to produce the smallest possible stripe
         self.min_stripe_input_w, self.min_stripe_input_h = self._calculate_min_stripe_input()

@@ -1379,52 +1386,6 @@ class Scheduler:
         )
         assert max(max_mem_usage) <= staging_limit, "Allocation exceeds staging limit"

-    def move_constant_data(self):
-        """Determine if data can be moved from permanent storage to another memory area. A move will generate a DMA
-        command in the high-level command stream"""
-        for sched_op in self.sched_ops:
-            parent_op = sched_op.parent_op
-            is_lut_used = any(inp.purpose == TensorPurpose.LUT for inp in parent_op.inputs)
-            max_ifm_shram_avail = (
-                (self.arch.available_shram_banks(is_lut_used) - self.arch.shram_reserved_output_banks)
-                * self.arch.shram_bank_size
-                // 2
-            )
-
-            for idx, tens in enumerate(parent_op.inputs):
-                if tens.mem_type not in (MemType.Scratch, MemType.Scratch_fast):
-                    # Tensor is in permanent storage
-                    # Only when permanent storage differs from feature map storage, there is a point moving the data
-                    if (
-                        tens.mem_area in self.arch.permanent_storage_mem_area
-                        and self.arch.permanent_storage_mem_area != self.arch.feature_map_storage_mem_area
-                    ) or tens.purpose == TensorPurpose.LUT:
-                        if tens.purpose == TensorPurpose.LUT or (
-                            # For elementwise broadcast
-                            tens.purpose == TensorPurpose.FeatureMap
-                            and sched_op.op_type.is_binary_elementwise_op()
-                            and tens.shape != []
-                            and sched_op.ifm.shape != sched_op.ofm.shape
-                            and parent_op.write_shape is None
-                            and tens.storage_size() > max_ifm_shram_avail
-                        ):
-                            only_vector_product_consumers = all(
-                                oper and oper.type.npu_block_type == NpuBlockType.VectorProduct
-                                for oper in tens.consumers()
-                            )
-
-                            if (not only_vector_product_consumers) or tens.purpose == TensorPurpose.LUT:
-                                new_tens = tens.clone_into_fast_storage(self.arch)
-                                if tens.purpose == TensorPurpose.LUT:
-                                    new_tens.mem_area = MemArea.Shram
-
-                                new_tens.consumer_list.append(parent_op)
-                                parent_op.inputs[idx] = new_tens
-                                # If the index is out of range, IFM and IFM2 are the same tensor
-                                # and pass inputs don't have duplicates
-                                if idx < len(sched_op.parent_ps.inputs):
-                                    sched_op.parent_ps.inputs[idx] = new_tens
-
     def print_schedule(self, schedule: Schedule):
         print(f"Schedule: '{schedule.name}'")
         for sched_op in self.sched_ops:
@@ -1634,7 +1595,6 @@ def schedule_passes(nng: Graph, arch: ArchitectureFeatures, options, scheduler_o
     scheduler.create_scheduler_representation(arch)
     sg.sched_ops = scheduler.sched_ops

-    scheduler.move_constant_data()

     # Create the Max schedule template
     max_schedule_template = scheduler.create_initial_schedule()
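In short, the commit deletes the general-purpose move_constant_data() pass and instead relocates LUT inputs into SHRAM at the point where each SchedulerOperation is constructed; per the in-diff comment, the actual copy is a DMA emitted later by the high-level command stream generator. Below is a minimal, runnable sketch of the retained behaviour. Tensor, Op, TensorPurpose, and relocate_lut_inputs here are simplified hypothetical stand-ins for illustration only, not the real Vela classes; only the clone_into_shram name mirrors a method called in the diff.

from enum import Enum, auto


class TensorPurpose(Enum):
    # Simplified stand-in for Vela's TensorPurpose enum.
    FeatureMap = auto()
    LUT = auto()


class Tensor:
    # Hypothetical stub reduced to the fields the new loop touches.
    def __init__(self, name, purpose):
        self.name = name
        self.purpose = purpose
        self.consumer_list = []

    def clone_into_shram(self, arch):
        # Stand-in for the clone_into_shram() call in the diff: return a
        # copy whose storage is pinned to SHRAM so that a DMA can fill it.
        return Tensor(self.name + "_shram", self.purpose)


class Op:
    # Hypothetical stub of an operation with a flat input list.
    def __init__(self, inputs):
        self.inputs = inputs


def relocate_lut_inputs(op, arch=None):
    # Mirrors the loop added to SchedulerOperation.__init__: replace each
    # LUT input with a SHRAM-resident clone and register the op as the
    # clone's consumer.
    for idx, tens in enumerate(op.inputs):
        if tens.purpose == TensorPurpose.LUT:
            new_tens = tens.clone_into_shram(arch)
            new_tens.consumer_list.append(op)
            op.inputs[idx] = new_tens


op = Op([Tensor("ifm", TensorPurpose.FeatureMap), Tensor("lut", TensorPurpose.LUT)])
relocate_lut_inputs(op)
assert op.inputs[1].name == "lut_shram" and op.inputs[1].consumer_list == [op]

Note what disappears along with move_constant_data(): the old pass also cloned large broadcast feature-map inputs into fast storage (clone_into_fast_storage) and patched the corresponding pass inputs. After this commit only the LUT case remains, handled eagerly per operation rather than in a separate scheduler pass.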