diff options
Diffstat (limited to 'ethosu/vela/scheduler.py')
-rw-r--r-- | ethosu/vela/scheduler.py | 47 |
1 files changed, 24 insertions, 23 deletions
diff --git a/ethosu/vela/scheduler.py b/ethosu/vela/scheduler.py index 2174a6e3..4befad49 100644 --- a/ethosu/vela/scheduler.py +++ b/ethosu/vela/scheduler.py @@ -416,6 +416,9 @@ class Scheduler: self.max_schedule: Optional[Schedule] = None self.scheduler_options = options + # sram limit can be changed when scheduling for Size + self.sram_limit = options.optimization_sram_limit + self.scratched_fms: Dict[Tensor, Any] = {} self.evicted_fms: List[live_range.LiveRange] = [] @@ -1045,9 +1048,7 @@ class Scheduler: cascade_builder = CascadeBuilder(sub_schedule_ops, self.arch.is_spilling_enabled(), non_local_mem_usage) # Start by adding buffering - buffered_sub_schedule = self.propose_schedule_buffering( - sub_schedule, self.scheduler_options.optimization_sram_limit - ) + buffered_sub_schedule = self.propose_schedule_buffering(sub_schedule, self.sram_limit) # Copy the cascades over from the unbuffered-schedule buffered_sub_schedule.cascades = sub_schedule.cascades @@ -1095,12 +1096,10 @@ class Scheduler: schedule: Schedule, max_sched: Schedule, max_template: Schedule, - options: SchedulerOptions, ) -> Schedule: """Extracts sub-schedules based on the cascades and optimizes them and applies them to the final schedule""" - verbose_progress = options.verbose_progress - sram_limit = options.optimization_sram_limit - if max_sched.fast_storage_peak_usage < sram_limit and not self.arch.is_spilling_enabled(): + verbose_progress = self.scheduler_options.verbose_progress + if max_sched.fast_storage_peak_usage < self.sram_limit and not self.arch.is_spilling_enabled(): # Maximum performance schedule fits within the SRAM target return max_sched @@ -1109,7 +1108,7 @@ class Scheduler: for index, cascade_info in enumerate(cascades): progress_print(verbose_progress, "Processing cascade", index, cascades) # Optimize the sub-schedule in this cascade - opt_sub_schedule = self.optimize_sub_schedule(cascade_info, schedule, max_template, sram_limit) + opt_sub_schedule = self.optimize_sub_schedule(cascade_info, schedule, max_template, self.sram_limit) if opt_sub_schedule: # Remove the existing cascade del schedule.cascades[cascade_info.end] @@ -1121,7 +1120,7 @@ class Scheduler: self.sg.schedule = schedule self.update_op_memory_snapshot(schedule) # Propose schedule buffering to the optimized schedule - optimized_sched = self.propose_schedule_buffering(schedule, self.scheduler_options.optimization_sram_limit) + optimized_sched = self.propose_schedule_buffering(schedule, self.sram_limit) # Copy the cascade's metadata from the unbuffered schedule optimized_sched.cascades = schedule.cascades return optimized_sched @@ -1129,9 +1128,8 @@ class Scheduler: def optimize_weight_buffering_size( self, min_schedule: Schedule, - options: SchedulerOptions, ): - verbose_progress = options.verbose_progress + verbose_progress = self.scheduler_options.verbose_progress default_schedule = self.sg.schedule npu_performance.calc_new_performance_for_network(self.nng, self.arch, None, False) default_tot_cycles = self.nng.cycles[npu_performance.PassCycles.Total] @@ -1181,14 +1179,14 @@ class Scheduler: self.update_op_memory_snapshot(self.sg.schedule) # Run schedule buffering - with weight buffer size reduction - schedule = self.propose_schedule_buffering(self.sg.schedule, options.optimization_sram_limit) + schedule = self.propose_schedule_buffering(self.sg.schedule, self.sram_limit) schedule.cascades = self.sg.schedule.cascades self.sg.schedule = schedule # Apply new buffer schdule and calc new performance self.update_op_memory_snapshot(self.sg.schedule) self.apply_schedule(self.sg.schedule) - self.use_fast_storage_for_feature_maps(self.sg.schedule, options.optimization_sram_limit) + self.use_fast_storage_for_feature_maps(self.sg.schedule, self.sram_limit) npu_performance.calc_new_performance_for_network(self.nng, self.arch, None, False) new_tot_cycles = self.nng.cycles[npu_performance.PassCycles.Total] @@ -1214,7 +1212,7 @@ class Scheduler: self.sg.schedule = default_schedule self.update_op_memory_snapshot(self.sg.schedule) self.apply_schedule(self.sg.schedule) - self.use_fast_storage_for_feature_maps(self.sg.schedule, options.optimization_sram_limit) + self.use_fast_storage_for_feature_maps(self.sg.schedule, self.sram_limit) def apply_schedule(self, sched: Schedule): """Applies the given schedule as a final solution""" @@ -1628,7 +1626,7 @@ def schedule_passes(nng: Graph, arch: ArchitectureFeatures, options, scheduler_o progress_print(verbose_progress, "Creating minimal schedule") # Create Min schedule min_schedule = scheduler.propose_minimal_schedule() - initial_sram_limit = scheduler_options.optimization_sram_limit + initial_sram_limit = scheduler.sram_limit if scheduler_options.optimization_strategy == OptimizationStrategy.Size: initial_sram_limit = scheduler.min_memory_req @@ -1638,22 +1636,25 @@ def schedule_passes(nng: Graph, arch: ArchitectureFeatures, options, scheduler_o sg.schedule = min_schedule scheduler.update_op_memory_snapshot(min_schedule) - if scheduler_options.optimization_strategy == OptimizationStrategy.Performance: + if scheduler_options.optimization_strategy == OptimizationStrategy.Size: progress_print(verbose_progress, "Creating schedule optimized for performance") - # Create an optimized schedule - sg.schedule = scheduler.optimize_schedule( - min_schedule, opt_max_schedule, max_schedule_template, scheduler_options - ) - scheduler.update_op_memory_snapshot(sg.schedule) + # Update sram limit to peak usage from the minimum scheduler when optimizing for Size. + # Then optimize schedule can be called for both OptimizationStrategy Performance and Size + # as long the max sram usage is <= scheduler.sram_limit + scheduler.sram_limit = min_schedule.fast_storage_peak_usage + + # Create an optimized schedule + sg.schedule = scheduler.optimize_schedule(min_schedule, opt_max_schedule, max_schedule_template) + scheduler.update_op_memory_snapshot(sg.schedule) scheduler.apply_schedule(sg.schedule) - scheduler.use_fast_storage_for_feature_maps(sg.schedule, scheduler_options.optimization_sram_limit) + scheduler.use_fast_storage_for_feature_maps(sg.schedule, scheduler.sram_limit) if scheduler_options.optimization_strategy == OptimizationStrategy.Performance and scheduler.evicted_fms: progress_print(verbose_progress, "Optimizing weight buffering size") # It might be possible to gain performance by reducing # weight buffer size and instead fit fms in fast storage - scheduler.optimize_weight_buffering_size(min_schedule, scheduler_options) + scheduler.optimize_weight_buffering_size(min_schedule) if scheduler_options.verbose_schedule: scheduler.print_schedule(sg.schedule) |