From 1c54ac1499da4b1c0de39336c1a9b22e506388b1 Mon Sep 17 00:00:00 2001
From: Raul Farkas
Date: Wed, 26 Apr 2023 07:49:15 +0100
Subject: MLBEDSW-7390: Add verbose progress option

Add --verbose-progress CLI option used to enable printing progress
information in the compiler driver and scheduler.

Change-Id: I99ac8c6a654e60391d5c11e28b89250405daa53a
Signed-off-by: Raul Farkas
---
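Illustration only — this note sits between the `---` marker and the diffstat, a position that `git am` ignores. The sketch below mirrors the throttling logic of the `progress_print` helper added in ethosu/vela/utils.py and shows which iterations of a hot loop actually print. `run_demo_passes` and its 101-element list are made-up stand-ins; only `progress_print` and its parameters come from this patch, and the import assumes the patch has been applied.

```python
# Demo of progress_print's output throttling (hypothetical driver code).
from ethosu.vela.utils import progress_print  # assumes this patch is applied


def run_demo_passes(verbose_progress: bool = True) -> None:
    demo_passes = list(range(101))  # stand-in for e.g. sg.cascaded_passes
    for index, _ in enumerate(demo_passes):
        progress_print(
            verbose_progress,
            "Processing cascaded pass",
            progress_counter=index,
            progress_total=demo_passes,  # a Sized collection; len() is taken internally
            progress_granularity=0.25,  # print roughly every 25% of iterations
        )


run_demo_passes()
# Prints (the prefix is the calling function's name, found via inspect.stack()):
#   run_demo_passes: Processing cascaded pass 0/100
#   run_demo_passes: Processing cascaded pass 25/100
#   run_demo_passes: Processing cascaded pass 50/100
#   run_demo_passes: Processing cascaded pass 75/100
#   run_demo_passes: Processing cascaded pass 100/100
```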
 OPTIONS.md                       |  8 ++++
 ethosu/vela/compiler_driver.py   | 16 +++++++-
 ethosu/vela/live_range.py        | 10 +++--
 ethosu/vela/scheduler.py         | 75 +++++++++++++++++++++-------------
 ethosu/vela/tensor_allocation.py |  6 ++-
 ethosu/vela/utils.py             | 86 ++++++++++++++++++++++++++++++++++++++++
 ethosu/vela/vela.py              |  3 ++
 7 files changed, 169 insertions(+), 35 deletions(-)
 create mode 100644 ethosu/vela/utils.py

diff --git a/OPTIONS.md b/OPTIONS.md
index 36cd1722..9eaff235 100644
--- a/OPTIONS.md
+++ b/OPTIONS.md
@@ -432,6 +432,14 @@ Verbose weights information.
 vela network.tflite --verbose-weights
 ```
 
+### Verbose Progress
+
+Verbose progress information from the compiler driver and scheduler.
+
+```bash
+vela network.tflite --verbose-progress
+```
+
 ## Configuration File
 
 This is used to describe various properties of the Ethos-U embedded system. The
diff --git a/ethosu/vela/compiler_driver.py b/ethosu/vela/compiler_driver.py
index d2892096..51c97070 100644
--- a/ethosu/vela/compiler_driver.py
+++ b/ethosu/vela/compiler_driver.py
@@ -40,6 +40,7 @@ from .scheduler import OptimizationStrategy
 from .tensor import MemArea
 from .tensor import MemType
 from .tensor import Tensor
+from .utils import progress_print
 
 
 class CompilerOptions:
@@ -62,6 +63,7 @@ class CompilerOptions:
         verbose_operators=False,
         verbose_weights=False,
         verbose_performance=False,
+        verbose_progress=False,
         show_cpu_operations=False,
         tensor_allocator=TensorAllocator.Greedy,
         timing=False,
@@ -82,6 +84,7 @@
         self.verbose_operators = verbose_operators
         self.verbose_weights = verbose_weights
         self.verbose_performance = verbose_performance
+        self.verbose_progress = verbose_progress
         self.show_cpu_operations = show_cpu_operations
         self.tensor_allocator = tensor_allocator
         self.timing = timing
@@ -154,11 +157,13 @@ def _check_schedule(nng, arch, scheduler_options):
 
 def compiler_driver(nng, arch, options, scheduler_options, network_type, output_basename):
     assert verify_graph_health(nng)
+    verbose_progress = scheduler_options.verbose_progress
 
     # Pre-optimisation operator tracking
     for sg in nng.subgraphs:
         visit_graph_post_order(sg.output_tensors, arch, [], [_record_operator])
 
+    progress_print(verbose_progress, "Performing graph optimisation")
     nng = graph_optimiser.optimise_graph(
         nng, arch, network_type, options.verbose_graph, options.force_symmetric_int_weights
     )
@@ -167,17 +172,22 @@ def compiler_driver(nng, arch, options, scheduler_options, network_type, output_
     if options.verbose_quantization:
         nng.print_graph_with_tensor_quantization()
 
+    progress_print(verbose_progress, "Defining tensor purpose")
     nng = mark_tensors.mark_tensor_purpose(nng, arch, options.verbose_tensor_purpose)
     assert verify_graph_health(nng)
+
+    progress_print(verbose_progress, "Performing pass packing")
     pass_packing.pack_into_passes(nng, arch, options.verbose_packing)
     assert verify_graph_health(nng)
+    progress_print(verbose_progress, "Extracting npu subgraphs")
     extract_npu_subgraphs.extract_npu_subgraphs(nng, arch)
 
     assert verify_graph_health(nng)
     if options.timing:
         start = time.time()
 
+    progress_print(verbose_progress, "Scheduling passes")
     # Run the scheduler
     scheduler.schedule_passes(nng, arch, options, scheduler_options)
     _check_schedule(nng, arch, scheduler_options)
 
@@ -199,6 +209,7 @@ def compiler_driver(nng, arch, options, scheduler_options, network_type, output_
     # Create list of NPU subgraphs with same order as the list of all subgraphs
     npu_subgraphs = [sg for sg in nng.subgraphs if sg.placement == PassPlacement.Npu]
 
+    progress_print(verbose_progress, "Calculating live ranges for constant NPU tensors")
     # Calculate live ranges for all constant Npu tensors, in permanent storage
     for sg in npu_subgraphs:
         lr_graph_flash = live_range.create_linear_live_range_graph(
@@ -209,6 +220,7 @@ def compiler_driver(nng, arch, options, scheduler_options, network_type, output_
         )
 
     if npu_subgraphs:
+        progress_print(verbose_progress, "Allocating NPU constant tensors to the first NPU subgraph")
         # Allocate all Npu constant tensors to the first Npu subgraph since it is
         # processed first during serialization into tensors
         first_npu_sg = npu_subgraphs[0]
@@ -225,6 +237,7 @@ def compiler_driver(nng, arch, options, scheduler_options, network_type, output_
 
     root_sg = nng.get_root_subgraph()
 
+    progress_print(verbose_progress, "Generating command stream")
     # Generate command streams and serialise Npu-ops into tensors
     for sg in npu_subgraphs:
         high_level_command_stream_generator.generate_high_level_command_stream_for_schedule(
@@ -249,6 +262,7 @@ def compiler_driver(nng, arch, options, scheduler_options, network_type, output_
     if scratch_fast_tens is not None:
         scratch_fast_tens.set_all_shapes([root_sg.memory_used_per_type.get(MemType.Scratch_fast, 0)])
 
+    progress_print(verbose_progress, "Allocating CPU constant tensors")
     # Allocate all Cpu constant tensors, this is done last because the Npu-ops
     # have to be serialized into flash and scratch tensors first
     tensor_allocation.allocate_tensors(
@@ -261,7 +275,7 @@ def compiler_driver(nng, arch, options, scheduler_options, network_type, output_
         verbose_allocation=options.verbose_allocation,
         cpu_tensor_alignment=options.cpu_tensor_alignment,
     )
-
+    progress_print(verbose_progress, "Calculating new performance for the network")
     npu_performance.calc_new_performance_for_network(
         nng, arch, network_type, options.verbose_performance, output_basename
     )
diff --git a/ethosu/vela/live_range.py b/ethosu/vela/live_range.py
index d64f68e0..9f94dd63 100644
--- a/ethosu/vela/live_range.py
+++ b/ethosu/vela/live_range.py
@@ -27,6 +27,7 @@ from .tensor import MemArea
 from .tensor import MemType
 from .tensor import Tensor
 from .tensor import TensorPurpose
+from .utils import progress_print
 
 
 class LiveRange:
@@ -231,6 +232,7 @@ def extract_live_ranges_from_cascaded_passes(
     target_mem_type_set,
     lr_graph=None,
     cpu_tensor_alignment=Tensor.AllocationQuantum,
+    verbose_progress: bool = False,
 ):
     if lr_graph is None:
         lr_graph = LiveRangeGraph()
@@ -239,7 +241,8 @@
         # if subgraph has been processed already, return the lr_graph as is
         return lr_graph
 
-    for cps in sg.cascaded_passes:
+    for index, cps in enumerate(sg.cascaded_passes):
+        progress_print(verbose_progress, "Processing cascaded pass", index, sg.cascaded_passes)
         cps.time = lr_graph.current_time
 
         time_for_pass = cps.time
@@ -320,9 +323,10 @@ def create_linear_live_range_graph(sg, target_mem_area, target_mem_type_set, lr_
     return lr_graph
 
 
-def extract_live_ranges_from_schedule(sg, target_mem_area, target_mem_type_set, lr_graph):
+def extract_live_ranges_from_schedule(sg, target_mem_area, target_mem_type_set, lr_graph, verbose_progress=False):
     time_for_cascade = {}
-    for sched_op in sg.sched_ops:
+    for index, sched_op in enumerate(sg.sched_ops):
+        progress_print(verbose_progress, "Processing SchedulerOp", index, sg.sched_ops)
         op_info = sg.schedule.cost_map[sched_op]
         cascade = op_info.cascade
         cascade_info = sg.schedule.cascades.get(cascade, None)
diff --git a/ethosu/vela/scheduler.py b/ethosu/vela/scheduler.py
index cbd7ce44..6e2cd4a6 100644
--- a/ethosu/vela/scheduler.py
+++ b/ethosu/vela/scheduler.py
@@ -31,6 +31,8 @@ from typing import Optional
 from typing import Tuple
 from typing import TYPE_CHECKING
 
+from .utils import progress_print
+
 # Import needed for Type annotations. Only import for Type checking to avoid run-time errors due to cyclic import.
 if TYPE_CHECKING:
     from .npu_performance import CycleCost
@@ -148,10 +150,12 @@
         optimization_strategy,
         sram_target,
         verbose_schedule,
+        verbose_progress=False,
     ):
         self.optimization_strategy = optimization_strategy
         self.optimization_sram_limit = sram_target
         self.verbose_schedule = verbose_schedule
+        self.verbose_progress = verbose_progress
 
     def __str__(self) -> str:
         return f"{type(self).__name__}: {str(self.__dict__)}"
@@ -531,7 +535,9 @@ class Scheduler:
     def create_initial_schedule(self) -> Schedule:
         """Creates an initial schedule with no cascading or buffering of any kind"""
         schedule = Schedule(self.sg, "MAX")
-        for op in self.sched_ops:
+        verbose_progress = self.scheduler_options.verbose_progress
+        for index, op in enumerate(self.sched_ops):
+            progress_print(verbose_progress, "Processing SchedulerOp", index, self.sched_ops)
             cost = op.create_scheduler_info(self.nng, op.ofm.shape)
             cost.cycles = self.estimate_op_performance(op, cost.block_config, op.ofm.shape.depth)
             schedule.cost_map[op] = cost
@@ -540,16 +546,12 @@
 
     def update_op_memory_snapshot(self, schedule: Schedule):
         memories_list = [(self.arch.fast_storage_mem_area, set((MemType.Scratch, MemType.Scratch_fast)))]
-
+        verbose_progress = self.scheduler_options.verbose_progress
+        progress_print(verbose_progress, "")
         # Collect live ranges from tensors
         lr_graph = live_range.LiveRangeGraph()
         for mem_area, mem_type_set in memories_list:
-            live_range.extract_live_ranges_from_schedule(
-                self.sg,
-                mem_area,
-                mem_type_set,
-                lr_graph,
-            )
+            live_range.extract_live_ranges_from_schedule(self.sg, mem_area, mem_type_set, lr_graph, verbose_progress)
 
         # Populate time-array with memory used by live ranges
         temporal_usage = lr_graph.get_temporal_memory_usage(self.arch.fast_storage_mem_area)
@@ -607,9 +609,10 @@
     def propose_schedule_buffering(self, ref_schedule: Schedule, staging_limit_bytes):
         """Create a buffered schedule"""
         buffered_schedule = Schedule(self.sg, f"{ref_schedule.label}_BUFFERED")
-
+        verbose_progress = self.scheduler_options.verbose_progress
         prev_op = None
-        for sched_op in self.sched_ops:
+        for index, sched_op in enumerate(self.sched_ops):
+            progress_print(verbose_progress, "Processing SchedulerOp", index, self.sched_ops)
             if sched_op not in ref_schedule.cost_map:
                 # sched_op is not part of this sub-schedule - skip
                 continue
@@ -871,10 +874,11 @@
         next operators stride"""
         min_schedule = Schedule(self.sg, "MIN")
         cost_map = min_schedule.cost_map
-
+        verbose_progress = self.scheduler_options.verbose_progress
         # Keep track of the previous Op - which consumes the current Op's OFM
         prev_op: Optional[SchedulerOperation] = None
-        for sched_op in reversed(self.sched_ops):
+        for index, sched_op in enumerate(reversed(self.sched_ops)):
+            progress_print(verbose_progress, "Processing SchedulerOp", index, self.sched_ops)
             min_stripe_height = prev_op.kernel.stride.y if prev_op else 1
             min_stripe = sched_op.ofm.shape.with_height(min_stripe_height)
 
@@ -968,13 +972,15 @@ class Scheduler:
         return peak_mem_usage
 
     def build_cascades_for_min_schedule(self, min_schedule: Schedule, max_template: Schedule, memory_limit: int):
+        verbose_progress = self.scheduler_options.verbose_progress
         # Update memory snapshot
         self.sg.schedule = min_schedule
         self.update_op_memory_snapshot(min_schedule)
 
         # Calculate residual memory for Min schedule
         non_local_mem_usage = {}
-        for sched_op in self.sched_ops:
+        for index, sched_op in enumerate(self.sched_ops):
+            progress_print(verbose_progress, "Processing SchedulerOp", index, self.sched_ops)
             time_index = min_schedule.cost_map[sched_op].time_index
 
             if self.arch.is_spilling_enabled():
@@ -1089,13 +1095,16 @@ class Scheduler:
         options: SchedulerOptions,
     ) -> Schedule:
         """Extracts sub-schedules based on the cascades and optimizes them and applies them to the final schedule"""
+        verbose_progress = options.verbose_progress
         sram_limit = options.optimization_sram_limit
         if max_sched.fast_storage_peak_usage < sram_limit and not self.arch.is_spilling_enabled():
             # Maximum performance schedule fits within the SRAM target
             return max_sched
 
         # Iterate over a copy of the cascades since they may change during the loop
-        for cascade_info in list(schedule.cascades.values()):
+        cascades = list(schedule.cascades.values())
+        for index, cascade_info in enumerate(cascades):
+            progress_print(verbose_progress, "Processing cascade", index, cascades)
             # Optimize the sub-schedule in this cascade
             opt_sub_schedule = self.optimize_sub_schedule(cascade_info, schedule, max_template, sram_limit)
             if opt_sub_schedule:
@@ -1119,6 +1128,7 @@ class Scheduler:
         min_schedule: Schedule,
         options: SchedulerOptions,
     ):
+        verbose_progress = options.verbose_progress
        default_schedule = self.sg.schedule
        npu_performance.calc_new_performance_for_network(self.nng, self.arch, None, False)
        default_tot_cycles = self.nng.cycles[npu_performance.PassCycles.Total]
@@ -1135,12 +1145,7 @@
         memories_list = [(self.arch.fast_storage_mem_area, set((MemType.Scratch, MemType.Scratch_fast)))]
         lr_graph = live_range.LiveRangeGraph()
         for mem_area, mem_type_set in memories_list:
-            live_range.extract_live_ranges_from_schedule(
-                self.sg,
-                mem_area,
-                mem_type_set,
-                lr_graph,
-            )
+            live_range.extract_live_ranges_from_schedule(self.sg, mem_area, mem_type_set, lr_graph, verbose_progress)
 
         # Find the relation between the sched_op and the buffering tensor
         weight_ops = {}
@@ -1416,7 +1421,9 @@
                 print(f"\t\t{i}: {cascade.start} -> {cascade.end}, size: {cascade.mem_usage}")
 
 
-def _update_memory_snapshot_for_all_npu_graphs(nng: Graph, arch: ArchitectureFeatures, schedulers):
+def _update_memory_snapshot_for_all_npu_graphs(
+    nng: Graph, arch: ArchitectureFeatures, schedulers, verbose_progress: bool = False
+):
     mem_area = arch.fast_storage_mem_area
     mem_type_set = set((MemType.Scratch, MemType.Scratch_fast))
 
@@ -1426,11 +1433,7 @@ def _update_memory_snapshot_for_all_npu_graphs(nng: Graph, arch: ArchitectureFea
     # will be set for all the tensors.
     lr_graph = live_range.LiveRangeGraph()
     live_range.extract_live_ranges_from_cascaded_passes(
-        nng.get_root_subgraph(),
-        mem_area,
-        mem_type_set,
-        lr_graph,
-        Tensor.AllocationQuantum,
+        nng.get_root_subgraph(), mem_area, mem_type_set, lr_graph, Tensor.AllocationQuantum, verbose_progress
     )
     # Populate time-array with memory used by live ranges
     temporal_usage = lr_graph.get_temporal_memory_usage(arch.fast_storage_mem_area)
@@ -1471,6 +1474,7 @@ def _update_tensor_allocation(nng: Graph, arch: ArchitectureFeatures, options):
             mem_type_set,
             tensor_allocator=options.tensor_allocator,
             verbose_allocation=options.verbose_allocation,
+            verbose_progress=options.verbose_progress,
             cpu_tensor_alignment=options.cpu_tensor_alignment,
             hillclimb_max_iterations=options.hillclimb_max_iterations,
         )
@@ -1570,14 +1574,17 @@ class FastStorageComponentAllocator:
 
 def schedule_passes(nng: Graph, arch: ArchitectureFeatures, options, scheduler_options: SchedulerOptions):
     """Entry point for the Scheduler"""
+    verbose_progress = scheduler_options.verbose_progress
     # Initialize CPU subgraphs
     schedulers = dict()
     # Initialize schedulers with max schedule. Only schedule NPU subgraphs
-    for sg in nng.subgraphs:
+    for sg_idx, sg in enumerate(nng.subgraphs):
+        progress_print(verbose_progress, "Processing subgraph", sg_idx, nng.subgraphs)
         if sg.placement != PassPlacement.Npu:
             # Create cascaded passes for CPU Ops
             cascaded_passes = []
-            for idx, ps in enumerate(sg.passes):
+            for pass_idx, ps in enumerate(sg.passes):
+                progress_print(verbose_progress, "Creating cascaded passes for CPU op", pass_idx, sg.passes)
                 cps = CascadedPass(
                     ps.name,
                     SchedulingStrategy.WeightStream,
@@ -1589,7 +1596,7 @@ def schedule_passes(nng: Graph, arch: ArchitectureFeatures, options, scheduler_o
                     False,
                 )
 
-                cps.time = idx
+                cps.time = pass_idx
                 ps.cascade = cps
                 cascaded_passes.append(cps)
 
@@ -1599,6 +1606,7 @@ def schedule_passes(nng: Graph, arch: ArchitectureFeatures, options, scheduler_o
             scheduler = Scheduler(nng, sg, arch, scheduler_options)
             schedulers[sg] = scheduler
 
+            progress_print(verbose_progress, "Creating scheduler representation")
             scheduler.create_scheduler_representation(arch)
             sg.sched_ops = scheduler.sched_ops
 
@@ -1606,6 +1614,7 @@ def schedule_passes(nng: Graph, arch: ArchitectureFeatures, options, scheduler_o
             max_schedule_template = scheduler.create_initial_schedule()
             scheduler.max_schedule = max_schedule_template
 
+            progress_print(verbose_progress, "Creating optimised max schedule")
             # Create the optimised Max schedule
             sg.schedule = max_schedule_template
             scheduler.update_op_memory_snapshot(max_schedule_template)
@@ -1613,6 +1622,7 @@ def schedule_passes(nng: Graph, arch: ArchitectureFeatures, options, scheduler_o
             sg.schedule = opt_max_schedule
             scheduler.update_op_memory_snapshot(opt_max_schedule)
 
+            progress_print(verbose_progress, "Creating minimal schedule")
             # Create Min schedule
             min_schedule = scheduler.propose_minimal_schedule()
             initial_sram_limit = scheduler_options.optimization_sram_limit
@@ -1620,11 +1630,13 @@ def schedule_passes(nng: Graph, arch: ArchitectureFeatures, options, scheduler_o
                 initial_sram_limit = scheduler.min_memory_req
 
             # Build cascades for Min schedule
+            progress_print(verbose_progress, "Building cascades for minimal schedule")
             scheduler.build_cascades_for_min_schedule(min_schedule, max_schedule_template, initial_sram_limit)
             sg.schedule = min_schedule
             scheduler.update_op_memory_snapshot(min_schedule)
 
             if scheduler_options.optimization_strategy == OptimizationStrategy.Performance:
+                progress_print(verbose_progress, "Creating schedule optimized for performance")
"Creating schedule optimized for performance") # Create an optimized schedule sg.schedule = scheduler.optimize_schedule( min_schedule, opt_max_schedule, max_schedule_template, scheduler_options @@ -1635,6 +1647,7 @@ def schedule_passes(nng: Graph, arch: ArchitectureFeatures, options, scheduler_o scheduler.use_fast_storage_for_feature_maps(sg.schedule, scheduler_options.optimization_sram_limit) if scheduler_options.optimization_strategy == OptimizationStrategy.Performance and scheduler.evicted_fms: + progress_print(verbose_progress, "Optimizing weight buffering size") # It might be possible to gain performance by reducing # weight buffer size and instead fit fms in fast storage scheduler.optimize_weight_buffering_size(min_schedule, scheduler_options) @@ -1642,8 +1655,10 @@ def schedule_passes(nng: Graph, arch: ArchitectureFeatures, options, scheduler_o if scheduler_options.verbose_schedule: scheduler.print_schedule(sg.schedule) + progress_print(verbose_progress, "Update memory snapshot for all NPU graphs") # Make a full live range calculation starting from the root sg - _update_memory_snapshot_for_all_npu_graphs(nng, arch, schedulers) + _update_memory_snapshot_for_all_npu_graphs(nng, arch, schedulers, verbose_progress) + progress_print(verbose_progress, "Update tensor allocation") # Evaluate schedule _update_tensor_allocation(nng, arch, options) diff --git a/ethosu/vela/tensor_allocation.py b/ethosu/vela/tensor_allocation.py index 8c91e2ed..fa9ace17 100644 --- a/ethosu/vela/tensor_allocation.py +++ b/ethosu/vela/tensor_allocation.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright 2020-2022 Arm Limited and/or its affiliates +# SPDX-FileCopyrightText: Copyright 2020-2023 Arm Limited and/or its affiliates # # SPDX-License-Identifier: Apache-2.0 # @@ -201,6 +201,7 @@ def allocate( lr_graph=None, cpu_tensor_alignment=Tensor.AllocationQuantum, hillclimb_max_iterations=None, + verbose_progress=False, ): # Allocates addresses to tensors, returns False if tensors could not be fit within max_size lrs = live_range.extract_live_ranges_from_cascaded_passes( @@ -209,6 +210,7 @@ def allocate( mem_type_set, lr_graph=lr_graph, cpu_tensor_alignment=cpu_tensor_alignment, + verbose_progress=verbose_progress, ) total_sz = 0 if lrs.ranges: @@ -235,6 +237,7 @@ def allocate_tensors( mem_type_set, tensor_allocator=TensorAllocator.Greedy, verbose_allocation=False, + verbose_progress=False, lr_graph=None, cpu_tensor_alignment=Tensor.AllocationQuantum, hillclimb_max_iterations=None, @@ -251,6 +254,7 @@ def allocate_tensors( lr_graph=lr_graph, cpu_tensor_alignment=cpu_tensor_alignment, hillclimb_max_iterations=hillclimb_max_iterations, + verbose_progress=verbose_progress, ) if lrs.ranges: diff --git a/ethosu/vela/utils.py b/ethosu/vela/utils.py new file mode 100644 index 00000000..386ba354 --- /dev/null +++ b/ethosu/vela/utils.py @@ -0,0 +1,86 @@ +# SPDX-FileCopyrightText: Copyright 2023 Arm Limited and/or its affiliates +# +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the License); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an AS IS BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+#
+# Description:
+# Contains various utility functions used across the codebase.
+from __future__ import annotations
+
+import collections.abc
+import inspect
+
+
+def progress_print(
+    enabled: bool,
+    message: str,
+    progress_counter: int = -1,
+    progress_total: int | collections.abc.Sized = 0,
+    progress_granularity: float = 0.20,
+):
+    """Print progress information.
+
+    :param enabled: boolean indicating whether message should be printed.
+    :param message: message to be printed
+    :param progress_counter: the value of the incremental counter that indicates the progress
+    :param progress_total: integer value or sized data structure to use to extract the total number of elements that
+        progress is measured against
+    :param progress_granularity: floating point percentage indicating how often progress information should be printed
+
+    Example
+    -------
+    def example_function(verbose_progress: bool = True):
+        a_list = [x for x in range(101)]
+        for index, value in enumerate(a_list):
+            progress_print(verbose_progress,
+                           message="Processing",
+                           progress_counter=index,
+                           progress_total=a_list,
+                           progress_granularity=0.25)
+
+    **Output**
+    example_function: Processing 0/100
+    example_function: Processing 25/100
+    example_function: Processing 50/100
+    example_function: Processing 75/100
+    example_function: Processing 100/100
+    """
+    if not enabled:
+        return
+
+    # Use the calling function's name as context for the printed message
+    context_str = inspect.stack()[1].function
+    context_str += ": " if message else ""
+    display_total = progress_total
+    # If a sized collection is provided, extract its size to use as progress total
+    if isinstance(progress_total, collections.abc.Sized):
+        progress_total = len(progress_total)
+        display_total = progress_total - 1
+
+    # Print progress information with "counter/total" information
+    if progress_counter > -1 and progress_total > 0 and 0 < progress_granularity < 1:
+        # Extract progress frequency and ensure it is not equal to 0 (avoid zero division)
+        progress_frequency = int(progress_total * progress_granularity)
+        progress_frequency = progress_frequency if progress_frequency else 1
+        # Check whether information should be printed based on computed progress frequency
+        if (
+            progress_counter % progress_frequency == 0 and progress_counter <= progress_total - progress_frequency
+        ) or progress_counter == display_total:
+            print(f"{context_str}{message} {progress_counter}/{display_total}")
+        return
+
+    print(f"{context_str}{message}")
diff --git a/ethosu/vela/vela.py b/ethosu/vela/vela.py
index c44c7894..fbf1d370 100644
--- a/ethosu/vela/vela.py
+++ b/ethosu/vela/vela.py
@@ -372,6 +372,7 @@ def main(args=None):
     parser.add_argument("--verbose-operators", action="store_true", help="Verbose operator list")
     parser.add_argument("--verbose-weights", action="store_true", help="Verbose weights information")
     parser.add_argument("--verbose-performance", action="store_true", help="Verbose performance information")
+    parser.add_argument("--verbose-progress", action="store_true", help="Verbose progress information")
     parser.add_argument(
         "--show-cpu-operations", action="store_true", help="Show the operations that fall back to the CPU"
     )
@@ -555,6 +556,7 @@
         verbose_operators=args.verbose_operators,
         verbose_weights=args.verbose_weights,
         verbose_performance=args.verbose_performance,
+        verbose_progress=args.verbose_progress,
         show_cpu_operations=args.show_cpu_operations,
         tensor_allocator=args.tensor_allocator,
         timing=args.timing,
@@ -568,6 +570,7 @@ def main(args=None):
         optimization_strategy=args.optimise,
         sram_target=arch.arena_cache_size,
         verbose_schedule=args.verbose_schedule,
+        verbose_progress=args.verbose_progress,
     )
 
     model_reader_options = model_reader.ModelReaderOptions()
-- 
cgit v1.2.1