From 1c54ac1499da4b1c0de39336c1a9b22e506388b1 Mon Sep 17 00:00:00 2001
From: Raul Farkas
Date: Wed, 26 Apr 2023 07:49:15 +0100
Subject: MLBEDSW-7390: Add verbose progress option

Add --verbose-progress CLI option used to enable printing progress
information in the compiler driver and scheduler.

Change-Id: I99ac8c6a654e60391d5c11e28b89250405daa53a
Signed-off-by: Raul Farkas
---
 ethosu/vela/compiler_driver.py | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

(limited to 'ethosu/vela/compiler_driver.py')

diff --git a/ethosu/vela/compiler_driver.py b/ethosu/vela/compiler_driver.py
index d2892096..51c97070 100644
--- a/ethosu/vela/compiler_driver.py
+++ b/ethosu/vela/compiler_driver.py
@@ -40,6 +40,7 @@ from .scheduler import OptimizationStrategy
 from .tensor import MemArea
 from .tensor import MemType
 from .tensor import Tensor
+from .utils import progress_print
 
 
 class CompilerOptions:
@@ -62,6 +63,7 @@ class CompilerOptions:
         verbose_operators=False,
         verbose_weights=False,
         verbose_performance=False,
+        verbose_progress=False,
         show_cpu_operations=False,
         tensor_allocator=TensorAllocator.Greedy,
         timing=False,
@@ -82,6 +84,7 @@ class CompilerOptions:
         self.verbose_operators = verbose_operators
         self.verbose_weights = verbose_weights
         self.verbose_performance = verbose_performance
+        self.verbose_progress = verbose_progress
         self.show_cpu_operations = show_cpu_operations
         self.tensor_allocator = tensor_allocator
         self.timing = timing
@@ -154,11 +157,13 @@ def _check_schedule(nng, arch, scheduler_options):
 
 def compiler_driver(nng, arch, options, scheduler_options, network_type, output_basename):
     assert verify_graph_health(nng)
+    verbose_progress = scheduler_options.verbose_progress
 
     # Pre-optimisation operator tracking
     for sg in nng.subgraphs:
         visit_graph_post_order(sg.output_tensors, arch, [], [_record_operator])
 
+    progress_print(verbose_progress, "Performing graph optimisation")
     nng = graph_optimiser.optimise_graph(
         nng, arch, network_type, options.verbose_graph, options.force_symmetric_int_weights
     )
@@ -167,17 +172,22 @@ def compiler_driver(nng, arch, options, scheduler_options, network_type, output_
     if options.verbose_quantization:
         nng.print_graph_with_tensor_quantization()
 
+    progress_print(verbose_progress, "Defining tensor purpose")
     nng = mark_tensors.mark_tensor_purpose(nng, arch, options.verbose_tensor_purpose)
     assert verify_graph_health(nng)
+
+    progress_print(verbose_progress, "Performing pass packing")
     pass_packing.pack_into_passes(nng, arch, options.verbose_packing)
     assert verify_graph_health(nng)
 
+    progress_print(verbose_progress, "Extracting npu subgraphs")
     extract_npu_subgraphs.extract_npu_subgraphs(nng, arch)
 
     assert verify_graph_health(nng)
     if options.timing:
         start = time.time()
 
+    progress_print(verbose_progress, "Scheduling passes")
     # Run the scheduler
     scheduler.schedule_passes(nng, arch, options, scheduler_options)
     _check_schedule(nng, arch, scheduler_options)
@@ -199,6 +209,7 @@ def compiler_driver(nng, arch, options, scheduler_options, network_type, output_
     # Create list of NPU subgraphs with same order as the list of all subgraphs
     npu_subgraphs = [sg for sg in nng.subgraphs if sg.placement == PassPlacement.Npu]
 
+    progress_print(verbose_progress, "Calculating live ranges for constant NPU tensors")
     # Calculate live ranges for all constant Npu tensors, in permanent storage
     for sg in npu_subgraphs:
         lr_graph_flash = live_range.create_linear_live_range_graph(
@@ -209,6 +220,7 @@ def compiler_driver(nng, arch, options, scheduler_options, network_type, output_
         )
 
     if npu_subgraphs:
+        progress_print(verbose_progress, "Allocating NPU constant tensors to the first NPU subgraph")
         # Allocate all Npu constant tensors to the first Npu subgraph since it is
         # processed first during serialization into tensors
         first_npu_sg = npu_subgraphs[0]
@@ -225,6 +237,7 @@ def compiler_driver(nng, arch, options, scheduler_options, network_type, output_
 
     root_sg = nng.get_root_subgraph()
 
+    progress_print(verbose_progress, "Generating command stream")
     # Generate command streams and serialise Npu-ops into tensors
     for sg in npu_subgraphs:
         high_level_command_stream_generator.generate_high_level_command_stream_for_schedule(
@@ -249,6 +262,7 @@ def compiler_driver(nng, arch, options, scheduler_options, network_type, output_
     if scratch_fast_tens is not None:
         scratch_fast_tens.set_all_shapes([root_sg.memory_used_per_type.get(MemType.Scratch_fast, 0)])
 
+    progress_print(verbose_progress, "Allocating CPU constant tensors")
     # Allocate all Cpu constant tensors, this is done last because the Npu-ops
     # have to be serialized into flash and scratch tensors first
     tensor_allocation.allocate_tensors(
@@ -261,7 +275,7 @@ def compiler_driver(nng, arch, options, scheduler_options, network_type, output_
         verbose_allocation=options.verbose_allocation,
         cpu_tensor_alignment=options.cpu_tensor_alignment,
     )
-
+    progress_print(verbose_progress, "Calculating new performance for the network")
     npu_performance.calc_new_performance_for_network(
         nng, arch, network_type, options.verbose_performance, output_basename
     )
--
cgit v1.2.1