author     Raul Farkas <raul.farkas@arm.com>  2023-04-26 07:49:15 +0100
committer  Raul Farkas <raul.farkas@arm.com>  2023-05-15 11:56:48 +0100
commit     1c54ac1499da4b1c0de39336c1a9b22e506388b1 (patch)
tree       d3aacca523c9dcf783acdcf7da9ec8921f3c4d05
parent     fd6f624870b446207b4436cda5bd93dd4ad577ff (diff)
download   ethos-u-vela-1c54ac1499da4b1c0de39336c1a9b22e506388b1.tar.gz
MLBEDSW-7390: Add verbose progress option
Add --verbose-progress CLI option used to enable printing progress
information in the compiler driver and scheduler.

Change-Id: I99ac8c6a654e60391d5c11e28b89250405daa53a
Signed-off-by: Raul Farkas <raul.farkas@arm.com>
 OPTIONS.md                       |  8
 ethosu/vela/compiler_driver.py   | 16
 ethosu/vela/live_range.py        | 10
 ethosu/vela/scheduler.py         | 75
 ethosu/vela/tensor_allocation.py |  6
 ethosu/vela/utils.py             | 86
 ethosu/vela/vela.py              |  3
 7 files changed, 169 insertions(+), 35 deletions(-)
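The patch's mechanism is a single gated helper, `progress_print`, that each compilation stage calls instead of scattering ad-hoc `if verbose: print(...)` checks. A minimal sketch of the calling pattern used throughout the driver and scheduler below; the function and list names here are hypothetical stand-ins, while `progress_print` and the `verbose_progress` flag come from this patch:

```python
from ethosu.vela.utils import progress_print


def run_stages(scheduler_options, stages):
    # Flag parsed from the new --verbose-progress CLI option.
    verbose_progress = scheduler_options.verbose_progress

    # Plain status line before a long-running phase.
    progress_print(verbose_progress, "Scheduling passes")

    # Counted progress inside a loop: passing the index and the collection
    # lets the helper print "N/total" lines at the default 20% granularity.
    for index, stage in enumerate(stages):
        progress_print(verbose_progress, "Processing stage", index, stages)
        stage()
```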
diff --git a/OPTIONS.md b/OPTIONS.md
index 36cd172..9eaff23 100644
--- a/OPTIONS.md
+++ b/OPTIONS.md
@@ -432,6 +432,14 @@ Verbose weights information.
vela network.tflite --verbose-weights
```
+### Verbose Progress
+
+Verbose progress information from the compiler driver and scheduler.
+
+```bash
+vela network.tflite --verbose-progress
+```
+
## Configuration File
This is used to describe various properties of the Ethos-U embedded system. The
diff --git a/ethosu/vela/compiler_driver.py b/ethosu/vela/compiler_driver.py
index d289209..51c9707 100644
--- a/ethosu/vela/compiler_driver.py
+++ b/ethosu/vela/compiler_driver.py
@@ -40,6 +40,7 @@ from .scheduler import OptimizationStrategy
from .tensor import MemArea
from .tensor import MemType
from .tensor import Tensor
+from .utils import progress_print
class CompilerOptions:
@@ -62,6 +63,7 @@ class CompilerOptions:
verbose_operators=False,
verbose_weights=False,
verbose_performance=False,
+ verbose_progress=False,
show_cpu_operations=False,
tensor_allocator=TensorAllocator.Greedy,
timing=False,
@@ -82,6 +84,7 @@ class CompilerOptions:
self.verbose_operators = verbose_operators
self.verbose_weights = verbose_weights
self.verbose_performance = verbose_performance
+ self.verbose_progress = verbose_progress
self.show_cpu_operations = show_cpu_operations
self.tensor_allocator = tensor_allocator
self.timing = timing
@@ -154,11 +157,13 @@ def _check_schedule(nng, arch, scheduler_options):
def compiler_driver(nng, arch, options, scheduler_options, network_type, output_basename):
assert verify_graph_health(nng)
+ verbose_progress = scheduler_options.verbose_progress
# Pre-optimisation operator tracking
for sg in nng.subgraphs:
visit_graph_post_order(sg.output_tensors, arch, [], [_record_operator])
+ progress_print(verbose_progress, "Performing graph optimisation")
nng = graph_optimiser.optimise_graph(
nng, arch, network_type, options.verbose_graph, options.force_symmetric_int_weights
)
@@ -167,17 +172,22 @@ def compiler_driver(nng, arch, options, scheduler_options, network_type, output_
if options.verbose_quantization:
nng.print_graph_with_tensor_quantization()
+ progress_print(verbose_progress, "Defining tensor purpose")
nng = mark_tensors.mark_tensor_purpose(nng, arch, options.verbose_tensor_purpose)
assert verify_graph_health(nng)
+
+ progress_print(verbose_progress, "Performing pass packing")
pass_packing.pack_into_passes(nng, arch, options.verbose_packing)
assert verify_graph_health(nng)
+ progress_print(verbose_progress, "Extracting npu subgraphs")
extract_npu_subgraphs.extract_npu_subgraphs(nng, arch)
assert verify_graph_health(nng)
if options.timing:
start = time.time()
+ progress_print(verbose_progress, "Scheduling passes")
# Run the scheduler
scheduler.schedule_passes(nng, arch, options, scheduler_options)
_check_schedule(nng, arch, scheduler_options)
@@ -199,6 +209,7 @@ def compiler_driver(nng, arch, options, scheduler_options, network_type, output_
# Create list of NPU subgraphs with same order as the list of all subgraphs
npu_subgraphs = [sg for sg in nng.subgraphs if sg.placement == PassPlacement.Npu]
+ progress_print(verbose_progress, "Calculating live ranges for constant NPU tensors")
# Calculate live ranges for all constant Npu tensors, in permanent storage
for sg in npu_subgraphs:
lr_graph_flash = live_range.create_linear_live_range_graph(
@@ -209,6 +220,7 @@ def compiler_driver(nng, arch, options, scheduler_options, network_type, output_
)
if npu_subgraphs:
+ progress_print(verbose_progress, "Allocating NPU constant tensors to the first NPU subgraph")
# Allocate all Npu constant tensors to the first Npu subgraph since it is
# processed first during serialization into tensors
first_npu_sg = npu_subgraphs[0]
@@ -225,6 +237,7 @@ def compiler_driver(nng, arch, options, scheduler_options, network_type, output_
root_sg = nng.get_root_subgraph()
+ progress_print(verbose_progress, "Generating command stream")
# Generate command streams and serialise Npu-ops into tensors
for sg in npu_subgraphs:
high_level_command_stream_generator.generate_high_level_command_stream_for_schedule(
@@ -249,6 +262,7 @@ def compiler_driver(nng, arch, options, scheduler_options, network_type, output_
if scratch_fast_tens is not None:
scratch_fast_tens.set_all_shapes([root_sg.memory_used_per_type.get(MemType.Scratch_fast, 0)])
+ progress_print(verbose_progress, "Allocating CPU constant tensors")
# Allocate all Cpu constant tensors, this is done last because the Npu-ops
# have to be serialized into flash and scratch tensors first
tensor_allocation.allocate_tensors(
@@ -261,7 +275,7 @@ def compiler_driver(nng, arch, options, scheduler_options, network_type, output_
verbose_allocation=options.verbose_allocation,
cpu_tensor_alignment=options.cpu_tensor_alignment,
)
-
+ progress_print(verbose_progress, "Calculating new performance for the network")
npu_performance.calc_new_performance_for_network(
nng, arch, network_type, options.verbose_performance, output_basename
)
diff --git a/ethosu/vela/live_range.py b/ethosu/vela/live_range.py
index d64f68e..9f94dd6 100644
--- a/ethosu/vela/live_range.py
+++ b/ethosu/vela/live_range.py
@@ -27,6 +27,7 @@ from .tensor import MemArea
from .tensor import MemType
from .tensor import Tensor
from .tensor import TensorPurpose
+from .utils import progress_print
class LiveRange:
@@ -231,6 +232,7 @@ def extract_live_ranges_from_cascaded_passes(
target_mem_type_set,
lr_graph=None,
cpu_tensor_alignment=Tensor.AllocationQuantum,
+ verbose_progress: bool = False,
):
if lr_graph is None:
lr_graph = LiveRangeGraph()
@@ -239,7 +241,8 @@ def extract_live_ranges_from_cascaded_passes(
# if subgraph has been processed already, return the lr_graph as is
return lr_graph
- for cps in sg.cascaded_passes:
+ for index, cps in enumerate(sg.cascaded_passes):
+ progress_print(verbose_progress, "Processing cascaded pass", index, sg.cascaded_passes)
cps.time = lr_graph.current_time
time_for_pass = cps.time
@@ -320,9 +323,10 @@ def create_linear_live_range_graph(sg, target_mem_area, target_mem_type_set, lr_
return lr_graph
-def extract_live_ranges_from_schedule(sg, target_mem_area, target_mem_type_set, lr_graph):
+def extract_live_ranges_from_schedule(sg, target_mem_area, target_mem_type_set, lr_graph, verbose_progress=False):
time_for_cascade = {}
- for sched_op in sg.sched_ops:
+ for index, sched_op in enumerate(sg.sched_ops):
+ progress_print(verbose_progress, "Processing SchedulerOp", index, sg.sched_ops)
op_info = sg.schedule.cost_map[sched_op]
cascade = op_info.cascade
cascade_info = sg.schedule.cascades.get(cascade, None)
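Here the counted form is used: each loop passes its index and the full collection, so the helper reports the position against the last index at the default 20% granularity. For a subgraph with, say, 40 cascaded passes, the output would look something like this (illustrative; the pass count is an assumption):

```
extract_live_ranges_from_cascaded_passes: Processing cascaded pass 0/39
extract_live_ranges_from_cascaded_passes: Processing cascaded pass 8/39
extract_live_ranges_from_cascaded_passes: Processing cascaded pass 16/39
extract_live_ranges_from_cascaded_passes: Processing cascaded pass 24/39
extract_live_ranges_from_cascaded_passes: Processing cascaded pass 32/39
extract_live_ranges_from_cascaded_passes: Processing cascaded pass 39/39
```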
diff --git a/ethosu/vela/scheduler.py b/ethosu/vela/scheduler.py
index cbd7ce4..6e2cd4a 100644
--- a/ethosu/vela/scheduler.py
+++ b/ethosu/vela/scheduler.py
@@ -31,6 +31,8 @@ from typing import Optional
from typing import Tuple
from typing import TYPE_CHECKING
+from .utils import progress_print
+
# Import needed for Type annotations. Only import for Type checking to avoid run-time errors due to cyclic import.
if TYPE_CHECKING:
from .npu_performance import CycleCost
@@ -148,10 +150,12 @@ class SchedulerOptions:
optimization_strategy,
sram_target,
verbose_schedule,
+ verbose_progress=False,
):
self.optimization_strategy = optimization_strategy
self.optimization_sram_limit = sram_target
self.verbose_schedule = verbose_schedule
+ self.verbose_progress = verbose_progress
def __str__(self) -> str:
return f"{type(self).__name__}: {str(self.__dict__)}"
@@ -531,7 +535,9 @@ class Scheduler:
def create_initial_schedule(self) -> Schedule:
"""Creates an initial schedule with no cascading or buffering of any kind"""
schedule = Schedule(self.sg, "MAX")
- for op in self.sched_ops:
+ verbose_progress = self.scheduler_options.verbose_progress
+ for index, op in enumerate(self.sched_ops):
+ progress_print(verbose_progress, "Processing SchedulerOp", index, self.sched_ops)
cost = op.create_scheduler_info(self.nng, op.ofm.shape)
cost.cycles = self.estimate_op_performance(op, cost.block_config, op.ofm.shape.depth)
schedule.cost_map[op] = cost
@@ -540,16 +546,12 @@ class Scheduler:
def update_op_memory_snapshot(self, schedule: Schedule):
memories_list = [(self.arch.fast_storage_mem_area, set((MemType.Scratch, MemType.Scratch_fast)))]
-
+ verbose_progress = self.scheduler_options.verbose_progress
+ progress_print(verbose_progress, "")
# Collect live ranges from tensors
lr_graph = live_range.LiveRangeGraph()
for mem_area, mem_type_set in memories_list:
- live_range.extract_live_ranges_from_schedule(
- self.sg,
- mem_area,
- mem_type_set,
- lr_graph,
- )
+ live_range.extract_live_ranges_from_schedule(self.sg, mem_area, mem_type_set, lr_graph, verbose_progress)
# Populate time-array with memory used by live ranges
temporal_usage = lr_graph.get_temporal_memory_usage(self.arch.fast_storage_mem_area)
@@ -607,9 +609,10 @@ class Scheduler:
def propose_schedule_buffering(self, ref_schedule: Schedule, staging_limit_bytes):
"""Create a buffered schedule"""
buffered_schedule = Schedule(self.sg, f"{ref_schedule.label}_BUFFERED")
-
+ verbose_progress = self.scheduler_options.verbose_progress
prev_op = None
- for sched_op in self.sched_ops:
+ for index, sched_op in enumerate(self.sched_ops):
+ progress_print(verbose_progress, "Processing SchedulerOp", index, self.sched_ops)
if sched_op not in ref_schedule.cost_map:
# sched_op is not part of this sub-schedule - skip
continue
@@ -871,10 +874,11 @@ class Scheduler:
next operators stride"""
min_schedule = Schedule(self.sg, "MIN")
cost_map = min_schedule.cost_map
-
+ verbose_progress = self.scheduler_options.verbose_progress
# Keep track of the previous Op - which consumes the current Op's OFM
prev_op: Optional[SchedulerOperation] = None
- for sched_op in reversed(self.sched_ops):
+ for index, sched_op in enumerate(reversed(self.sched_ops)):
+ progress_print(verbose_progress, "Processing SchedulerOp", index, self.sched_ops)
min_stripe_height = prev_op.kernel.stride.y if prev_op else 1
min_stripe = sched_op.ofm.shape.with_height(min_stripe_height)
@@ -968,13 +972,15 @@ class Scheduler:
return peak_mem_usage
def build_cascades_for_min_schedule(self, min_schedule: Schedule, max_template: Schedule, memory_limit: int):
+ verbose_progress = self.scheduler_options.verbose_progress
# Update memory snapshot
self.sg.schedule = min_schedule
self.update_op_memory_snapshot(min_schedule)
# Calculate residual memory for Min schedule
non_local_mem_usage = {}
- for sched_op in self.sched_ops:
+ for index, sched_op in enumerate(self.sched_ops):
+ progress_print(verbose_progress, "Processing SchedulerOp", index, self.sched_ops)
time_index = min_schedule.cost_map[sched_op].time_index
if self.arch.is_spilling_enabled():
@@ -1089,13 +1095,16 @@ class Scheduler:
options: SchedulerOptions,
) -> Schedule:
"""Extracts sub-schedules based on the cascades and optimizes them and applies them to the final schedule"""
+ verbose_progress = options.verbose_progress
sram_limit = options.optimization_sram_limit
if max_sched.fast_storage_peak_usage < sram_limit and not self.arch.is_spilling_enabled():
# Maximum performance schedule fits within the SRAM target
return max_sched
# Iterate over a copy of the cascades since they may change during the loop
- for cascade_info in list(schedule.cascades.values()):
+ cascades = list(schedule.cascades.values())
+ for index, cascade_info in enumerate(cascades):
+ progress_print(verbose_progress, "Processing cascade", index, cascades)
# Optimize the sub-schedule in this cascade
opt_sub_schedule = self.optimize_sub_schedule(cascade_info, schedule, max_template, sram_limit)
if opt_sub_schedule:
@@ -1119,6 +1128,7 @@ class Scheduler:
min_schedule: Schedule,
options: SchedulerOptions,
):
+ verbose_progress = options.verbose_progress
default_schedule = self.sg.schedule
npu_performance.calc_new_performance_for_network(self.nng, self.arch, None, False)
default_tot_cycles = self.nng.cycles[npu_performance.PassCycles.Total]
@@ -1135,12 +1145,7 @@ class Scheduler:
memories_list = [(self.arch.fast_storage_mem_area, set((MemType.Scratch, MemType.Scratch_fast)))]
lr_graph = live_range.LiveRangeGraph()
for mem_area, mem_type_set in memories_list:
- live_range.extract_live_ranges_from_schedule(
- self.sg,
- mem_area,
- mem_type_set,
- lr_graph,
- )
+ live_range.extract_live_ranges_from_schedule(self.sg, mem_area, mem_type_set, lr_graph, verbose_progress)
# Find the relation between the sched_op and the buffering tensor
weight_ops = {}
@@ -1416,7 +1421,9 @@ class Scheduler:
print(f"\t\t{i}: {cascade.start} -> {cascade.end}, size: {cascade.mem_usage}")
-def _update_memory_snapshot_for_all_npu_graphs(nng: Graph, arch: ArchitectureFeatures, schedulers):
+def _update_memory_snapshot_for_all_npu_graphs(
+ nng: Graph, arch: ArchitectureFeatures, schedulers, verbose_progress: bool = False
+):
mem_area = arch.fast_storage_mem_area
mem_type_set = set((MemType.Scratch, MemType.Scratch_fast))
@@ -1426,11 +1433,7 @@ def _update_memory_snapshot_for_all_npu_graphs(nng: Graph, arch: ArchitectureFea
# will be set for all the tensors.
lr_graph = live_range.LiveRangeGraph()
live_range.extract_live_ranges_from_cascaded_passes(
- nng.get_root_subgraph(),
- mem_area,
- mem_type_set,
- lr_graph,
- Tensor.AllocationQuantum,
+ nng.get_root_subgraph(), mem_area, mem_type_set, lr_graph, Tensor.AllocationQuantum, verbose_progress
)
# Populate time-array with memory used by live ranges
temporal_usage = lr_graph.get_temporal_memory_usage(arch.fast_storage_mem_area)
@@ -1471,6 +1474,7 @@ def _update_tensor_allocation(nng: Graph, arch: ArchitectureFeatures, options):
mem_type_set,
tensor_allocator=options.tensor_allocator,
verbose_allocation=options.verbose_allocation,
+ verbose_progress=options.verbose_progress,
cpu_tensor_alignment=options.cpu_tensor_alignment,
hillclimb_max_iterations=options.hillclimb_max_iterations,
)
@@ -1570,14 +1574,17 @@ class FastStorageComponentAllocator:
def schedule_passes(nng: Graph, arch: ArchitectureFeatures, options, scheduler_options: SchedulerOptions):
"""Entry point for the Scheduler"""
+ verbose_progress = scheduler_options.verbose_progress
# Initialize CPU subgraphs
schedulers = dict()
# Initialize schedulers with max schedule. Only schedule NPU subgraphs
- for sg in nng.subgraphs:
+ for sg_idx, sg in enumerate(nng.subgraphs):
+ progress_print(verbose_progress, "Processing subgraph", sg_idx, nng.subgraphs)
if sg.placement != PassPlacement.Npu:
# Create cascaded passes for CPU Ops
cascaded_passes = []
- for idx, ps in enumerate(sg.passes):
+ for pass_idx, ps in enumerate(sg.passes):
+ progress_print(verbose_progress, "Creating cascaded passes for CPU op", pass_idx, sg.passes)
cps = CascadedPass(
ps.name,
SchedulingStrategy.WeightStream,
@@ -1589,7 +1596,7 @@ def schedule_passes(nng: Graph, arch: ArchitectureFeatures, options, scheduler_o
False,
)
- cps.time = idx
+ cps.time = pass_idx
ps.cascade = cps
cascaded_passes.append(cps)
@@ -1599,6 +1606,7 @@ def schedule_passes(nng: Graph, arch: ArchitectureFeatures, options, scheduler_o
scheduler = Scheduler(nng, sg, arch, scheduler_options)
schedulers[sg] = scheduler
+ progress_print(verbose_progress, "Creating scheduler representation")
scheduler.create_scheduler_representation(arch)
sg.sched_ops = scheduler.sched_ops
@@ -1606,6 +1614,7 @@ def schedule_passes(nng: Graph, arch: ArchitectureFeatures, options, scheduler_o
max_schedule_template = scheduler.create_initial_schedule()
scheduler.max_schedule = max_schedule_template
+ progress_print(verbose_progress, "Creating optimised max schedule")
# Create the optimised Max schedule
sg.schedule = max_schedule_template
scheduler.update_op_memory_snapshot(max_schedule_template)
@@ -1613,6 +1622,7 @@ def schedule_passes(nng: Graph, arch: ArchitectureFeatures, options, scheduler_o
sg.schedule = opt_max_schedule
scheduler.update_op_memory_snapshot(opt_max_schedule)
+ progress_print(verbose_progress, "Creating minimal schedule")
# Create Min schedule
min_schedule = scheduler.propose_minimal_schedule()
initial_sram_limit = scheduler_options.optimization_sram_limit
@@ -1620,11 +1630,13 @@ def schedule_passes(nng: Graph, arch: ArchitectureFeatures, options, scheduler_o
initial_sram_limit = scheduler.min_memory_req
# Build cascades for Min schedule
+ progress_print(verbose_progress, "Building cascades for minimal schedule")
scheduler.build_cascades_for_min_schedule(min_schedule, max_schedule_template, initial_sram_limit)
sg.schedule = min_schedule
scheduler.update_op_memory_snapshot(min_schedule)
if scheduler_options.optimization_strategy == OptimizationStrategy.Performance:
+ progress_print(verbose_progress, "Creating schedule optimized for performance")
# Create an optimized schedule
sg.schedule = scheduler.optimize_schedule(
min_schedule, opt_max_schedule, max_schedule_template, scheduler_options
@@ -1635,6 +1647,7 @@ def schedule_passes(nng: Graph, arch: ArchitectureFeatures, options, scheduler_o
scheduler.use_fast_storage_for_feature_maps(sg.schedule, scheduler_options.optimization_sram_limit)
if scheduler_options.optimization_strategy == OptimizationStrategy.Performance and scheduler.evicted_fms:
+ progress_print(verbose_progress, "Optimizing weight buffering size")
# It might be possible to gain performance by reducing
# weight buffer size and instead fit fms in fast storage
scheduler.optimize_weight_buffering_size(min_schedule, scheduler_options)
@@ -1642,8 +1655,10 @@ def schedule_passes(nng: Graph, arch: ArchitectureFeatures, options, scheduler_o
if scheduler_options.verbose_schedule:
scheduler.print_schedule(sg.schedule)
+ progress_print(verbose_progress, "Update memory snapshot for all NPU graphs")
# Make a full live range calculation starting from the root sg
- _update_memory_snapshot_for_all_npu_graphs(nng, arch, schedulers)
+ _update_memory_snapshot_for_all_npu_graphs(nng, arch, schedulers, verbose_progress)
+ progress_print(verbose_progress, "Update tensor allocation")
# Evaluate schedule
_update_tensor_allocation(nng, arch, options)
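For code constructing `SchedulerOptions` directly rather than via the CLI, the new flag is simply the extra constructor argument added above. A hedged sketch with placeholder values (in `vela.py` the real values come from the parsed arguments and `arch.arena_cache_size`):

```python
from ethosu.vela.scheduler import OptimizationStrategy, SchedulerOptions

scheduler_options = SchedulerOptions(
    optimization_strategy=OptimizationStrategy.Performance,
    sram_target=384 * 1024,  # placeholder; vela.py passes arch.arena_cache_size
    verbose_schedule=False,
    verbose_progress=True,   # flag added by this patch
)
```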
diff --git a/ethosu/vela/tensor_allocation.py b/ethosu/vela/tensor_allocation.py
index 8c91e2e..fa9ace1 100644
--- a/ethosu/vela/tensor_allocation.py
+++ b/ethosu/vela/tensor_allocation.py
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright 2020-2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
+# SPDX-FileCopyrightText: Copyright 2020-2023 Arm Limited and/or its affiliates <open-source-office@arm.com>
#
# SPDX-License-Identifier: Apache-2.0
#
@@ -201,6 +201,7 @@ def allocate(
lr_graph=None,
cpu_tensor_alignment=Tensor.AllocationQuantum,
hillclimb_max_iterations=None,
+ verbose_progress=False,
):
# Allocates addresses to tensors, returns False if tensors could not be fit within max_size
lrs = live_range.extract_live_ranges_from_cascaded_passes(
@@ -209,6 +210,7 @@ def allocate(
mem_type_set,
lr_graph=lr_graph,
cpu_tensor_alignment=cpu_tensor_alignment,
+ verbose_progress=verbose_progress,
)
total_sz = 0
if lrs.ranges:
@@ -235,6 +237,7 @@ def allocate_tensors(
mem_type_set,
tensor_allocator=TensorAllocator.Greedy,
verbose_allocation=False,
+ verbose_progress=False,
lr_graph=None,
cpu_tensor_alignment=Tensor.AllocationQuantum,
hillclimb_max_iterations=None,
@@ -251,6 +254,7 @@ def allocate_tensors(
lr_graph=lr_graph,
cpu_tensor_alignment=cpu_tensor_alignment,
hillclimb_max_iterations=hillclimb_max_iterations,
+ verbose_progress=verbose_progress,
)
if lrs.ranges:
diff --git a/ethosu/vela/utils.py b/ethosu/vela/utils.py
new file mode 100644
index 0000000..386ba35
--- /dev/null
+++ b/ethosu/vela/utils.py
@@ -0,0 +1,86 @@
+# SPDX-FileCopyrightText: Copyright 2023 Arm Limited and/or its affiliates <open-source-office@arm.com>
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the License); you may
+# not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an AS IS BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Description:
+# Contains various utility functions used across the codebase.
+from __future__ import annotations
+
+import collections.abc
+import inspect
+
+
+def progress_print(
+ enabled: bool,
+ message: str,
+ progress_counter: int = -1,
+ progress_total: int | collections.abc.Sized = 0,
+ progress_granularity: float = 0.20,
+):
+ """Print progress information.
+
+ :param enabled: boolean indicating whether message should be printed.
+ :param message: message to be printed
+ :param progress_counter: the value of the incremental counter that indicates the progress
+ :param progress_total: integer value or sized data structure to use to extract the total number of elements that
+ progress is measured against
+ :param progress_granularity: floating point percentage indicating how often progress information should be printed
+
+ Example
+ -------
+ def example_function(verbose_progress: bool = True):
+ a_list = [x for x in range(101)]
+ for index, value in enumerate(a_list):
+ progress_print(verbose_progress,
+ message="Processing",
+ progress_counter=index,
+ progress_total=a_list,
+ progress_granularity=0.25)
+
+ **Output**
+ example_function: Processing 0/100
+ example_function: Processing 25/100
+ example_function: Processing 50/100
+ example_function: Processing 75/100
+ example_function: Processing 100/100
+ """
+ if not enabled:
+ return
+
+ context_str = ""
+ # Get calling function name
+ context_str = inspect.stack()[1].function
+ context_str += ": " if message else ""
+ display_total = progress_total
+ # If a sized collection is provided, extract its size to use as progress total
+ if isinstance(progress_total, collections.abc.Sized):
+ progress_total = len(progress_total)
+ display_total = progress_total - 1
+
+ # Print progress information with "counter/total" information
+ if progress_counter > -1 and progress_total > 0 and 0 < progress_granularity < 1:
+ # Extract progress frequency and ensure it is not equal to 0 (avoid zero division)
+ progress_frequency = int(progress_total * progress_granularity)
+ progress_frequency = progress_frequency if progress_frequency else 1
+ # Check whether information should be printed based on computed progress frequency
+ if (
+ progress_counter % progress_frequency == 0 and progress_counter <= progress_total - progress_frequency
+ ) or progress_counter == display_total:
+ print(f"{context_str}{message} {progress_counter}/{display_total}")
+ return
+
+ print(f"{context_str}{message}")
diff --git a/ethosu/vela/vela.py b/ethosu/vela/vela.py
index c44c789..fbf1d37 100644
--- a/ethosu/vela/vela.py
+++ b/ethosu/vela/vela.py
@@ -372,6 +372,7 @@ def main(args=None):
parser.add_argument("--verbose-operators", action="store_true", help="Verbose operator list")
parser.add_argument("--verbose-weights", action="store_true", help="Verbose weights information")
parser.add_argument("--verbose-performance", action="store_true", help="Verbose performance information")
+ parser.add_argument("--verbose-progress", action="store_true", help="Verbose progress information")
parser.add_argument(
"--show-cpu-operations", action="store_true", help="Show the operations that fall back to the CPU"
)
@@ -555,6 +556,7 @@ def main(args=None):
verbose_operators=args.verbose_operators,
verbose_weights=args.verbose_weights,
verbose_performance=args.verbose_performance,
+ verbose_progress=args.verbose_progress,
show_cpu_operations=args.show_cpu_operations,
tensor_allocator=args.tensor_allocator,
timing=args.timing,
@@ -568,6 +570,7 @@ def main(args=None):
optimization_strategy=args.optimise,
sram_target=arch.arena_cache_size,
verbose_schedule=args.verbose_schedule,
+ verbose_progress=args.verbose_progress,
)
model_reader_options = model_reader.ModelReaderOptions()