diff options
 -rw-r--r-- ethosu/vela/nn_graph.py          |  1 +
 -rw-r--r-- ethosu/vela/stats_writer.py      |  8 ++++++++
 -rw-r--r-- ethosu/vela/tensor_allocation.py | 12 ++++++++++++
 3 files changed, 21 insertions(+), 0 deletions(-)
diff --git a/ethosu/vela/nn_graph.py b/ethosu/vela/nn_graph.py
index c45d0e3e..19734873 100644
--- a/ethosu/vela/nn_graph.py
+++ b/ethosu/vela/nn_graph.py
@@ -154,6 +154,7 @@ class Subgraph:
 
         self.memory_used = {}
         self.memory_used_per_type = {}
+        self.min_mem_usage = 0
 
     def __str__(self):
         return "<nng.Subgraph '%s', n_passes=%d, n_cascaded_passes=%d>" % (
diff --git a/ethosu/vela/stats_writer.py b/ethosu/vela/stats_writer.py
index d8673683..3d92d2ce 100644
--- a/ethosu/vela/stats_writer.py
+++ b/ethosu/vela/stats_writer.py
@@ -223,6 +223,7 @@ def print_performance_metrics_for_strat(
     bandwidths,
     batch_size,
     memory_used,
+    min_mem_usage,
     num_passes,
     num_cascaded_passes,
     n_operations=0,
@@ -265,6 +266,11 @@ def print_performance_metrics_for_strat(
 
         aug_label = label + " used"
         print(f"Total {aug_label:25} {memory_used[mem_area] / 1024.0:12.2f} KiB", file=f)
+        if mem_area == MemArea.Sram and min_mem_usage:
+            mem_used = memory_used[[mem_area for mem_area, _ in mem_area_labels if "SRAM" in mem_area][0]] / 1024.0
+            fraction = (mem_used - min_mem_usage / 1024.0) / (min_mem_usage / 1024.0)
+            print(f"Theoretical minimum SRAM usage{min_mem_usage/1024.0:23.2F} KiB", file=f)
+            print(f"Allocator overhead{100*fraction:35.2F} %", file=f)
 
     print(file=f)
     print(f"{num_passes:d} passes fused into {num_cascaded_passes:d}", file=f)
@@ -353,6 +359,7 @@ def print_performance_metrics(nng, arch, show_cpu_operations=False, f=sys.stdout
     n_cascaded_passes = sum(len(sg.cascaded_passes) for sg in nng.subgraphs)
     n_operations = sum(len(ps.ops) for sg in nng.subgraphs for ps in sg.passes)
     cpu_operations = sum((ps.ops for sg in nng.subgraphs for ps in sg.passes if ps.placement == PassPlacement.Cpu), [])
+    min_mem_usage = max(sg.min_mem_usage for sg in nng.subgraphs)
     return print_performance_metrics_for_strat(
         arch,
         nng.name,
@@ -361,6 +368,7 @@ def print_performance_metrics(nng, arch, show_cpu_operations=False, f=sys.stdout
         nng.bandwidths,
         nng.batch_size,
         nng.memory_used,
+        min_mem_usage,
         n_passes,
         n_cascaded_passes,
         n_operations,
diff --git a/ethosu/vela/tensor_allocation.py b/ethosu/vela/tensor_allocation.py
index 621073a3..b2ea7de6 100644
--- a/ethosu/vela/tensor_allocation.py
+++ b/ethosu/vela/tensor_allocation.py
@@ -142,6 +142,17 @@ def print_allocation(lrs, mem_area, mem_type_set, sg, verbose_allocation):
     print()
 
 
+def calculate_allocation_efficiency(lrs):
+    lr_set = set(lrs.ranges.values())
+
+    size_at_time = [0] * (1 + max(lr.end_time for lr in lr_set))
+    for lr in lr_set:
+        for t in range(lr.start_time, lr.end_time + 1):
+            size_at_time[t] += lr.size
+
+    return max(size_at_time)
+
+
 def allocate_tensors(
     nng,
     sg,
@@ -199,6 +210,7 @@ def allocate_tensors(
         print_allocation(lrs, mem_area, mem_type_set, sg, verbose_allocation)
 
         if mem_area == MemArea.Sram:
+            sg.min_mem_usage = calculate_allocation_efficiency(lrs)
             # Mark Sram usage for all subgraphs
             for sg_ in nng.subgraphs:
                 mark_sram_used_for_cascaded_passes(sg_, lrs)