author     erik.andersson@arm.com <erik.andersson@arm.com>  2021-03-24 10:32:09 +0100
committer  erik.andersson@arm.com <erik.andersson@arm.com>  2021-03-30 09:20:00 +0200
commit     3438c929528583bc019055ad7057c08271b0cee7 (patch)
tree       ea0f8af0e5f23e567c6d31f4f7c639a0e94f6dbf
parent     9bb1e2ed361286769c362f002910d1dbd1736b05 (diff)
download   ethos-u-vela-3438c929528583bc019055ad7057c08271b0cee7.tar.gz
MLBEDSW-4219: Add tensor allocation info to summary
Added the theoretical minimum peak memory usage and the allocator overhead to the Vela summary.

Signed-off-by: erik.andersson@arm.com <erik.andersson@arm.com>
Change-Id: If373dfeaac50d6f8b56554d435bf22af2c3acda3
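The overhead figure added to the summary is, in essence, the SRAM the allocator actually reserved expressed as a percentage above the theoretical minimum peak usage. A minimal sketch of that arithmetic, using made-up numbers rather than values from a real network:

    # Illustrative values only; in Vela they come from the allocator result
    # and from the live-range analysis introduced by this change.
    memory_used_sram = 512 * 1024  # bytes actually allocated in SRAM
    min_mem_usage = 480 * 1024     # theoretical minimum peak usage in bytes

    # Overhead relative to the theoretical minimum, as printed in the summary.
    fraction = (memory_used_sram - min_mem_usage) / min_mem_usage
    print(f"Theoretical minimum SRAM usage {min_mem_usage / 1024.0:12.2f} KiB")
    print(f"Allocator overhead {100 * fraction:12.2f} %")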
-rw-r--r--  ethosu/vela/nn_graph.py           1
-rw-r--r--  ethosu/vela/stats_writer.py       8
-rw-r--r--  ethosu/vela/tensor_allocation.py 12
3 files changed, 21 insertions, 0 deletions
diff --git a/ethosu/vela/nn_graph.py b/ethosu/vela/nn_graph.py
index c45d0e3e..19734873 100644
--- a/ethosu/vela/nn_graph.py
+++ b/ethosu/vela/nn_graph.py
@@ -154,6 +154,7 @@ class Subgraph:
         self.memory_used = {}
         self.memory_used_per_type = {}
+        self.min_mem_usage = 0

     def __str__(self):
         return "<nng.Subgraph '%s', n_passes=%d, n_cascaded_passes=%d>" % (
diff --git a/ethosu/vela/stats_writer.py b/ethosu/vela/stats_writer.py
index d8673683..3d92d2ce 100644
--- a/ethosu/vela/stats_writer.py
+++ b/ethosu/vela/stats_writer.py
@@ -223,6 +223,7 @@ def print_performance_metrics_for_strat(
     bandwidths,
     batch_size,
     memory_used,
+    min_mem_usage,
     num_passes,
     num_cascaded_passes,
     n_operations=0,
@@ -265,6 +266,11 @@ def print_performance_metrics_for_strat(
         aug_label = label + " used"

         print(f"Total {aug_label:25} {memory_used[mem_area] / 1024.0:12.2f} KiB", file=f)
+        if mem_area == MemArea.Sram and min_mem_usage:
+            mem_used = memory_used[[mem_area for mem_area, label in mem_area_labels if "SRAM" in label][0]] / 1024.0
+            fraction = (mem_used - min_mem_usage / 1024.0) / (min_mem_usage / 1024.0)
+            print(f"Theoretical minimum SRAM usage{min_mem_usage/1024.0:23.2F} KiB", file=f)
+            print(f"Allocator overhead{100*fraction:35.2F} %", file=f)

     print(file=f)
     print(f"{num_passes:d} passes fused into {num_cascaded_passes:d}", file=f)
@@ -353,6 +359,7 @@ def print_performance_metrics(nng, arch, show_cpu_operations=False, f=sys.stdout
     n_cascaded_passes = sum(len(sg.cascaded_passes) for sg in nng.subgraphs)
     n_operations = sum(len(ps.ops) for sg in nng.subgraphs for ps in sg.passes)
     cpu_operations = sum((ps.ops for sg in nng.subgraphs for ps in sg.passes if ps.placement == PassPlacement.Cpu), [])
+    min_mem_usage = max(sg.min_mem_usage for sg in nng.subgraphs)
     return print_performance_metrics_for_strat(
         arch,
         nng.name,
@@ -361,6 +368,7 @@ def print_performance_metrics(nng, arch, show_cpu_operations=False, f=sys.stdout
         nng.bandwidths,
         nng.batch_size,
         nng.memory_used,
+        min_mem_usage,
         n_passes,
         n_cascaded_passes,
         n_operations,
diff --git a/ethosu/vela/tensor_allocation.py b/ethosu/vela/tensor_allocation.py
index 621073a3..b2ea7de6 100644
--- a/ethosu/vela/tensor_allocation.py
+++ b/ethosu/vela/tensor_allocation.py
@@ -142,6 +142,17 @@ def print_allocation(lrs, mem_area, mem_type_set, sg, verbose_allocation):
         print()


+def calculate_allocation_efficiency(lrs):
+    lr_set = set(lrs.ranges.values())
+
+    size_at_time = [0] * (1 + max(lr.end_time for lr in lr_set))
+    for lr in lr_set:
+        for t in range(lr.start_time, lr.end_time + 1):
+            size_at_time[t] += lr.size
+
+    return max(size_at_time)
+
+
 def allocate_tensors(
     nng,
     sg,
@@ -199,6 +210,7 @@ def allocate_tensors(
         print_allocation(lrs, mem_area, mem_type_set, sg, verbose_allocation)

         if mem_area == MemArea.Sram:
+            sg.min_mem_usage = calculate_allocation_efficiency(lrs)
             # Mark Sram usage for all subgraphs
             for sg_ in nng.subgraphs:
                 mark_sram_used_for_cascaded_passes(sg_, lrs)
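For reference, calculate_allocation_efficiency() derives the theoretical minimum by summing the sizes of all live ranges active at each time step and taking the peak; no allocator can use less SRAM for that schedule. A self-contained sketch of the same calculation, with a hypothetical LiveRange stand-in for Vela's live-range objects (only the start_time, end_time and size fields used above are modelled):

    from collections import namedtuple

    # Hypothetical stand-in for Vela's live-range objects.
    LiveRange = namedtuple("LiveRange", ["start_time", "end_time", "size"])

    def theoretical_min_usage(live_ranges):
        # Sum the size of every live range active at each time step,
        # then take the peak across all time steps.
        size_at_time = [0] * (1 + max(lr.end_time for lr in live_ranges))
        for lr in live_ranges:
            for t in range(lr.start_time, lr.end_time + 1):
                size_at_time[t] += lr.size
        return max(size_at_time)

    # Two tensors overlap at time step 1, so the peak is 64 + 32 = 96 bytes.
    print(theoretical_min_usage([LiveRange(0, 1, 64), LiveRange(1, 2, 32)]))  # 96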