author     Louis Verhaard <louis.verhaard@arm.com>  2020-09-29 13:57:21 +0200
committer  Louis Verhaard <louis.verhaard@arm.com>  2020-09-30 09:03:22 +0200
commit     0265f402c7ae1e875470298b4130fcc2f7ab4e23 (patch)
tree       9cd83f1c755ae118b84c296283bec9152f04f058
parent     7ad862ada22dc00c0306ba0f1eafbef98f23c77c (diff)
download   ethos-u-vela-0265f402c7ae1e875470298b4130fcc2f7ab4e23.tar.gz
MLBEDSW-3025: Remove SHRAM from reports
SHRAM is removed from performance reports, as the SHRAM numbers only include LUT usage.

Change-Id: I5d92bb3be9c8e38dad26ac8ef97c84ecb0aff2fa
Signed-off-by: Louis Verhaard <louis.verhaard@arm.com>
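For context, the change hinges on one small helper that filters SHRAM out of the list of reported memory areas. Below is a minimal, self-contained sketch of that pattern; FakeMemArea is a hypothetical stand-in for vela's MemArea enum, used only so the example runs without the vela package installed.

# Minimal sketch of the filtering pattern this patch applies.
# FakeMemArea is a hypothetical stand-in for vela's MemArea enum; the real
# helper is added to ethosu/vela/stats_writer.py and filters MemArea.all().
import enum


class FakeMemArea(enum.Enum):
    Sram = 1
    Dram = 2
    OnChipFlash = 3
    OffChipFlash = 4
    Shram = 5  # its performance numbers only cover LUT usage


def mem_areas_to_report():
    # Keep every memory area except SHRAM
    return [area for area in FakeMemArea if area != FakeMemArea.Shram]


if __name__ == "__main__":
    print([area.name for area in mem_areas_to_report()])
    # ['Sram', 'Dram', 'OnChipFlash', 'OffChipFlash']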
-rw-r--r--  ethosu/vela/stats_writer.py  22
1 file changed, 14 insertions, 8 deletions
diff --git a/ethosu/vela/stats_writer.py b/ethosu/vela/stats_writer.py
index af7b6997..2ea14f2b 100644
--- a/ethosu/vela/stats_writer.py
+++ b/ethosu/vela/stats_writer.py
@@ -29,9 +29,15 @@ from .tensor import MemArea
 from .tensor import TensorPurpose


+def mem_areas_to_report():
+    # Exclude SHRAM, as the SHRAM performance numbers only cover LUT usage
+    return [area for area in MemArea.all() if area != MemArea.Shram]
+
+
 def write_summary_metrics_csv(nng, summary_filename, arch):
     with open(summary_filename, "w") as f:
         writer = csv.writer(f)
+        mem_areas = mem_areas_to_report()

         labels = [
             "experiment",
@@ -40,7 +46,7 @@ def write_summary_metrics_csv(nng, summary_filename, arch):
             labels += (
                 ["accelerator_configuration", "system_config", "npu_clock", "sram_size"]
-                + [area.identifier_name() + "_bandwidth" for area in MemArea.all()]
+                + [area.identifier_name() + "_bandwidth" for area in mem_areas]
                 + ["weights_storage_area", "feature_map_storage_area"]
             )
@@ -51,10 +57,10 @@ def write_summary_metrics_csv(nng, summary_filename, arch):
"passes_before_fusing",
"passes_after_fusing",
]
- labels += [area.identifier_name() + "_memory_used" for area in MemArea.all()]
+ labels += [area.identifier_name() + "_memory_used" for area in mem_areas]
labels += ["on_chip_flash_bits_per_element", "off_chip_flash_bits_per_element"]
- for mem_area in MemArea.all():
+ for mem_area in mem_areas:
labels += [
mem_area.identifier_name() + "_feature_map_read_bytes",
mem_area.identifier_name() + "_feature_map_write_bytes",
@@ -77,7 +83,7 @@ def write_summary_metrics_csv(nng, summary_filename, arch):
         if arch:
             data_items += (
                 [arch.accelerator_config, arch.system_config, arch.npu_clock, arch.sram_size / 1024]
-                + [arch.memory_bandwidths_per_second[mem_area] / 1000.0 / 1000 / 1000 for mem_area in MemArea.all()]
+                + [arch.memory_bandwidths_per_second[mem_area] / 1000.0 / 1000 / 1000 for mem_area in mem_areas]
                 + [
                     arch.tensor_storage_mem_area[TensorPurpose.Weights].display_name(),
                     arch.tensor_storage_mem_area[TensorPurpose.FeatureMap].display_name(),
@@ -94,14 +100,14 @@ def write_summary_metrics_csv(nng, summary_filename, arch):
         n_cascaded_passes = sum(len(sg.cascaded_passes) for sg in nng.subgraphs)

         data_items += [midpoint_fps, nng.batch_size, midpoint_inference_time, n_passes, n_cascaded_passes]
-        data_items += [nng.memory_used.get(mem_area, 0) / 1024.0 for mem_area in MemArea.all()]
+        data_items += [nng.memory_used.get(mem_area, 0) / 1024.0 for mem_area in mem_areas]
         data_items += [
             nng.bits_per_element.get(MemArea.OnChipFlash, 0.0),
             nng.bits_per_element.get(MemArea.OffChipFlash, 0.0),
         ]

-        for mem_area in MemArea.all():
+        for mem_area in mem_areas:
             bws = nng.bandwidths[mem_area]
             total_bw = np.sum(bws)
             weight_bws = bws[TensorPurpose.Weights]
@@ -144,7 +150,7 @@ def write_pass_metrics_csv(nng, pass_filename):
         )
         bandwidth_names = []
         bandwidth_indices = []
-        for mem_area in MemArea.all():
+        for mem_area in mem_areas_to_report():
             for purpose, purpose_candidates in purpose_list:
                 for direction, direction_candidates in direction_list:
                     label = "bytes_%s_%s_%s" % (mem_area.identifier_name(), purpose, direction)
@@ -231,7 +237,7 @@ def print_performance_metrics_for_strat(
     f=sys.stdout,
 ):
-    orig_mem_areas_labels = [(v, v.display_name()) for v in MemArea.all()]
+    orig_mem_areas_labels = [(v, v.display_name()) for v in mem_areas_to_report()]
     midpoint_inference_time = cycles[PassCycles.Total] / arch.npu_clock
     if midpoint_inference_time > 0:
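As a usage note: if the ethosu.vela package with this change applied is importable, the new helper can be exercised directly. The pytest-style check below is illustrative only and not part of the patch.

# Illustrative check, not part of the patch; assumes ethosu.vela (with this
# change) is installed so the modules below can be imported.
from ethosu.vela.stats_writer import mem_areas_to_report
from ethosu.vela.tensor import MemArea


def test_mem_areas_to_report_excludes_shram():
    areas = mem_areas_to_report()
    # SHRAM is dropped; every other memory area is still reported.
    assert MemArea.Shram not in areas
    assert areas == [area for area in MemArea.all() if area != MemArea.Shram]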