about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: wilisa01 <william.isaksson@arm.com> 2022-08-22 16:13:06 +0000
committer: Rickard Bolin <rickard.bolin@arm.com> 2022-09-01 10:00:50 +0000
commit: 89a8cdd5425521f68674ac23a78790f0f6dc98ed (patch)
tree: 88758bbdff463079820cf91825f6c8116346b05c
parent: 8ddd4899892dace88306b3b155dbf47cc47fa4cd (diff)
download: ethos-u-vela-89a8cdd5425521f68674ac23a78790f0f6dc98ed.tar.gz
MLBEDSW-6755: Add per-layer performance to CSV file
Dump the current per-layer performance estimation information that appears on the terminal to a CSV file.

Change-Id: I00e94168704be8c3c674c8779fb807ed28607ccd
Signed-off-by: wilisa01 <william.isaksson@arm.com>
-rw-r--r-- ethosu/vela/compiler_driver.py 8
-rw-r--r-- ethosu/vela/npu_performance.py 121
-rw-r--r-- ethosu/vela/vela.py 4
3 files changed, 91 insertions, 42 deletions
diff --git a/ethosu/vela/compiler_driver.py b/ethosu/vela/compiler_driver.py
index 1d8756b5..cace0f08 100644
--- a/ethosu/vela/compiler_driver.py
+++ b/ethosu/vela/compiler_driver.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2020-2021 Arm Limited or its affiliates. All rights reserved.
+# Copyright (C) 2020-2022 Arm Limited or its affiliates. All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0
#
@@ -149,7 +149,7 @@ def _check_schedule(nng, arch, scheduler_options):
)
-def compiler_driver(nng, arch, options, scheduler_options, network_type):
+def compiler_driver(nng, arch, options, scheduler_options, network_type, output_basename):
assert verify_graph_health(nng)
# Pre-optimisation operator tracking
@@ -254,4 +254,6 @@ def compiler_driver(nng, arch, options, scheduler_options, network_type):
cpu_tensor_alignment=options.cpu_tensor_alignment,
)
- npu_performance.calc_new_performance_for_network(nng, arch, network_type, options.verbose_performance)
+ npu_performance.calc_new_performance_for_network(
+ nng, arch, network_type, options.verbose_performance, output_basename
+ )
diff --git a/ethosu/vela/npu_performance.py b/ethosu/vela/npu_performance.py
index 6d99dea0..b6ebe11f 100644
--- a/ethosu/vela/npu_performance.py
+++ b/ethosu/vela/npu_performance.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2020-2021 Arm Limited or its affiliates. All rights reserved.
+# Copyright (C) 2020-2022 Arm Limited or its affiliates. All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0
#
@@ -20,6 +20,7 @@
# Called during scheduling to evaluate different proposals, as well as post-scheduling to provide a final performance
# estimate.
import copy
+import csv
from enum import auto
from enum import IntEnum
from typing import Optional
@@ -759,6 +760,7 @@ def print_performance(
macs: dict,
cycles: dict,
mem_usage: dict,
+ output_basename: str,
):
if network_type == NetworkType.TFLite:
nng_optype_to_input_op_type = tflite_optype_to_builtintype
@@ -793,41 +795,86 @@ def print_performance(
f"Name:"
)
- for sched_op in sg.sched_ops:
- # get source op name
- sched_op_src_uid = DebugDatabase._optimisedUID[sched_op.parent_op][1]
- if sched_op_src_uid == DebugDatabase.NULLREF:
- src_op_type = None
- else:
- src_op_type = suid_inv_map[sched_op_src_uid].type
-
- src_op_name = nng_optype_to_input_op_type(src_op_type)
-
- max_macs = cycles[sched_op][PassCycles.Total] * arch.num_macs_per_cycle * arch.ncores
- peak_sram = (
- mem_usage[sched_op] / nng.memory_used[MemArea.Sram] * 100 if MemArea.Sram in nng.memory_used else 0
- )
- print(
- f" {src_op_name:20s}"
- f" {sched_op.op_type:20s}"
- f" {mem_usage[sched_op]:10.0f}"
- f" ({peak_sram:6.2f}%)"
- f" {cycles[sched_op][PassCycles.Total]:10.0f}"
- f" ({cycles[sched_op][PassCycles.Total] / nng.cycles[PassCycles.Total] * 100:6.2f}%)"
- f" ["
- f" {cycles[sched_op][PassCycles.Npu]:10.0f}"
- f" {cycles[sched_op][PassCycles.SramAccess]:10.0f}"
- f" {cycles[sched_op][PassCycles.DramAccess]:10.0f}"
- f" {cycles[sched_op][PassCycles.OnChipFlashAccess]:10.0f}"
- f" {cycles[sched_op][PassCycles.OffChipFlashAccess]:10.0f}"
- f" ]"
- f" {macs[sched_op]:10d}"
- f" ({macs[sched_op] / nng.macs * 100:6.2f}% / {macs[sched_op] / max_macs * 100:6.2f}%)"
- f" {sched_op.name:s}"
- )
-
-
-def calc_new_performance_for_network(nng: Graph, arch, network_type: NetworkType, verbose_performance: bool):
+ with open(output_basename + "_per-layer.csv", "w", encoding="UTF8") as f:
+ writer = csv.writer(f)
+ header = [
+ f"{network_type.name}_operator",
+ "NNG_operator",
+ "SRAM_usage",
+ "Peak",
+ "Op_cycles",
+ "Network",
+ "NPU",
+ "SRAM_AC",
+ "DRAM_AC",
+ "OnFlash_AC",
+ "OffFlash_AC",
+ "MAC_count",
+ "Network",
+ "Util",
+ "Name",
+ ]
+ writer.writerow(header)
+
+ for sched_op in sg.sched_ops:
+ # get source op name
+ sched_op_src_uid = DebugDatabase._optimisedUID[sched_op.parent_op][1]
+ if sched_op_src_uid == DebugDatabase.NULLREF:
+ src_op_type = None
+ else:
+ src_op_type = suid_inv_map[sched_op_src_uid].type
+
+ src_op_name = nng_optype_to_input_op_type(src_op_type)
+
+ max_macs = cycles[sched_op][PassCycles.Total] * arch.num_macs_per_cycle * arch.ncores
+ peak_sram = (
+ mem_usage[sched_op] / nng.memory_used[MemArea.Sram] * 100 if MemArea.Sram in nng.memory_used else 0
+ )
+ print(
+ f" {src_op_name:20s}"
+ f" {sched_op.op_type:20s}"
+ f" {mem_usage[sched_op]:10.0f}"
+ f" ({peak_sram:6.2f}%)"
+ f" {cycles[sched_op][PassCycles.Total]:10.0f}"
+ f" ({cycles[sched_op][PassCycles.Total] / nng.cycles[PassCycles.Total] * 100:6.2f}%)"
+ f" ["
+ f" {cycles[sched_op][PassCycles.Npu]:10.0f}"
+ f" {cycles[sched_op][PassCycles.SramAccess]:10.0f}"
+ f" {cycles[sched_op][PassCycles.DramAccess]:10.0f}"
+ f" {cycles[sched_op][PassCycles.OnChipFlashAccess]:10.0f}"
+ f" {cycles[sched_op][PassCycles.OffChipFlashAccess]:10.0f}"
+ f" ]"
+ f" {macs[sched_op]:10d}"
+ f" ({macs[sched_op] / nng.macs * 100:6.2f}% / {macs[sched_op] / max_macs * 100:6.2f}%)"
+ f" {sched_op.name:s}"
+ )
+ data = [
+ f"{src_op_name}",
+ f"{sched_op.op_type}",
+ f"{mem_usage[sched_op]}",
+ f"{peak_sram}",
+ f"{cycles[sched_op][PassCycles.Total]}",
+ f"{cycles[sched_op][PassCycles.Total] / nng.cycles[PassCycles.Total]}",
+ f"{cycles[sched_op][PassCycles.Npu]}",
+ f"{cycles[sched_op][PassCycles.SramAccess]}",
+ f"{cycles[sched_op][PassCycles.DramAccess]}",
+ f"{cycles[sched_op][PassCycles.OnChipFlashAccess]}",
+ f"{cycles[sched_op][PassCycles.OffChipFlashAccess]}",
+ f"{macs[sched_op]}",
+ f"{macs[sched_op] / nng.macs}",
+ f"{macs[sched_op] / max_macs}",
+ f"{sched_op.name}",
+ ]
+ writer.writerow(x for x in data)
+
+
+def calc_new_performance_for_network(
+ nng: Graph,
+ arch,
+ network_type: NetworkType,
+ verbose_performance: bool,
+ output_basename: str = "output/unnamed_network",
+):
total_bws = make_bandwidth_array()
total_macs = 0
total_cycles = np.zeros(PassCycles.Size)
@@ -886,4 +933,4 @@ def calc_new_performance_for_network(nng: Graph, arch, network_type: NetworkType
nng.total_npu_encoded_weights = total_encoded_weight_size
if verbose_performance:
- print_performance(nng, arch, network_type, bws, macs, cycles, mem_usage)
+ print_performance(nng, arch, network_type, bws, macs, cycles, mem_usage, output_basename)
diff --git a/ethosu/vela/vela.py b/ethosu/vela/vela.py
index 1de437bb..a42b2188 100644
--- a/ethosu/vela/vela.py
+++ b/ethosu/vela/vela.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2020-2021 Arm Limited or its affiliates. All rights reserved.
+# Copyright (C) 2020-2022 Arm Limited or its affiliates. All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0
#
@@ -75,7 +75,7 @@ def process(input_name, enable_debug_db, arch, model_reader_options, compiler_op
print("Model reading took %f s" % (stop - start))
start = time.time()
- compiler_driver.compiler_driver(nng, arch, compiler_options, scheduler_options, network_type)
+ compiler_driver.compiler_driver(nng, arch, compiler_options, scheduler_options, network_type, output_basename)
summary_csv_file = "{0}_summary_{1}.csv".format(output_basename, arch.system_config)
stats_writer.write_summary_metrics_csv(nng, summary_csv_file, arch)