aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTim Hall <tim.hall@arm.com>2022-11-11 18:55:49 +0000
committerTim Hall <tim.hall@arm.com>2022-11-14 12:50:22 +0000
commit5ae6cb0b7a50f8164d5a4568126e90562bb1591a (patch)
tree28d2030aa3445e5c32a79c381602235dd9cb8060
parentf3c7d557371b26835e3183064de58354f3a8b3cb (diff)
downloadethos-u-vela-5ae6cb0b7a50f8164d5a4568126e90562bb1591a.tar.gz
MLBEDSW-7040: verbose-performance doesn't report the original op
- Fixed the reporting of the input network operator to correctly report the original operator type rather than the current one - Fixed a divide by zero bug when calculating percentages - Refactored the verbose-performance code so that console and csv outputs use a single definition of the header and data Signed-off-by: Tim Hall <tim.hall@arm.com> Change-Id: Ibd3fa99b65f0602dcdcff696f2d565ac13453306
-rw-r--r--ethosu/vela/npu_performance.py189
1 files changed, 99 insertions, 90 deletions
diff --git a/ethosu/vela/npu_performance.py b/ethosu/vela/npu_performance.py
index b6ebe11f..929548ec 100644
--- a/ethosu/vela/npu_performance.py
+++ b/ethosu/vela/npu_performance.py
@@ -762,6 +762,13 @@ def print_performance(
mem_usage: dict,
output_basename: str,
):
+ def _percentage(part, whole):
+ # desired behaviour is for division by zero to return 100%
+ if whole == 0:
+ return 100.0
+ else:
+ return part / whole * 100.0
+
if network_type == NetworkType.TFLite:
nng_optype_to_input_op_type = tflite_optype_to_builtintype
else:
@@ -769,103 +776,105 @@ def print_performance(
suid_inv_map = {v: k for k, v in DebugDatabase._sourceUID.items()}
+ # the header is a list (one entry per column) of tuples (column name, alignment, width, precision)
+ header = [
+ (f"{network_type.name}_operator", "<", 20, -1),
+ ("NNG Operator", "<", 20, -1),
+ ("SRAM Usage", ">", 10, 0.0),
+ ("Peak%", ">", 6, 0.2),
+ ("Op Cycles", ">", 10, 0.0),
+ ("Network%", ">", 8, 0.2),
+ ("NPU", ">", 10, 0.0),
+ ("SRAM AC", ">", 10, 0.0),
+ ("DRAM AC", ">", 10, 0.0),
+ ("OnFlash AC", ">", 10, 0.0),
+ ("OffFlash AC", ">", 11, 0.0),
+ ("MAC Count", ">", 10, 0.0),
+ ("Network%", ">", 8, 0.2),
+ ("Util%", ">", 6, 0.2),
+ ("Name", "<", 20, -1),
+ ]
+
+ # open the csv
+ csv_file = open(output_basename + "_per-layer.csv", "w", encoding="UTF8")
+ writer = csv.writer(csv_file)
+
for sg in nng.subgraphs:
if sg.placement != PassPlacement.Npu:
continue
- print(f"\n{str('#') * 80}")
- print(f"Performance for NPU Subgraph {sg.name}")
- print(
- f" {network_type.name + str(' Operator:'):20s}"
- f" {str('NNG Operator:'):20s}"
- f" {str('SRAM Usage'):>10s}"
- f" ({str('Peak'):>6s}%):"
- f"{str('Op Cycles'):>10s}"
- f" ({str('Netwrk'):>6s}%)"
- f" ["
- f" {str('NPU'):>10s}"
- f" {str('SRAM AC'):>10s}"
- f" {str('DRAM AC'):>10s}"
- f" {str('OnFlash AC'):>10s}"
- f" {str('OffFlashAC'):>10s}"
- f" ]:"
- f"{str('MAC Count'):>10s}"
- f" ({str('Netwrk'):>6s}% / {str('Util'):>6s}%):"
- f"Name:"
- )
+ sg_seperator_text = f"\n{str('#') * 80}\nPerformance for NPU Subgraph {sg.name}"
- with open(output_basename + "_per-layer.csv", "w", encoding="UTF8") as f:
- writer = csv.writer(f)
- header = [
- f"{network_type.name}_operator",
- "NNG_operator",
- "SRAM_usage",
- "Peak",
- "Op_cycles",
- "Network",
- "NPU",
- "SRAM_AC",
- "DRAM_AC",
- "OnFlash_AC",
- "OffFlash_AC",
- "MAC_count",
- "Network",
- "Util",
- "Name",
- ]
- writer.writerow(header)
-
- for sched_op in sg.sched_ops:
- # get source op name
- sched_op_src_uid = DebugDatabase._optimisedUID[sched_op.parent_op][1]
- if sched_op_src_uid == DebugDatabase.NULLREF:
- src_op_type = None
- else:
- src_op_type = suid_inv_map[sched_op_src_uid].type
-
- src_op_name = nng_optype_to_input_op_type(src_op_type)
-
- max_macs = cycles[sched_op][PassCycles.Total] * arch.num_macs_per_cycle * arch.ncores
- peak_sram = (
- mem_usage[sched_op] / nng.memory_used[MemArea.Sram] * 100 if MemArea.Sram in nng.memory_used else 0
- )
- print(
- f" {src_op_name:20s}"
- f" {sched_op.op_type:20s}"
- f" {mem_usage[sched_op]:10.0f}"
- f" ({peak_sram:6.2f}%)"
- f" {cycles[sched_op][PassCycles.Total]:10.0f}"
- f" ({cycles[sched_op][PassCycles.Total] / nng.cycles[PassCycles.Total] * 100:6.2f}%)"
- f" ["
- f" {cycles[sched_op][PassCycles.Npu]:10.0f}"
- f" {cycles[sched_op][PassCycles.SramAccess]:10.0f}"
- f" {cycles[sched_op][PassCycles.DramAccess]:10.0f}"
- f" {cycles[sched_op][PassCycles.OnChipFlashAccess]:10.0f}"
- f" {cycles[sched_op][PassCycles.OffChipFlashAccess]:10.0f}"
- f" ]"
- f" {macs[sched_op]:10d}"
- f" ({macs[sched_op] / nng.macs * 100:6.2f}% / {macs[sched_op] / max_macs * 100:6.2f}%)"
- f" {sched_op.name:s}"
- )
- data = [
- f"{src_op_name}",
- f"{sched_op.op_type}",
- f"{mem_usage[sched_op]}",
- f"{peak_sram}",
- f"{cycles[sched_op][PassCycles.Total]}",
- f"{cycles[sched_op][PassCycles.Total] / nng.cycles[PassCycles.Total]}",
- f"{cycles[sched_op][PassCycles.Npu]}",
- f"{cycles[sched_op][PassCycles.SramAccess]}",
- f"{cycles[sched_op][PassCycles.DramAccess]}",
- f"{cycles[sched_op][PassCycles.OnChipFlashAccess]}",
- f"{cycles[sched_op][PassCycles.OffChipFlashAccess]}",
- f"{macs[sched_op]}",
- f"{macs[sched_op] / nng.macs}",
- f"{macs[sched_op] / max_macs}",
- f"{sched_op.name}",
+ # the data is a list (one entry per op) of lists (matching the header columns)
+ data = []
+ for sched_op in sg.sched_ops:
+ # get source op name
+ sched_op_src_uid = DebugDatabase._optimisedUID[sched_op.parent_op][1]
+ if sched_op_src_uid == DebugDatabase.NULLREF:
+ src_op_type = None
+ else:
+ src_op_type = suid_inv_map[sched_op_src_uid].original_type
+
+ src_op_name = nng_optype_to_input_op_type(src_op_type)
+
+ max_macs = cycles[sched_op][PassCycles.Total] * arch.num_macs_per_cycle * arch.ncores
+ peak_sram = (
+ _percentage(mem_usage[sched_op], nng.memory_used[MemArea.Sram])
+ if MemArea.Sram in nng.memory_used
+ else 0
+ )
+
+ data.append(
+ [
+ src_op_name,
+ sched_op.op_type,
+ mem_usage[sched_op],
+ peak_sram,
+ cycles[sched_op][PassCycles.Total],
+ _percentage(cycles[sched_op][PassCycles.Total], nng.cycles[PassCycles.Total]),
+ cycles[sched_op][PassCycles.Npu],
+ cycles[sched_op][PassCycles.SramAccess],
+ cycles[sched_op][PassCycles.DramAccess],
+ cycles[sched_op][PassCycles.OnChipFlashAccess],
+ cycles[sched_op][PassCycles.OffChipFlashAccess],
+ macs[sched_op],
+ _percentage(macs[sched_op], nng.macs),
+ _percentage(macs[sched_op], max_macs),
+ sched_op.name,
]
- writer.writerow(x for x in data)
+ )
+
+ # print to console
+ print(sg_seperator_text)
+ line = ""
+ line2 = ""
+ for col_name, align, width, _ in header:
+ line_data = f"{col_name:{align}{width}}"
+ line += line_data + " "
+ line2 += "-" * len(line_data) + " "
+ print(line)
+ print(line2)
+
+ for op_data in data:
+ line = ""
+ for idx, item in enumerate(op_data):
+ _, align, width, precision = header[idx]
+ if precision == -1:
+ w = str(width)
+ else:
+ w = str(width + precision) + "f"
+ line += f"{item:{align}{w}}" + " "
+ print(line)
+
+ # print to csv
+ writer.writerow((sg_seperator_text,))
+ writer.writerow(col_name for col_name, _, _, _ in header)
+ for op_data in data:
+ writer.writerow(op_data)
+
+ # close the csv
+ csv_file.close()
def calc_new_performance_for_network(