aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAyaan Masood <Ayaan.Masood@arm.com>2022-02-22 11:28:55 +0000
committerAyaan Masood <Ayaan.Masood@arm.com>2022-02-22 11:28:55 +0000
commitb801dda26bbcff8ec4f7967d60f38239fd16912b (patch)
treebb8aa6bc3141f1dd48ffb22630a1eac1cc209825
parent5e0ae5598ab1d7debd603bdd32c7e8f9cad9d581 (diff)
downloadethos-u-vela-b801dda26bbcff8ec4f7967d60f38239fd16912b.tar.gz
MLBEDSW-5880 Fixed Vela verbose weight flag
* Original weights and encoded NPU weights now report their correct sizes instead of zero when running Vela with the --verbose-weights flag (the code that updates these attributes was missing). * Removed print references to the unencoded NPU weights size. Change-Id: I6d3e41c04cc46d24eeb54cab89818a35e5df27be Signed-off-by: Ayaan Masood <Ayaan.Masood@arm.com>
-rw-r--r--ethosu/vela/npu_performance.py33
-rw-r--r--ethosu/vela/scheduler.py3
-rw-r--r--ethosu/vela/stats_writer.py8
3 files changed, 33 insertions, 11 deletions
diff --git a/ethosu/vela/npu_performance.py b/ethosu/vela/npu_performance.py
index 21b420b..08967f4 100644
--- a/ethosu/vela/npu_performance.py
+++ b/ethosu/vela/npu_performance.py
@@ -22,6 +22,8 @@
import copy
from enum import auto
from enum import IntEnum
+from typing import Set
+from uuid import UUID
import numpy as np
@@ -31,11 +33,13 @@ from .architecture_features import Accelerator
from .architecture_features import NpuBlockType
from .architecture_features import SHRAMElements
from .architecture_features import TensorFormat
+from .nn_graph import Graph
from .numeric_util import round_up
from .operation import Kernel
from .operation import Op
from .scheduler import Schedule
from .scheduler import SchedulerOperation
+from .scheduler import SchedulerOpInfo
from .shape4d import Shape4D
from .tensor import BandwidthDirection
from .tensor import MemArea
@@ -725,16 +729,39 @@ def estimate_full_op_performance(
return bws, macs, cycles_a
-def calc_new_performance_for_network(nng, arch):
+def calc_new_performance_for_network(nng: Graph, arch):
total_bws = make_bandwidth_array()
total_macs = 0
total_cycles = np.zeros(PassCycles.Size)
+ total_weight_size = 0
+ total_encoded_weight_size = 0
+
+ # Store unique instances of original/encoded weight tensor uuids to prevent double counting of weights
+ original_weight_uuids: Set[UUID] = set()
+ encoded_npu_weight_uuids: Set[UUID] = set()
for sg in nng.subgraphs:
prev_op = None
for sched_op in sg.sched_ops:
- op_info = sg.schedule.cost_map[sched_op]
+ op_info: SchedulerOpInfo = sg.schedule.cost_map[sched_op]
bws, macs, cycles = estimate_full_op_performance(arch, sg.schedule, sched_op, prev_op, op_info.block_config)
+
+ # Tensors for calculating weight sizes
+ original_weight = sched_op.parent_op.weights
+ encoded_npu_weight = op_info.npu_weights_tensor
+
+ # Save UUIDs of original_weight so only unique instances of tensors are used to calculate weights
+ if original_weight and (original_weight.equivalence_id not in original_weight_uuids):
+
+ original_weight_uuids.add(original_weight.equivalence_id)
+ total_weight_size += original_weight.values.itemsize * original_weight.values.size
+
+ # Save UUIDs of encoded_npu_weight so only unique instances of tensors are used to calculate weights
+ if encoded_npu_weight and (encoded_npu_weight.equivalence_id not in encoded_npu_weight_uuids):
+
+ encoded_npu_weight_uuids.add(encoded_npu_weight)
+ total_encoded_weight_size += len(encoded_npu_weight.buffer)
+
total_bws += bws
total_macs += macs
total_cycles += cycles
@@ -743,3 +770,5 @@ def calc_new_performance_for_network(nng, arch):
nng.bandwidths = total_bws
nng.macs = total_macs
nng.cycles = total_cycles
+ nng.total_original_weights = total_weight_size
+ nng.total_npu_encoded_weights = total_encoded_weight_size
diff --git a/ethosu/vela/scheduler.py b/ethosu/vela/scheduler.py
index 8f2426c..6b08459 100644
--- a/ethosu/vela/scheduler.py
+++ b/ethosu/vela/scheduler.py
@@ -339,7 +339,7 @@ class Scheduler:
self.nng = nng
self.sg = sg
self.arch = arch
- self.sched_ops: List(SchedulerOperation) = []
+ self.sched_ops: List[SchedulerOperation] = []
self.max_schedule = None
self.scheduler_options = options
@@ -459,7 +459,6 @@ class Scheduler:
def create_initial_schedule(self) -> Schedule:
"""Creates an initial schedule with no cascading or buffering of any kind"""
schedule = Schedule(self.sg, "MAX")
-
for op in self.sched_ops:
cost = op.create_scheduler_info(self.nng, op.ofm.shape)
cost.cycles = self.estimate_op_performance(op, cost.block_config, op.ofm.shape.depth)
diff --git a/ethosu/vela/stats_writer.py b/ethosu/vela/stats_writer.py
index 86f531a..d8a274b 100644
--- a/ethosu/vela/stats_writer.py
+++ b/ethosu/vela/stats_writer.py
@@ -110,7 +110,6 @@ def write_summary_metrics_csv(nng, summary_filename, arch):
data_items += [midpoint_fps, nng.batch_size, midpoint_inference_time, n_passes, n_cascaded_passes]
data_items += [nng.memory_used.get(mem_area, 0) / 1024.0 for mem_area in mem_areas]
data_items += [nng.total_original_weights]
- data_items += [nng.total_npu_weights]
data_items += [nng.total_npu_encoded_weights]
for mem_area in mem_areas:
@@ -325,7 +324,6 @@ def print_performance_metrics_for_strat(
if weights_data:
print(f"Original Weights Size {weights_data['original'] / 1024.0:12.2f} KiB", file=f)
- print(f"NPU Weights Size {weights_data['npu'] / 1024.0:12.2f} KiB", file=f)
print(f"NPU Encoded Weights Size {weights_data['npu_encoded'] / 1024.0:12.2f} KiB", file=f)
print(file=f)
@@ -372,11 +370,7 @@ def print_performance_metrics(nng, arch, show_cpu_operations=False, verbose_weig
npu_operations.append(op)
weights_data = (
- {
- "original": nng.total_original_weights,
- "npu": nng.total_npu_weights,
- "npu_encoded": nng.total_npu_encoded_weights,
- }
+ {"original": nng.total_original_weights, "npu_encoded": nng.total_npu_encoded_weights}
if verbose_weights
else None
)