aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Johan Alfven <johan.alfven@arm.com> 2024-01-25 12:32:13 +0100
committer Johan Alfven <johan.alfven@arm.com> 2024-01-30 12:25:05 +0000
commit 014bc28324014dc82e3b352d7a81df9ed0338c56 (patch)
tree b0b5e765319531da441874e03c4c81980bd6f800
parent cbec599c97b8620580ed5f80ea253770df872fa1 (diff)
download ethos-u-vela-014bc28324014dc82e3b352d7a81df9ed0338c56.tar.gz
MLBEDSW-8569: MLCE: Reported number of CPU ops are wrong
- A Pack op is implemented by several AvgPool ops. Depending on the number of CPU ops and the graph topology, this could result in the AvgPool ops ending up in different nodes. One of these nodes had the Pack output referenced to it but the other node did not. As a result, the full graph was not traversed when calculating CPU ops. - The compiled network works as intended, but the reported number of CPU ops was wrong. - Added a new method that extracts the ops using the passes in the subgraphs, which fixes the problem. Change-Id: Ie88ebd4669783559258ae763737a4c7f86c905f8 Signed-off-by: Johan Alfven <johan.alfven@arm.com>
-rw-r--r-- ethosu/vela/nn_graph.py 17
-rw-r--r-- ethosu/vela/stats_writer.py 6
2 files changed, 19 insertions, 4 deletions
diff --git a/ethosu/vela/nn_graph.py b/ethosu/vela/nn_graph.py
index 50266d5..92c7e1b 100644
--- a/ethosu/vela/nn_graph.py
+++ b/ethosu/vela/nn_graph.py
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright 2020-2023 Arm Limited and/or its affiliates <open-source-office@arm.com>
+# SPDX-FileCopyrightText: Copyright 2020-2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
#
# SPDX-License-Identifier: Apache-2.0
#
@@ -318,6 +318,7 @@ class Subgraph:
ps.outputs = [out_tens for out_tens in ps.outputs if len(out_tens.consumers()) > 0]
ps.ops = [op for op in ps.ops if op.outputs[0] in ps.outputs]
+ # get_all_ops is used when traversing the original graph
def get_all_ops(self):
all_ops = []
visit_op_set = set()
@@ -344,6 +345,20 @@ class Subgraph:
return all_ops
+ # get_all_ops_from_passes is used by stats writer to calculate the number of
+ # CPU and NPU ops
+ # Due to a side effect, get_all_ops might not traverse the full graph after
+ # extract_npu_subgraph has been called, so it should not be used by the stats writer.
+ # The reason is that the main graph might have NPU nodes with no visible outputs,
+ # and therefore those nodes will be missed.
+ def get_all_ops_from_passes(self):
+ all_ops = []
+ for idx, ps in enumerate(self.passes):
+ for op in ps.ops:
+ all_ops.append(op)
+
+ return all_ops
+
def print_operators(self, ignore_placeholder_const=True, show_attributes=True):
print(f"Operators of Subgraph {self.name}")
diff --git a/ethosu/vela/stats_writer.py b/ethosu/vela/stats_writer.py
index 25c9030..b743a5f 100644
--- a/ethosu/vela/stats_writer.py
+++ b/ethosu/vela/stats_writer.py
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright 2020-2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
+# SPDX-FileCopyrightText: Copyright 2020-2022, 2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
#
# SPDX-License-Identifier: Apache-2.0
#
@@ -365,11 +365,11 @@ def print_performance_metrics(nng, arch, show_cpu_operations=False, verbose_weig
for sg in nng.subgraphs:
if sg.placement == PassPlacement.Cpu:
- for op in sg.get_all_ops():
+ for op in sg.get_all_ops_from_passes():
if op.type not in ir_only_ops:
cpu_operations.append(op)
elif sg.placement == PassPlacement.Npu:
- for op in sg.get_all_ops():
+ for op in sg.get_all_ops_from_passes():
if op.type not in ir_only_ops:
npu_operations.append(op)