aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJames Ward <james.ward@arm.com>2021-10-14 12:58:02 +0100
committerJames Ward <james.ward@arm.com>2021-11-11 13:04:08 +0000
commit9338978eae5656005d8ac909b41aa5abac7a83a7 (patch)
tree70fe15e282630cb9528281cf36170e70d07cd52f
parentcc8569fcd243d7c96a20e0f531f6f97a90df83f7 (diff)
downloadethos-u-vela-9338978eae5656005d8ac909b41aa5abac7a83a7.tar.gz
MLBEDSW-5410 Clarify "Subgraph IO Summary" output
* Add small aesthetic changes to summary
* Move "_cpu" suffix from cloned tensor to original tensor such that
  suffix is no longer externally visible

Signed-off-by: James Ward <james.ward@arm.com>
Change-Id: I97427561bd9acb04765ae9de6278760511278118
-rw-r--r--ethosu/vela/extract_npu_subgraphs.py3
-rw-r--r--ethosu/vela/vela.py41
2 files changed, 24 insertions, 20 deletions
diff --git a/ethosu/vela/extract_npu_subgraphs.py b/ethosu/vela/extract_npu_subgraphs.py
index b536cfb..d802b51 100644
--- a/ethosu/vela/extract_npu_subgraphs.py
+++ b/ethosu/vela/extract_npu_subgraphs.py
@@ -104,7 +104,8 @@ def rewrite_tensor_npu_producer_cpu_consumers(
orig_tens, call_ps, startup_init_ps, npu_subgraph, cpu_subgraph, subgraph_for_pass
):
- new_tens = orig_tens.clone("_cpu")
+ new_tens = orig_tens.clone("")
+ orig_tens.name = orig_tens.name + "_cpu"
npu_subgraph.output_tensors.append(orig_tens)
call_ps.outputs.append(new_tens)
diff --git a/ethosu/vela/vela.py b/ethosu/vela/vela.py
index 8a80827..63cccc5 100644
--- a/ethosu/vela/vela.py
+++ b/ethosu/vela/vela.py
@@ -37,7 +37,6 @@ from .debug_database import DebugDatabase
from .errors import InputFileError
from .errors import VelaError
from .nn_graph import NetworkType
-from .nn_graph import PassPlacement
from .nn_graph import TensorAllocator
from .tensor import MemArea
from .tensor import Tensor
@@ -140,30 +139,34 @@ def print_subgraph_io_summary(nng):
print("Subgraph IO Summary")
print("-------------------")
- print("NNG: {0}".format(nng.name))
+ print(f"NNG: {nng.name}")
max_sg_size = 0
for sg in reversed(nng.subgraphs):
- print(" Subgraph: {0} = {1}".format(sg.name, sg.placement))
+ print(f" NNG Subgraph: {sg.name} = {sg.placement}")
sg_size = 0
- if sg.placement == PassPlacement.Npu:
- for tens in sg.input_tensors + [sg.scratch_tensor] + sg.output_tensors:
- if tens in sg.input_tensors:
- tens_dir = "In"
- elif tens in sg.output_tensors:
- tens_dir = "Out"
- else:
- tens_dir = "In/Out"
-
- size = tens.elements() * tens.element_size() / 1024.0
- sg_size = sg_size + size
- print(" Tensor [{0}]: {1} = {2} KiB".format(tens_dir, tens.name, size))
-
- print(" Total Size = {0} KiB".format(sg_size))
- print(" SRAM Memory Used = {0} KiB".format(sg.memory_used.get(MemArea.Sram, 0) / 1024.0))
+ if hasattr(sg, "scratch_tensor") and sg.scratch_tensor is not None:
+ sg_tensors = sg.input_tensors + [sg.scratch_tensor] + sg.output_tensors
+ else:
+ sg_tensors = sg.input_tensors + sg.output_tensors
+
+ for tens in sg_tensors:
+ if tens in sg.input_tensors:
+ tens_dir = "In"
+ elif tens in sg.output_tensors:
+ tens_dir = "Out"
+ else:
+ tens_dir = "In/Out"
+
+ size = tens.elements() * tens.element_size() / 1024.0
+ sg_size = sg_size + size
+ print(f" Tensor [{tens_dir}]: {tens.name} = {size} KiB")
+
+ print(f" Total Size = {sg_size} KiB")
+ print(f" SRAM Memory Used = {sg.memory_used.get(MemArea.Sram, 0) / 1024.0} KiB")
max_sg_size = max(sg_size, max_sg_size)
- print(" Maximum Subgraph Size = {0} KiB".format(max_sg_size))
+ print(f" Maximum NNG Subgraph Size = {max_sg_size} KiB")
def generate_supported_ops():