aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTim Hall <tim.hall@arm.com>2023-08-08 14:10:17 +0100
committerTim Hall <tim.hall@arm.com>2023-08-08 16:36:20 +0100
commitcd03504cfc29767d33d37b5c587116ab90752d74 (patch)
tree187d75690e7316c2e430c8c81e9f526bd8dc20a1
parent4bd28aa1bc8d7dc952c3d4ff523d942540947280 (diff)
downloadethos-u-vela-cd03504cfc29767d33d37b5c587116ab90752d74.tar.gz
MLBEDSW-7689: Document verbose command stream options
- Documented High-Level and Register-Level command stream options
- Changed High-Level command stream display to show the name of the command
- Fixed an issue with some operators not being displayed by the CLI option
  --verbose-operators
- Changed an unneeded print in pass packing to a more useful assertion

Change-Id: I9d53f19f4e32d0478209bc964724c27c935f66d6
Signed-off-by: Tim Hall <tim.hall@arm.com>
-rw-r--r--OPTIONS.md47
-rw-r--r--ethosu/vela/high_level_command_stream.py6
-rw-r--r--ethosu/vela/nn_graph.py54
-rw-r--r--ethosu/vela/pass_packing.py2
-rw-r--r--ethosu/vela/register_command_stream_generator.py2
5 files changed, 63 insertions, 48 deletions
diff --git a/OPTIONS.md b/OPTIONS.md
index 38f65a7..646444e 100644
--- a/OPTIONS.md
+++ b/OPTIONS.md
@@ -398,8 +398,8 @@ vela network.tflite --verbose-performance
Displays a list of all operators and the tensors that are connected to them.
Additional information is shown about the tensors. The format is:
-`<num> <op_type> <op_name>`, where;
-` <direction> <idx> <purpose> <mem_area> <mem_type> <tens>`, where;
+`<num> <op_type> <op_name> <direction> <idx> <purpose> <mem_area> <mem_type>
+<tens>`, where;
num = an increasing operator count
op_type = the Graph IR Operator Type
op_name = the Graph IR Operator Name (this may have been derived from the
@@ -466,9 +466,32 @@ vela network.tflite --verbose-allocation
### Verbose High Level Command Stream
-Display a high level command stream with one command per DMA or NPU stripe. The
-commands contain information about block configuration as well as IFM-, OFM-
-and weight boxes.
+Display an enumerated list of High-Level (HL) commands in execution
+order. There are three types of command and each one displays individual
+information:
+
+* NPU Stripe = `<name> <ifm_box> <ifm2_box> <ofm_box> <weight_box>
+<block_config>`, represents a data processing operation that maps directly to
+a single Ethos-U operation where;
+name = name of the pass that corresponds to this HL command (not unique)
+ifm_box = part of the IFM in NHWC format
+ifm2_box = part of the IFM2 in NHWC format (is empty [] when not present)
+ofm_box = part of the OFM in NHWC format
+weight_box = part of the filter kernel in NHWC format
+block_config = block processing size in HWIO format
+
+* DMA = `<name> <in> <out> <box>`, represents a memory copy operation from
+source to destination where;
+name = name of the pass that corresponds to this HL command (not unique)
+in = name of the source tensor
+out = name of the destination tensor
+box = part of the source tensor in NHWC format
+
+* NOP = `<name> <in> <out>`, represents a memory copy operation that has source
+equal to destination and therefore does nothing, where;
+name = name of the pass that corresponds to this HL command (not unique)
+in = name of the input tensor
+out = name of the output tensor
```bash
vela network.tflite --verbose-high-level-command-stream
@@ -476,8 +499,18 @@ vela network.tflite --verbose-high-level-command-stream
### Verbose Register Command Stream
-Display all NPU operations and a register level (low level) command stream with
-all register settings for the network execution on the NPU.
+Display two groups of information. The first group is the input to the register
+command stream generator. The second group is the output of the register
+command stream generator:
+
+* Input = an enumerated list of the High-Level commands that are the input to
+the generator. Each command details all of its attributes.
+
+* Output = a disassembly of the Ethos-U command stream (referred to as the
+register command stream). More information about the commands listed in the
+register command stream can be found in the Arm Ethos-U NPU Technical Reference
+Manuals that are available from the Arm Developer website (see
+[README - Resources](README.md#resources)).
```bash
vela network.tflite --verbose-register-command-stream
diff --git a/ethosu/vela/high_level_command_stream.py b/ethosu/vela/high_level_command_stream.py
index 09c1805..8403b94 100644
--- a/ethosu/vela/high_level_command_stream.py
+++ b/ethosu/vela/high_level_command_stream.py
@@ -236,7 +236,7 @@ class NpuStripe(Command):
return True
def __str__(self):
- return "<NPUStripe: ps=%s, ifm_box=%s, ifm2_box=%s, ofm_box=%s, weight_box=%s, block_config=%s>" % (
+ return "<NpuStripe: name=%s, ifm_box=%s, ifm2_box=%s, ofm_box=%s, weight_box=%s, block_config=%s>" % (
self.ps.name,
self.ifm_box,
self.ifm2_box,
@@ -286,7 +286,7 @@ class DMA(Command):
self.box = box
def __str__(self):
- return "<DMA: in=%s, out=%s, box=%s>" % (self.in_tensor.name, self.out_tensor.name, self.box)
+ return f"<DMA: name={self.ps.name}, in={self.in_tensor.name}, out={self.out_tensor.name}, box={self.box}>"
__repr__ = __str__
@@ -302,7 +302,7 @@ class NOP(Command):
self.out_tensor = out_tensor
def __str__(self):
- return f"<NOP: in={self.in_tensor.name}, out={self.out_tensor.name}>"
+ return f"<NOP: name={self.ps.name}, in={self.in_tensor.name}, out={self.out_tensor.name}>"
__repr__ = __str__
diff --git a/ethosu/vela/nn_graph.py b/ethosu/vela/nn_graph.py
index a43aac2..6dc6b58 100644
--- a/ethosu/vela/nn_graph.py
+++ b/ethosu/vela/nn_graph.py
@@ -338,41 +338,21 @@ class Subgraph:
return all_ops
- def print_operators(self):
- print("print_operators()", self.name)
- all_ops = self.get_all_ops()
- unique_ops = []
- for op in all_ops:
- if op.type in (Op.Const, Op.Identity, Op.Placeholder):
- continue
-
- attrs = op.attrs.copy()
- if op.type in (Op.Conv2D, Op.Conv2DBias, Op.DepthwiseConv2DBias):
- kshape = op.inputs[1].shape
- attrs["kshape"] = [kshape[0], kshape[1]]
- attrs["type"] = op.type.name
- attrs.pop("use_cudnn_on_gpu", None)
- custom_options = attrs.pop("custom_options", None)
- if attrs not in unique_ops:
- unique_ops.append(attrs)
- # print attributes in human readable format
- a = attrs.copy()
- if custom_options is not None:
- a["custom_options"] = custom_options
- s = a.pop("type")
- data_format = a.pop("data_format", None)
- if data_format and data_format != b"NHWC":
- s += " " + str(data_format)
- t = a.pop("T", None)
- if t:
- s += " " + str(t)[9:-2]
- srct = a.pop("SrcT", None)
- if srct:
- s += " " + str(srct)[9:-2]
- dstt = a.pop("DstT", None)
- if dstt:
- s += "->" + str(dstt)[9:-2]
- print(s + " " + str(a))
+ def print_operators(self, ignore_placeholder_const=True, show_attributes=True):
+ print(f"Operators of Subgraph {self.name}")
+
+ ignore_ops = (Op.Const, Op.Identity, Op.Placeholder) if ignore_placeholder_const else ()
+ all_ops = [op for op in self.get_all_ops() if op.type not in ignore_ops]
+
+ if len(all_ops) > 0:
+ max_op_type_len = max([len(op.type.name) for op in all_ops])
+
+ for idx, op in enumerate(all_ops):
+ attrs_str = f" - {op.attrs}" if show_attributes else ""
+ print(f"{idx:3}: {op.type:{max_op_type_len}}{attrs_str} - {op.name}")
+
+ else:
+ print("No Operators")
def print_graph(self, label=None):
if label:
@@ -562,9 +542,9 @@ class Graph:
for sg in self.subgraphs:
sg.refresh_after_modification()
- def print_operators(self):
+ def print_operators(self, ignore_placeholder_const=True, show_attributes=True):
for sg in self.subgraphs:
- sg.print_operators()
+ sg.print_operators(ignore_placeholder_const, show_attributes)
def print_graph(self, label=None):
for sg in self.subgraphs:
diff --git a/ethosu/vela/pass_packing.py b/ethosu/vela/pass_packing.py
index 932f701..4c733cc 100644
--- a/ethosu/vela/pass_packing.py
+++ b/ethosu/vela/pass_packing.py
@@ -271,7 +271,7 @@ def pack_into_passes(nng, arch, verbose_packing=False):
assert ifm_tensor.purpose == TensorPurpose.FeatureMap
if operation_set is None:
- print("Warning:", curr_op.type, "operation is unknown or unsupported, placing on CPU")
+ assert not curr_op.run_on_npu # operator should have been placed on the CPU
for inp in reversed(curr_op.inputs):
if inp is None:
diff --git a/ethosu/vela/register_command_stream_generator.py b/ethosu/vela/register_command_stream_generator.py
index 71fec3b..56aae73 100644
--- a/ethosu/vela/register_command_stream_generator.py
+++ b/ethosu/vela/register_command_stream_generator.py
@@ -1052,6 +1052,7 @@ def generate_command_stream(
"""
emit = CommandStreamEmitter()
if verbose:
+ print("Register-Level Command Stream: Input")
print_operations(npu_op_list, npu_op_to_cmd)
# Calculate memory accesses for every operation
memory_accesses: Dict[NpuOperation, MemoryAccessSet] = {}
@@ -1105,6 +1106,7 @@ def generate_command_stream(
)
if verbose:
+ print("Register-Level Command Stream: Output")
emit.print_cmds()
print(f"Number of commands = {len(emit.cmd_stream)}")
print(f"Command stream length = {emit.size_in_bytes()} bytes")