From cd03504cfc29767d33d37b5c587116ab90752d74 Mon Sep 17 00:00:00 2001
From: Tim Hall <tim.hall@arm.com>
Date: Tue, 8 Aug 2023 14:10:17 +0100
Subject: MLBEDSW-7689: Document verbose command stream options

 - Documented High-Level and Register-Level command stream options
 - Changed High-Level command stream display to show the name of the
   command
 - Fixed an issue with some operators not being displayed by the CLI
   option --verbose-operators
 - Changed an unneeded print in pass packing to a more useful assertion

Change-Id: I9d53f19f4e32d0478209bc964724c27c935f66d6
Signed-off-by: Tim Hall <tim.hall@arm.com>
---
 OPTIONS.md                                       | 47 ++++++++++++++++++---
 ethosu/vela/high_level_command_stream.py         |  6 +--
 ethosu/vela/nn_graph.py                          | 54 ++++++++----------------
 ethosu/vela/pass_packing.py                      |  2 +-
 ethosu/vela/register_command_stream_generator.py |  2 +
 5 files changed, 63 insertions(+), 48 deletions(-)

diff --git a/OPTIONS.md b/OPTIONS.md
index 38f65a7e..646444e1 100644
--- a/OPTIONS.md
+++ b/OPTIONS.md
@@ -398,8 +398,8 @@ vela network.tflite --verbose-performance
 Displays a list of all operators and the tensors that are connected to them.
 Additional information is shown about the tensors. The format is:
-`<num> <op_type> <op_name>`, where;
-`<tensor details>`, where;
+`<num>: <op_type> - <attributes> - <op_name>
+<tensor details>`, where;
 num = an increasing operator count
 op_type = the Graph IR Operator Type
 op_name = the Graph IR Operator Name (this may have been derived from the
 corresponding TFLite operator name)
@@ -466,9 +466,32 @@ vela network.tflite --verbose-allocation
 
 ### Verbose High Level Command Stream
 
-Display a high level command stream with one command per DMA or NPU stripe. The
-commands contain information about block configuration as well as IFM-, OFM-
-and weight boxes.
+Display an enumerated list of High-Level (HL) commands in execution
+order. There are three types of command and each type displays its own
+information:
+
+* NPU Stripe = `<name> <ifm_box> <ifm2_box> <ofm_box> <weight_box>
+<block_config>`, represents a data processing operation that maps directly to
+a single Ethos-U operation where;
+name = name of the pass that corresponds to this HL command (not unique)
+ifm_box = part of the IFM in NHWC format
+ifm2_box = part of the IFM2 in NHWC format (is empty [] when not present)
+ofm_box = part of the OFM in NHWC format
+weight_box = part of the filter kernel in NHWC format
+block_config = block processing size in HWIO format
+
+* DMA = `<name> <in> <out> <box>`, represents a memory copy operation from
+source to destination where;
+name = name of the pass that corresponds to this HL command (not unique)
+in = name of the source tensor
+out = name of the destination tensor
+box = part of the source tensor in NHWC format
+
+* NOP = `<name> <in> <out>`, represents a memory copy operation that has source
+equal to destination and therefore does nothing, where;
+name = name of the pass that corresponds to this HL command (not unique)
+in = name of the input tensor
+out = name of the output tensor
 
 ```bash
 vela network.tflite --verbose-high-level-command-stream
@@ -476,8 +499,18 @@ vela network.tflite --verbose-high-level-command-stream
 
 ### Verbose Register Command Stream
 
-Display all NPU operations and a register level (low level) command stream with
-all register settings for the network execution on the NPU.
+Display two groups of information. The first group is the input to the register
+command stream generator. The second group is the output of the register
+command stream generator:
+
+* Input = an enumerated list of the High-Level commands that are the input to
+the generator. Each command details all of its attributes.
+
+* Output = a disassembly of the Ethos-U command stream (referred to as the
+register command stream). More information about the commands listed in the
+register command stream can be found in the Arm Ethos-U NPU Technical Reference
+Manuals that are available from the Arm Developer website (see
+[README - Resources](README.md#resources)).
 
 ```bash
 vela network.tflite --verbose-register-command-stream
diff --git a/ethosu/vela/high_level_command_stream.py b/ethosu/vela/high_level_command_stream.py
index 09c1805d..8403b94f 100644
--- a/ethosu/vela/high_level_command_stream.py
+++ b/ethosu/vela/high_level_command_stream.py
@@ -236,7 +236,7 @@ class NpuStripe(Command):
         return True
 
     def __str__(self):
-        return "<NPUStripe: %s, ifm_box=%s, ifm2_box=%s, ofm_box=%s, weight_box=%s, block_config=%s>" % (
+        return "<NpuStripe: name=%s, ifm_box=%s, ifm2_box=%s, ofm_box=%s, weight_box=%s, block_config=%s>" % (
             self.ps.name,
             self.ifm_box,
             self.ifm2_box,
@@ -286,7 +286,7 @@ class DMA(Command):
         self.box = box
 
     def __str__(self):
-        return "<DMA: in=%s, out=%s, box=%s>" % (self.in_tensor.name, self.out_tensor.name, self.box)
+        return f"<DMA: name={self.ps.name}, in={self.in_tensor.name}, out={self.out_tensor.name}, box={self.box}>"
 
     __repr__ = __str__
 
@@ -302,7 +302,7 @@ class NOP(Command):
         self.out_tensor = out_tensor
 
     def __str__(self):
-        return f"<NOP: in={self.in_tensor}, out={self.out_tensor}>"
+        return f"<NOP: name={self.ps.name}, in={self.in_tensor.name}, out={self.out_tensor.name}>"
 
     __repr__ = __str__
 
diff --git a/ethosu/vela/nn_graph.py b/ethosu/vela/nn_graph.py
index a43aac2a..6dc6b583 100644
--- a/ethosu/vela/nn_graph.py
+++ b/ethosu/vela/nn_graph.py
@@ -338,41 +338,21 @@ class Subgraph:
 
         return all_ops
 
-    def print_operators(self):
-        print("print_operators()", self.name)
-        all_ops = self.get_all_ops()
-        unique_ops = []
-        for op in all_ops:
-            if op.type in (Op.Const, Op.Identity, Op.Placeholder):
-                continue
-
-            attrs = op.attrs.copy()
-            if op.type in (Op.Conv2D, Op.Conv2DBias, Op.DepthwiseConv2DBias):
-                kshape = op.inputs[1].shape
-                attrs["kshape"] = [kshape[0], kshape[1]]
-            attrs["type"] = op.type.name
-            attrs.pop("use_cudnn_on_gpu", None)
-            custom_options = attrs.pop("custom_options", None)
-            if attrs not in unique_ops:
-                unique_ops.append(attrs)
-                # print attributes in human readable format
-                a = attrs.copy()
-                if custom_options is not None:
-                    a["custom_options"] = custom_options
-                s = a.pop("type")
-                data_format = a.pop("data_format", None)
-                if data_format and data_format != b"NHWC":
-                    s += " " + str(data_format)
-                t = a.pop("T", None)
-                if t:
-                    s += " " + str(t)[9:-2]
-                srct = a.pop("SrcT", None)
-                if srct:
-                    s += " " + str(srct)[9:-2]
-                dstt = a.pop("DstT", None)
-                if dstt:
-                    s += "->" + str(dstt)[9:-2]
-                print(s + " " + str(a))
+    def print_operators(self, ignore_placeholder_const=True, show_attributes=True):
+        print(f"Operators of Subgraph {self.name}")
+
+        ignore_ops = (Op.Const, Op.Identity, Op.Placeholder) if ignore_placeholder_const else ()
+        all_ops = [op for op in self.get_all_ops() if op.type not in ignore_ops]
+
+        if len(all_ops) > 0:
+            max_op_type_len = max([len(op.type.name) for op in all_ops])
+
+            for idx, op in enumerate(all_ops):
+                attrs_str = f" - {op.attrs}" if show_attributes else ""
+                print(f"{idx:3}: {op.type:{max_op_type_len}}{attrs_str} - {op.name}")
+
+        else:
+            print("No Operators")
 
     def print_graph(self, label=None):
         if label:
@@ -562,9 +542,9 @@ class Graph:
         for sg in self.subgraphs:
             sg.refresh_after_modification()
 
-    def print_operators(self):
+    def print_operators(self, ignore_placeholder_const=True, show_attributes=True):
         for sg in self.subgraphs:
-            sg.print_operators()
+            sg.print_operators(ignore_placeholder_const, show_attributes)
 
     def print_graph(self, label=None):
         for sg in self.subgraphs:
diff --git a/ethosu/vela/pass_packing.py b/ethosu/vela/pass_packing.py
index 932f701b..4c733cce 100644
--- a/ethosu/vela/pass_packing.py
+++ b/ethosu/vela/pass_packing.py
@@ -271,7 +271,7 @@ def pack_into_passes(nng, arch, verbose_packing=False):
                         assert ifm_tensor.purpose == TensorPurpose.FeatureMap
 
                 if operation_set is None:
-                    print("Warning:", curr_op.type, "operation is unknown or unsupported, placing on CPU")
+                    assert not curr_op.run_on_npu  # operator should have been placed on the CPU
 
             for inp in reversed(curr_op.inputs):
                 if inp is None:
diff --git a/ethosu/vela/register_command_stream_generator.py b/ethosu/vela/register_command_stream_generator.py
index 71fec3be..56aae73d 100644
--- a/ethosu/vela/register_command_stream_generator.py
+++ b/ethosu/vela/register_command_stream_generator.py
@@ -1052,6 +1052,7 @@ def generate_command_stream(
     """
     emit = CommandStreamEmitter()
     if verbose:
+        print("Register-Level Command Stream: Input")
         print_operations(npu_op_list, npu_op_to_cmd)
     # Calculate memory accesses for every operation
     memory_accesses: Dict[NpuOperation, MemoryAccessSet] = {}
@@ -1105,6 +1106,7 @@ def generate_command_stream(
     )
 
     if verbose:
+        print("Register-Level Command Stream: Output")
         emit.print_cmds()
         print(f"Number of commands = {len(emit.cmd_stream)}")
         print(f"Command stream length = {emit.size_in_bytes()} bytes")
-- 
cgit v1.2.1
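
Editor's note: for reviewers who want to preview the aligned operator listing
introduced by the new Subgraph.print_operators() without running Vela, the
following is a minimal standalone sketch of the same column-alignment approach.
The operator types, names and attribute dictionaries are hypothetical stand-ins
rather than Vela Op objects, and the script has no dependency on the Vela
codebase.

```python
# Standalone sketch of the aligned operator listing format used by the new
# Subgraph.print_operators(). The (op_type, op_name, attrs) tuples below are
# hypothetical stand-ins for Vela's Op objects.
ops = [
    ("Conv2DBias", "model/conv1/Conv2D", {"stride_h": 1, "stride_w": 1}),
    ("MaxPool", "model/pool1/MaxPool", {"filter_height": 2, "filter_width": 2}),
    ("FullyConnected", "model/fc1/MatMul", {}),
]
show_attributes = True

if ops:
    # Pad every operator type to the width of the longest one so the attribute
    # and name columns line up; this mirrors max_op_type_len in the patch.
    max_op_type_len = max(len(op_type) for op_type, _, _ in ops)
    for idx, (op_type, op_name, attrs) in enumerate(ops):
        attrs_str = f" - {attrs}" if show_attributes else ""
        print(f"{idx:3}: {op_type:{max_op_type_len}}{attrs_str} - {op_name}")
else:
    print("No Operators")
```

Deriving the pad width from the longest operator type present keeps the
attribute and name columns aligned for any mix of operators, which is the
design choice the patch makes for the --verbose-operators output.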