diff options
-rw-r--r-- | DEBUG_DB.md | 72 | ||||
-rw-r--r-- | ethosu/vela/high_level_command_to_npu_op.py | 19 | ||||
-rw-r--r-- | ethosu/vela/nn_graph.py | 3 | ||||
-rw-r--r-- | ethosu/vela/vela.py | 37 |
4 files changed, 120 insertions, 11 deletions
diff --git a/DEBUG_DB.md b/DEBUG_DB.md new file mode 100644 index 00000000..2b530c5e --- /dev/null +++ b/DEBUG_DB.md @@ -0,0 +1,72 @@ +# Debug database + +The purpose of the debug database is to track operator transformations during +the optimisation process of Vela. This is later correlated with the trace +output of the model, externally, to determine the runtime of the original layer +operators. Standalone, the debug database can be used in order to give a brief +overview of how the operators in the network change throughout the optimisation +process. This document gives an overview of the structure of the database and +its outputs, to help parsing of the generated data in a debug procedure. + +# Contents + +While processing, Vela maintains information about operator substitutions and +command generation in its internal Debug Database. The database tracks the data +transformations through the following states: + +- Creation of Source operators - these operators are created from the source +representation, in this case the original TFLite file. +- Creation of Optimised operators - these are the operators that result from + optimising the source operators. They may be the source operators repeated, +or substitute operators inserted by the optimiser. +- Creation of Queue commands - these are the register command sequences +generated by the code generator from the optimised operators. + +Vela's processing steps add data to internal debug tables; one table for each +of the above states. When vela has completed processing, it can write out the +internal debug tables through the command line option "--enable-debug-db". + +# File Format + +The internal debug tables are formatted as columnar CSV. Each row represents an +operator or stream command; keyed on a numeric value that uniquely identifies +that operator or command. These tables are further packaged into an XML +container file, along with metadata, for easier transport and handling. 
+ +**Debug node** + +The top-level debug node wraps the entire file and contains information about +the source and optimised file paths. + +<debug optimised="output_from_vela.tflite" source="input.tflite"> + +**Table nodes** + +The top-level debug node contains one or more table nodes. Each table node is +named, and the table data is represented as CSV formatted text stored in a +CDATA payload tag. The first row of the table contains column headers. + +<table name="source"><![CDATA[ "column0", "column1", "column2", ... + +There currently are 4 named tables. + +- "source" - Table of TFLite operators from the source file. +- "optimised" - Table of optimised operators generated by vela +- "queue" - Table of command queue offsets +- "cmdstream" - Table describing properties of one or more command streams + + +The tables reference each other through the following connections: the Queue +table is linked to the Optimised table through the *optimised_id* and the +command stream table through the *cmdstream_id*. The Optimised table is in turn +linked to the Source table through the *source_id*. + +# Ordering + +Note that the source, cmdstream and optimised tables are not ordered in a +meaningful way. The insertion order of entries in the source and optimised +tables is arbitrary - a side effect of traversal and optimisation. No attempt +should be made to interpret the entries as a graph. The only ordered table is +the queue table, which is ordered by its queue offset. This table describes the +execution order of the hardware commands, and they can be mapped back to the +optimised and source operators in order to determine their execution order.
\ No newline at end of file diff --git a/ethosu/vela/high_level_command_to_npu_op.py b/ethosu/vela/high_level_command_to_npu_op.py index 8d6fc871..b5e7b4b9 100644 --- a/ethosu/vela/high_level_command_to_npu_op.py +++ b/ethosu/vela/high_level_command_to_npu_op.py @@ -1,4 +1,4 @@ -# Copyright (C) 2020 Arm Limited or its affiliates. All rights reserved. +# Copyright (C) 2020-2021 Arm Limited or its affiliates. All rights reserved. # # SPDX-License-Identifier: Apache-2.0 # @@ -501,13 +501,14 @@ def generate_register_command_stream_for_sg(nng, sg, arch, verbose=False): npu_op_list.append(npu_op) npu_op_to_cmd[npu_op] = cmd # Generate register commands - stream_id = DebugDatabase.add_stream(sg) - DebugDatabase.set_stream_offset(sg, 0) # Default to zero, can only set during file writing + if len(sg.high_level_command_stream) > 0: + stream_id = DebugDatabase.add_stream(sg) + sg.generated_stream_id = stream_id - def add_to_debug_db(npu_op: NpuOperation, offset: int): - """Adds info to the debug database""" - if not isinstance(npu_op, NpuDmaOperation): - cmd = npu_op_to_cmd[npu_op] - DebugDatabase.add_command(stream_id, offset, cmd.ps.primary_op) + def add_to_debug_db(npu_op: NpuOperation, offset: int): + """Adds info to the debug database""" + if not isinstance(npu_op, NpuDmaOperation): + cmd = npu_op_to_cmd[npu_op] + DebugDatabase.add_command(stream_id, offset, cmd.ps.primary_op) - sg.register_command_stream = generate_command_stream(npu_op_list, arch, verbose, add_to_debug_db, npu_op_to_cmd) + sg.register_command_stream = generate_command_stream(npu_op_list, arch, verbose, add_to_debug_db, npu_op_to_cmd) diff --git a/ethosu/vela/nn_graph.py b/ethosu/vela/nn_graph.py index 71d4e614..db878bc3 100644 --- a/ethosu/vela/nn_graph.py +++ b/ethosu/vela/nn_graph.py @@ -1,4 +1,4 @@ -# Copyright (C) 2020 Arm Limited or its affiliates. All rights reserved. +# Copyright (C) 2020-2021 Arm Limited or its affiliates. All rights reserved. 
# # SPDX-License-Identifier: Apache-2.0 # @@ -150,6 +150,7 @@ class Subgraph: self.flash_tensor = None # Scratch information locally used in the scheduler self.scheduling_info = {} + self.generated_stream_id = None self.memory_used = {} self.memory_used_per_type = {} diff --git a/ethosu/vela/vela.py b/ethosu/vela/vela.py index bfc76ec9..c4510b18 100644 --- a/ethosu/vela/vela.py +++ b/ethosu/vela/vela.py @@ -1,4 +1,4 @@ -# Copyright (C) 2020 Arm Limited or its affiliates. All rights reserved. +# Copyright (C) 2020-2021 Arm Limited or its affiliates. All rights reserved. # # SPDX-License-Identifier: Apache-2.0 # @@ -23,6 +23,8 @@ import os import sys import time +import flatbuffers + from . import architecture_features from . import compiler_driver from . import model_reader @@ -39,6 +41,7 @@ from .scheduler import ParetoMetric from .supported_operators import SupportedOperators from .tensor import MemArea from .tensor import Tensor +from .tflite.Model import Model from .tflite_mapping import builtin_operator_map from .tflite_mapping import builtin_type_name from ethosu.vela.architecture_features import ArchitectureFeatures @@ -80,6 +83,11 @@ def process(input_name, enable_debug_db, arch, model_reader_options, compiler_op tflite_writer.write_tflite(nng, output_filename) if enable_debug_db: + file_offsets = calculate_operator_file_offsets(output_filename) + for idx, offset in enumerate(sorted(file_offsets)): + sg = find_subgraph_with_command_stream_order(nng, idx) + if sg is not None: + DebugDatabase.set_stream_offset(sg, offset) debug_filename = output_basename + "_debug.xml" DebugDatabase.write(debug_filename, input_name, output_filename) @@ -90,6 +98,33 @@ def process(input_name, enable_debug_db, arch, model_reader_options, compiler_op return nng +def find_subgraph_with_command_stream_order(nng, idx): + for sg in nng.subgraphs: + if sg.generated_stream_id == idx: + return sg + return None + + +def calculate_operator_file_offsets(name: str): + # Read the vela 
optimized tflite file + with open(name, "rb") as f: + buf = bytearray(f.read()) + # Calculate the file offsets for each custom operator + file_offsets = [] + model = Model.GetRootAsModel(buf, 0) + for idx in range(model.SubgraphsLength()): # However only one subgraph is supported as of now + sg = model.Subgraphs(idx) + for idx in range(sg.OperatorsLength()): + operator = sg.Operators(idx) + if model.OperatorCodes(operator.OpcodeIndex()).CustomCode() is not None: + tensor_idx = operator.Inputs(0) + tensor = sg.Tensors(tensor_idx) + buffer = model.Buffers(tensor.Buffer()) + offset = flatbuffers.number_types.UOffsetTFlags.py_type(buffer._tab.Offset(4)) + file_offsets.append(buffer._tab.Vector(offset)) + return file_offsets + + def print_subgraph_io_summary(nng): """Print a summary of all the input and output tensor sizes for all subgraphs. Also displays the total tensor size and the memory used area for sram. |