From ad45f792e699fe6abdc381f62690801aa50bd412 Mon Sep 17 00:00:00 2001 From: "erik.andersson@arm.com" Date: Wed, 3 Feb 2021 10:20:16 +0100 Subject: MLBEDSW-3509: Updated the debug database to support multiple custom operators. Previously the debug database lost some operators in the debug database outputs when multiple custom operators were generated by Vela. Also, the file offsets for command streams were always 0, even for a single custom operator. This patch should rectify these problems. Signed-off-by: erik.andersson@arm.com Change-Id: Ieb072440d4f1806d4833a676683b4f42f431f3df --- DEBUG_DB.md | 72 +++++++++++++++++++++++++++++ ethosu/vela/high_level_command_to_npu_op.py | 19 ++++---- ethosu/vela/nn_graph.py | 3 +- ethosu/vela/vela.py | 37 ++++++++++++++- 4 files changed, 120 insertions(+), 11 deletions(-) create mode 100644 DEBUG_DB.md diff --git a/DEBUG_DB.md b/DEBUG_DB.md new file mode 100644 index 00000000..2b530c5e --- /dev/null +++ b/DEBUG_DB.md @@ -0,0 +1,72 @@ +# Debug database + +The purpose of the debug database is to track operator transformations during +the optimisation process of Vela. This is later correlated with the trace +output of the model, externally, to determine the runtime of the original layer +operators. Standalone, the debug database can be used in order to give a brief +overview of how the operators in the network change throughout the optimisation +process. This document gives an overview of the structure of the database and +its outputs, to help parsing of the generated data in a debug procedure. + +# Contents + +While processing, Vela maintains information about operator substitutions and +command generation in its internal Debug Database. The database tracks the data +transformations through the following states: + +- Creation of Source operators - these operators are created from the source +representation, in this case the original TFLite file. 
+- Creation of Optimised operators - these are the operators that result from + optimising the source operators. They may be the source operators repeated, +or substitute operators inserted by the optimiser. +- Creation of Queue commands - these are the register command sequences +generated by the code generator from the optimised operators. + +Vela's processing steps add data to internal debug tables; one table for each +of the above states. When Vela has completed processing, it can write out the +internal debug tables through the command line option "--enable-debug-db". + +# File Format + +The internal debug tables are formatted as columnar CSV. Each row represents an +operator or stream command; keyed on a numeric value that uniquely identifies +that operator or command. These tables are further packaged into an XML +container file, along with metadata, for easier transport and handling. + +**Debug node** + +The top-level debug node wraps the entire file and contains information about +the source and optimised file paths. + + + +**Table nodes** + +The top-level debug node contains one or more table nodes. Each table node is +named, and the table data is represented as CSV formatted text stored in a +CDATA payload tag. The first row of the table contains column headers. 
+ + 0: + stream_id = DebugDatabase.add_stream(sg) + sg.generated_stream_id = stream_id - def add_to_debug_db(npu_op: NpuOperation, offset: int): - """Adds info to the debug database""" - if not isinstance(npu_op, NpuDmaOperation): - cmd = npu_op_to_cmd[npu_op] - DebugDatabase.add_command(stream_id, offset, cmd.ps.primary_op) + def add_to_debug_db(npu_op: NpuOperation, offset: int): + """Adds info to the debug database""" + if not isinstance(npu_op, NpuDmaOperation): + cmd = npu_op_to_cmd[npu_op] + DebugDatabase.add_command(stream_id, offset, cmd.ps.primary_op) - sg.register_command_stream = generate_command_stream(npu_op_list, arch, verbose, add_to_debug_db, npu_op_to_cmd) + sg.register_command_stream = generate_command_stream(npu_op_list, arch, verbose, add_to_debug_db, npu_op_to_cmd) diff --git a/ethosu/vela/nn_graph.py b/ethosu/vela/nn_graph.py index 71d4e614..db878bc3 100644 --- a/ethosu/vela/nn_graph.py +++ b/ethosu/vela/nn_graph.py @@ -1,4 +1,4 @@ -# Copyright (C) 2020 Arm Limited or its affiliates. All rights reserved. +# Copyright (C) 2020-2021 Arm Limited or its affiliates. All rights reserved. # # SPDX-License-Identifier: Apache-2.0 # @@ -150,6 +150,7 @@ class Subgraph: self.flash_tensor = None # Scratch information locally used in the scheduler self.scheduling_info = {} + self.generated_stream_id = None self.memory_used = {} self.memory_used_per_type = {} diff --git a/ethosu/vela/vela.py b/ethosu/vela/vela.py index bfc76ec9..c4510b18 100644 --- a/ethosu/vela/vela.py +++ b/ethosu/vela/vela.py @@ -1,4 +1,4 @@ -# Copyright (C) 2020 Arm Limited or its affiliates. All rights reserved. +# Copyright (C) 2020-2021 Arm Limited or its affiliates. All rights reserved. # # SPDX-License-Identifier: Apache-2.0 # @@ -23,6 +23,8 @@ import os import sys import time +import flatbuffers + from . import architecture_features from . import compiler_driver from . 
import model_reader @@ -39,6 +41,7 @@ from .scheduler import ParetoMetric from .supported_operators import SupportedOperators from .tensor import MemArea from .tensor import Tensor +from .tflite.Model import Model from .tflite_mapping import builtin_operator_map from .tflite_mapping import builtin_type_name from ethosu.vela.architecture_features import ArchitectureFeatures @@ -80,6 +83,11 @@ def process(input_name, enable_debug_db, arch, model_reader_options, compiler_op tflite_writer.write_tflite(nng, output_filename) if enable_debug_db: + file_offsets = calculate_operator_file_offsets(output_filename) + for idx, offset in enumerate(sorted(file_offsets)): + sg = find_subgraph_with_command_stream_order(nng, idx) + if sg is not None: + DebugDatabase.set_stream_offset(sg, offset) debug_filename = output_basename + "_debug.xml" DebugDatabase.write(debug_filename, input_name, output_filename) @@ -90,6 +98,33 @@ def process(input_name, enable_debug_db, arch, model_reader_options, compiler_op return nng +def find_subgraph_with_command_stream_order(nng, idx): + for sg in nng.subgraphs: + if sg.generated_stream_id == idx: + return sg + return None + + +def calculate_operator_file_offsets(name: str): + # Read the vela optimized tflite file + with open(name, "rb") as f: + buf = bytearray(f.read()) + # Calculate the file offsets for each custom operator + file_offsets = [] + model = Model.GetRootAsModel(buf, 0) + for idx in range(model.SubgraphsLength()): # However only one subgraph is supported as of now + sg = model.Subgraphs(idx) + for idx in range(sg.OperatorsLength()): + operator = sg.Operators(idx) + if model.OperatorCodes(operator.OpcodeIndex()).CustomCode() is not None: + tensor_idx = operator.Inputs(0) + tensor = sg.Tensors(tensor_idx) + buffer = model.Buffers(tensor.Buffer()) + offset = flatbuffers.number_types.UOffsetTFlags.py_type(buffer._tab.Offset(4)) + file_offsets.append(buffer._tab.Vector(offset)) + return file_offsets + + def print_subgraph_io_summary(nng): 
"""Print a summary of all the input and output tensor sizes for all subgraphs. Also displays the total tensor size and the memory used area for sram. -- cgit v1.2.1