-rw-r--r--  DEBUG_DB.md                                    72
-rw-r--r--  ethosu/vela/high_level_command_to_npu_op.py    19
-rw-r--r--  ethosu/vela/nn_graph.py                         3
-rw-r--r--  ethosu/vela/vela.py                            37
4 files changed, 120 insertions(+), 11 deletions(-)
diff --git a/DEBUG_DB.md b/DEBUG_DB.md
new file mode 100644
index 00000000..2b530c5e
--- /dev/null
+++ b/DEBUG_DB.md
@@ -0,0 +1,72 @@
+# Debug database
+
+The purpose of the debug database is to track operator transformations during
+Vela's optimisation process. The database can later be correlated, externally,
+with the trace output of the model to determine the runtime of the original
+layer operators. On its own, the debug database gives a brief overview of how
+the operators in the network change throughout the optimisation process. This
+document describes the structure of the database and its outputs, to help with
+parsing the generated data in a debug procedure.
+
+# Contents
+
+While processing, Vela maintains information about operator substitutions and
+command generation in its internal Debug Database. The database tracks the data
+transformations through the following states:
+
+- Creation of Source operators - these operators are created from the source
+  representation, in this case the original TFLite file.
+- Creation of Optimised operators - these are the operators that result from
+  optimising the source operators. They may be source operators passed through
+  unchanged, or substitute operators inserted by the optimiser.
+- Creation of Queue commands - these are the register command sequences
+  generated by the code generator from the optimised operators.
+
+Vela's processing steps add data to internal debug tables, one table for each
+of the above states. When Vela has completed processing, it can write out the
+internal debug tables via the command line option "--enable-debug-db".
+
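+For example, assuming the vela entry point is installed and on the path:
+
+    vela network.tflite --enable-debug-db
+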
+# File Format
+
+The internal debug tables are formatted as columnar CSV. Each row represents an
+operator or stream command, keyed on a numeric value that uniquely identifies
+that operator or command. These tables are further packaged into an XML
+container file, along with metadata, for easier transport and handling.
+
+**Debug node**
+
+The top-level debug node wraps the entire file and contains information about
+the source and optimised file paths.
+
+<debug optimised="output_from_vela.tflite" source="input.tflite">
+
+**Table nodes**
+
+The top-level debug node contains one or more table nodes. Each table node is
+named, and the table data is stored as CSV-formatted text in a CDATA section.
+The first row of the table contains the column headers.
+
+<table name="source"><![CDATA[ "column0", "column1", "column2", ...
+
+There are currently four named tables:
+
+- "source" - Table of TFLite operators from the source file.
+- "optimised" - Table of optimised operators generated by Vela.
+- "queue" - Table of command queue offsets.
+- "cmdstream" - Table describing the properties of one or more command streams.
+
+The tables reference each other through the following connections: the Queue
+table is linked to the Optimised table through the *optimised_id*, and to the
+command stream table through the *cmdstream_id*. The Optimised table is in turn
+linked to the Source table through the *source_id*.
+
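+A minimal sketch of how the container could be unpacked, using only the Python
+standard library (the file name below is a placeholder for the generated
+"_debug.xml" file that Vela writes next to the optimised network):
+
+    import csv
+    import io
+    import xml.etree.ElementTree as ET
+
+    root = ET.parse("network_debug.xml").getroot()
+    print(root.get("source"), "->", root.get("optimised"))
+
+    # Read each CDATA payload as CSV; the first row holds the column headers.
+    tables = {}
+    for node in root.findall("table"):
+        text = io.StringIO(node.text.strip())
+        rows = list(csv.reader(text, skipinitialspace=True))
+        header, data = rows[0], rows[1:]
+        tables[node.get("name")] = [dict(zip(header, row)) for row in data]
+
+    print(sorted(tables))  # expected: cmdstream, optimised, queue, source
+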
+# Ordering
+
+Note that the source, cmdstream and optimised tables are not ordered in a
+meaningful way. The insertion order of entries in the source and optimised
+tables is arbitrary - a side effect of traversal and optimisation. No attempt
+should be made to interpret the entries as a graph. The only ordered table is
+the queue table, which is ordered by its queue offset. This table describes the
+execution order of the hardware commands, and these commands can be mapped back
+to the optimised and source operators to determine the execution order of the
+original operators.
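+
+Continuing the sketch above, the queue table can be joined back to the
+optimised and source tables to recover that execution order. The exact column
+names below ("id", "offset", "optimised_id", "source_id") are assumptions based
+on the links described in this document and may differ in the generated file:
+
+    optimised = {row["id"]: row for row in tables["optimised"]}
+    source = {row["id"]: row for row in tables["source"]}
+
+    # Queue entries sorted by offset give the hardware execution order; each
+    # entry maps to an optimised operator and, in turn, a source operator.
+    for cmd in sorted(tables["queue"], key=lambda row: int(row["offset"])):
+        opt = optimised[cmd["optimised_id"]]
+        src = source[opt["source_id"]]
+        print(cmd["offset"], opt["id"], src["id"])
\ No newline at end of file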
diff --git a/ethosu/vela/high_level_command_to_npu_op.py b/ethosu/vela/high_level_command_to_npu_op.py
index 8d6fc871..b5e7b4b9 100644
--- a/ethosu/vela/high_level_command_to_npu_op.py
+++ b/ethosu/vela/high_level_command_to_npu_op.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2020 Arm Limited or its affiliates. All rights reserved.
+# Copyright (C) 2020-2021 Arm Limited or its affiliates. All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0
#
@@ -501,13 +501,14 @@ def generate_register_command_stream_for_sg(nng, sg, arch, verbose=False):
             npu_op_list.append(npu_op)
             npu_op_to_cmd[npu_op] = cmd
     # Generate register commands
-    stream_id = DebugDatabase.add_stream(sg)
-    DebugDatabase.set_stream_offset(sg, 0)  # Default to zero, can only set during file writing
+    if len(sg.high_level_command_stream) > 0:
+        stream_id = DebugDatabase.add_stream(sg)
+        sg.generated_stream_id = stream_id
 
-    def add_to_debug_db(npu_op: NpuOperation, offset: int):
-        """Adds info to the debug database"""
-        if not isinstance(npu_op, NpuDmaOperation):
-            cmd = npu_op_to_cmd[npu_op]
-            DebugDatabase.add_command(stream_id, offset, cmd.ps.primary_op)
+        def add_to_debug_db(npu_op: NpuOperation, offset: int):
+            """Adds info to the debug database"""
+            if not isinstance(npu_op, NpuDmaOperation):
+                cmd = npu_op_to_cmd[npu_op]
+                DebugDatabase.add_command(stream_id, offset, cmd.ps.primary_op)
 
-    sg.register_command_stream = generate_command_stream(npu_op_list, arch, verbose, add_to_debug_db, npu_op_to_cmd)
+        sg.register_command_stream = generate_command_stream(npu_op_list, arch, verbose, add_to_debug_db, npu_op_to_cmd)
diff --git a/ethosu/vela/nn_graph.py b/ethosu/vela/nn_graph.py
index 71d4e614..db878bc3 100644
--- a/ethosu/vela/nn_graph.py
+++ b/ethosu/vela/nn_graph.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2020 Arm Limited or its affiliates. All rights reserved.
+# Copyright (C) 2020-2021 Arm Limited or its affiliates. All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0
#
@@ -150,6 +150,7 @@ class Subgraph:
         self.flash_tensor = None
         # Scratch information locally used in the scheduler
         self.scheduling_info = {}
+        self.generated_stream_id = None
 
         self.memory_used = {}
         self.memory_used_per_type = {}
diff --git a/ethosu/vela/vela.py b/ethosu/vela/vela.py
index bfc76ec9..c4510b18 100644
--- a/ethosu/vela/vela.py
+++ b/ethosu/vela/vela.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2020 Arm Limited or its affiliates. All rights reserved.
+# Copyright (C) 2020-2021 Arm Limited or its affiliates. All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0
#
@@ -23,6 +23,8 @@ import os
 import sys
 import time
 
+import flatbuffers
+
 from . import architecture_features
 from . import compiler_driver
 from . import model_reader
@@ -39,6 +41,7 @@ from .scheduler import ParetoMetric
 from .supported_operators import SupportedOperators
 from .tensor import MemArea
 from .tensor import Tensor
+from .tflite.Model import Model
 from .tflite_mapping import builtin_operator_map
 from .tflite_mapping import builtin_type_name
 from ethosu.vela.architecture_features import ArchitectureFeatures
@@ -80,6 +83,11 @@ def process(input_name, enable_debug_db, arch, model_reader_options, compiler_op
         tflite_writer.write_tflite(nng, output_filename)
 
     if enable_debug_db:
+        file_offsets = calculate_operator_file_offsets(output_filename)
+        for idx, offset in enumerate(sorted(file_offsets)):
+            sg = find_subgraph_with_command_stream_order(nng, idx)
+            if sg is not None:
+                DebugDatabase.set_stream_offset(sg, offset)
         debug_filename = output_basename + "_debug.xml"
         DebugDatabase.write(debug_filename, input_name, output_filename)
 
@@ -90,6 +98,33 @@ def process(input_name, enable_debug_db, arch, model_reader_options, compiler_op
     return nng
 
 
+def find_subgraph_with_command_stream_order(nng, idx):
+    for sg in nng.subgraphs:
+        if sg.generated_stream_id == idx:
+            return sg
+    return None
+
+
+def calculate_operator_file_offsets(name: str):
+    # Read the vela optimized tflite file
+    with open(name, "rb") as f:
+        buf = bytearray(f.read())
+    # Calculate the file offsets for each custom operator
+    file_offsets = []
+    model = Model.GetRootAsModel(buf, 0)
+    for idx in range(model.SubgraphsLength()):  # However only one subgraph is supported as of now
+        sg = model.Subgraphs(idx)
+        for idx in range(sg.OperatorsLength()):
+            operator = sg.Operators(idx)
+            if model.OperatorCodes(operator.OpcodeIndex()).CustomCode() is not None:
+                tensor_idx = operator.Inputs(0)
+                tensor = sg.Tensors(tensor_idx)
+                buffer = model.Buffers(tensor.Buffer())
+                offset = flatbuffers.number_types.UOffsetTFlags.py_type(buffer._tab.Offset(4))
+                file_offsets.append(buffer._tab.Vector(offset))
+    return file_offsets
+
+
 def print_subgraph_io_summary(nng):
     """Print a summary of all the input and output tensor sizes for all subgraphs.
     Also displays the total tensor size and the memory used area for sram.