about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- ethosu/vela/compiler_driver.py 5
-rw-r--r-- ethosu/vela/live_range.py 32
-rw-r--r-- ethosu/vela/tflite_reader.py 26
-rw-r--r-- ethosu/vela/tflite_writer.py 87
4 files changed, 97 insertions, 53 deletions
diff --git a/ethosu/vela/compiler_driver.py b/ethosu/vela/compiler_driver.py
index cace0f08..61a3b0b1 100644
--- a/ethosu/vela/compiler_driver.py
+++ b/ethosu/vela/compiler_driver.py
@@ -233,7 +233,10 @@ def compiler_driver(nng, arch, options, scheduler_options, network_type, output_
sg, arch, scratch_tens, scratch_fast_tens, flash_tens
)
- npu_serialisation.rewrite_npu_call_ops(root_sg, arch)
+ # Create list of CPU subgraphs with same order as the list of all subgraphs
+ cpu_subgraphs = [sg for sg in nng.subgraphs if sg.placement == PassPlacement.Cpu]
+ for sg in cpu_subgraphs:
+ npu_serialisation.rewrite_npu_call_ops(sg, arch)
# Set Scratch and Fast_scratch Tensor size
if scratch_tens is not None:
diff --git a/ethosu/vela/live_range.py b/ethosu/vela/live_range.py
index e683f9f5..9b6fe63d 100644
--- a/ethosu/vela/live_range.py
+++ b/ethosu/vela/live_range.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2020 Arm Limited or its affiliates. All rights reserved.
+# Copyright (C) 2020-2022 Arm Limited or its affiliates. All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0
#
@@ -224,18 +224,24 @@ def extract_live_ranges_from_cascaded_passes(
rng = lr_graph.get_or_create_range(tens, cpu_tensor_alignment)
rng.mark_usage(time_for_pass)
- cps_primary_op = cps.passes[0].primary_op
-
- if (
- cps_primary_op
- and cps_primary_op.type == Op.CustomNpuOp
- and MemType.Permanent_CPU not in target_mem_type_set
- ):
- # If the primary-op is an NpuOp that means this is where an Npu subgraph
- # is called. Go into said subgraph and extract live ranges before continuing.
- # Use default allocation alignment of 16 for Npu tensors
- npu_sg = cps_primary_op.attrs["subgraph"]
- lr_graph = _extract_live_ranges_from_schedule(npu_sg, target_mem_area, target_mem_type_set, lr_graph)
+ op_subgraph = cps.passes[0].ops[0].attrs.get("subgraph", None)
+ op_type = cps.passes[0].ops[0].type
+
+ if op_subgraph is not None and MemType.Permanent_CPU not in target_mem_type_set:
+ if op_type == Op.CustomNpuOp:
+ # If the primary-op is an NpuOp that means this is where an Npu subgraph
+ # is called. Go into said subgraph and extract live ranges before continuing.
+ # Use default allocation alignment of 16 for Npu tensors
+ lr_graph = _extract_live_ranges_from_schedule(
+ op_subgraph, target_mem_area, target_mem_type_set, lr_graph
+ )
+ else:
+ # The op has one or more subgraphs in it (a typical op is the While op)
+ # Go into all subgraphs and extract live ranges before continuing.
+ for op_sg in op_subgraph:
+ lr_graph = extract_live_ranges_from_cascaded_passes(
+ op_sg, target_mem_area, target_mem_type_set, lr_graph, cpu_tensor_alignment
+ )
# Set the new time after handling the Npu subgraph
time_for_pass = lr_graph.current_time
cps.time = time_for_pass
diff --git a/ethosu/vela/tflite_reader.py b/ethosu/vela/tflite_reader.py
index 8dc5efe1..fa90ad9e 100644
--- a/ethosu/vela/tflite_reader.py
+++ b/ethosu/vela/tflite_reader.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2020-2021 Arm Limited or its affiliates. All rights reserved.
+# Copyright (C) 2020-2022 Arm Limited or its affiliates. All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0
#
@@ -147,6 +147,15 @@ class TFLiteSubgraph:
if opt_serializer is not None:
op.attrs = opt_serializer.deserialize(op_data)
+ if op_type == Op.While:
+ # Attach the actual nng subgraphs to the op
+ cond_subgraph_index = op.attrs["cond_subgraph_index"]
+ body_subgraph_index = op.attrs["body_subgraph_index"]
+ op.attrs["subgraph"] = (
+ self.graph.nng.subgraphs[cond_subgraph_index],
+ self.graph.nng.subgraphs[body_subgraph_index],
+ )
+
if op_type == Op.Reshape and "new_shape" not in op.attrs:
# Reshape should have an attrib "new_shape" but if it is missing, add it based on the output shape
op.attrs["new_shape"] = outputs[0].shape
@@ -223,16 +232,23 @@ class TFLiteGraph:
parsing_step = "parsing subgraphs length"
self.subgraphs = []
+
+ # Pre-allocate nng subgraphs - needed when parsing an operator and the operator
+ # has subgraph attributes.
+ self.nng = Graph(self.name, self.batch_size)
+ for idx in range(model.SubgraphsLength()):
+ sg = Subgraph()
+ self.nng.subgraphs.append(sg)
+
for idx in range(model.SubgraphsLength()):
parsing_step = f"parsing subgraph {idx}"
self.subgraphs.append(TFLiteSubgraph(self, model.Subgraphs(idx)))
- self.nng = Graph(self.name, self.batch_size)
- for tflite_sg in self.subgraphs:
- sg = Subgraph(tflite_sg.name)
+ for idx, tflite_sg in enumerate(self.subgraphs):
+ sg = self.nng.subgraphs[idx]
+ sg.name = tflite_sg.name
sg.original_inputs = tflite_sg.inputs # Preserve the original input order
sg.output_tensors = tflite_sg.outputs
- self.nng.subgraphs.append(sg)
parsing_step = "parsing metadata length"
# Preserve the original metadata
diff --git a/ethosu/vela/tflite_writer.py b/ethosu/vela/tflite_writer.py
index 7aab01f2..ce53f9b1 100644
--- a/ethosu/vela/tflite_writer.py
+++ b/ethosu/vela/tflite_writer.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2020-2021 Arm Limited or its affiliates. All rights reserved.
+# Copyright (C) 2020-2022 Arm Limited or its affiliates. All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0
#
@@ -71,13 +71,19 @@ def make_vector(v):
class TFLiteSerialiser:
+
+ BUF_IDX_SCRATCH = 0 # Always assign scratch to buffer 0
+ BUF_IDX_SCRATCH_FAST = 1 # Always assign scratch_fast to buffer 1
+ BUF_IDX_START = 2 # Unique buffer id for every tensor in all subgraphs
+
def __init__(self, nng):
self.builder = flatbuffers.Builder(0)
self.nng = nng
- self.scratch_buf_id = 0 # Always assign scratch to buffer 0
- self.scratch_fast_buf_id = 1 # Always assign scratch_fast to buffer 1
+ self.buf_idx = TFLiteSerialiser.BUF_IDX_START
self.buffers_to_write = [] # have an empty array there
+ self.tensor_map_all = [] # Keep track of all subgraphs
+ self.tensor_map_sg = [] # Keep track of one subgraph
self.ops_to_ignore = (Op.Const, Op.Placeholder, Op.SubgraphInput)
@@ -154,22 +160,20 @@ class TFLiteSerialiser:
buffer_map = {}
- buf_idx = 2
-
for tens in tensors:
# Set buffer ids depending on allocation
if tens.is_allocated_in_tensor_arena(scratch_tensor_mem_area):
- buffer_map[tens] = self.scratch_buf_id
+ buffer_map[tens] = TFLiteSerialiser.BUF_IDX_SCRATCH
elif tens.mem_type == MemType.Scratch_fast:
# For Scratch_fast when not co-allocated with scratch in the TensorArena:
- buffer_map[tens] = self.scratch_fast_buf_id
+ buffer_map[tens] = TFLiteSerialiser.BUF_IDX_SCRATCH_FAST
else:
- buffer_map[tens] = buf_idx
- buf_idx += 1
+ buffer_map[tens] = self.buf_idx
+ self.buf_idx += 1
- # Initialize buffers_to_write to a length equal to number of buffers so
+ # Initialize/extend buffers_to_write to a length equal to number of buffers so
# they can be appended at the correct index during tensor serialization
- self.buffers_to_write = [None] * (buf_idx)
+ self.buffers_to_write += [None] * (self.buf_idx)
return buffer_map
@@ -281,13 +285,13 @@ class TFLiteSerialiser:
builder = self.builder
inputs_offset = self.write_int_vector(
- [self.tensor_map[tens] if tens in self.tensor_map else -1 for tens in op.inputs]
+ [self.tensor_map_sg[tens] if tens in self.tensor_map_sg else -1 for tens in op.inputs]
)
outputs_offset = self.write_int_vector(
- [self.tensor_map[tens] for tens in op.outputs if tens in self.tensor_map]
+ [self.tensor_map_sg[tens] for tens in op.outputs if tens in self.tensor_map_sg]
)
intermediates_offset = self.write_int_vector(
- [self.tensor_map[tens] for tens in op.intermediates if tens in self.tensor_map]
+ [self.tensor_map_sg[tens] for tens in op.intermediates if tens in self.tensor_map_sg]
)
if op.type == Op.Custom:
@@ -331,9 +335,8 @@ class TFLiteSerialiser:
Operator.OperatorAddMutatingVariableInputs(builder, mutating_variable_inputs_offset)
return Operator.OperatorEnd(builder)
- def serialise_subgraph(self, sg):
+ def serialise_subgraph(self, sg, name):
builder = self.builder
- tensor_set = set()
all_ops = []
placeholder_ops = []
@@ -344,6 +347,14 @@ class TFLiteSerialiser:
elif op.type == Op.Placeholder:
placeholder_ops.append(op)
+ # Make sure all original tensors are written back, special case for Ops
+ # with connected subgraphs. Even though not all inputs are used,
+ # the reference kernel expects all inputs to be in the tflite file.
+ # Since we traverse the graph starting with all outputs they are
+ # always added but if an input is not referenced it will not be added
+ # to an op.
+ tensor_set = set(sg.original_inputs)
+
# Add the tensors from all valid ops, as well as the tensors from placeholder ops
# This allows us to serialise tensors which arent attached to any specific ops,
# e.g. due to an empty graph containing no ops
@@ -362,18 +373,19 @@ class TFLiteSerialiser:
assert len(scratch_tensors) == 1, "Multiple scratch tensors"
scratch_tensor = scratch_tensors[0]
- self.tensor_map = {tens: idx for idx, tens in enumerate(all_tensors)}
+ self.tensor_map_sg = {tens: idx for idx, tens in enumerate(all_tensors)}
self.buffer_map = self.assign_buffers_to_tensors(all_tensors, scratch_tensor)
+ self.tensor_map_all.append(self.tensor_map_sg)
tensors_offset = self.write_offset_vector([self.serialise_tensor(tens) for tens in all_tensors])
# Make sure the input_tensors haven't been modified
assert all(inp in sg.original_inputs for inp in sg.input_tensors)
- inputs = [self.tensor_map[tens] for tens in sg.original_inputs if tens in self.tensor_map]
+ inputs = [self.tensor_map_sg[tens] for tens in sg.original_inputs if tens in self.tensor_map_sg]
inputs_offset = self.write_int_vector(inputs)
outputs_offset = self.write_int_vector(
- [self.tensor_map[tens] for tens in sg.output_tensors if tens in self.tensor_map]
+ [self.tensor_map_sg[tens] for tens in sg.output_tensors if tens in self.tensor_map_sg]
)
operators_offset = self.write_offset_vector([self.serialise_operator(op) for op in all_ops])
@@ -384,6 +396,7 @@ class TFLiteSerialiser:
SubGraph.SubGraphAddOutputs(builder, outputs_offset)
SubGraph.SubGraphAddOperators(builder, operators_offset)
+ SubGraph.SubGraphAddName(builder, name)
return SubGraph.SubGraphEnd(builder)
@@ -427,26 +440,32 @@ class TFLiteSerialiser:
description = builder.CreateString("Vela Optimised")
- subgraph_offset = self.write_offset_vector([self.serialise_subgraph(sg) for sg in self.subgraphs_to_write])
+ subgraph_offset = self.write_offset_vector(
+ [self.serialise_subgraph(sg, builder.CreateString(sg.name)) for sg in self.subgraphs_to_write]
+ )
# Fill the metadata buffer
version = np.int32(0)
- subgraph_idx = np.int32(len(self.subgraphs_to_write)) # Only 1 supported currently
- nbr_tensors = np.int32(len(self.tensor_map))
+ subgraph_idx = np.int32(len(self.subgraphs_to_write))
+
+ nbr_tensors_all = np.sum([len(tensor_map_sg) for tensor_map_sg in self.tensor_map_all], dtype=np.int32)
+
+ offlineAlloc = [version, subgraph_idx, nbr_tensors_all]
if not any([name == b"OfflineMemoryAllocation" for name, _ in self.nng.metadata]):
- # An offset of -1 indicates that the tensor will be allocated online by Tensorflow Lite Micro
- offsets = [np.int32(-1)] * nbr_tensors
-
- # Ensure that the order of the offsets match the order of the tensors
- for tens, idx in self.tensor_map.items():
- # Set offsets for tensor allocated in Tensor Arena or in the scratch_fast area
- if tens.mem_type in (MemType.Scratch, MemType.Scratch_fast):
- offsets[idx] = np.int32(tens.address) if tens.address is not None else np.int32(0)
-
- self.nng.metadata.append(
- ("OfflineMemoryAllocation", np.array([version, subgraph_idx, nbr_tensors] + offsets))
- )
+ for tensor_map_sg in self.tensor_map_all:
+ nbr_tensors_sg = np.int32(len(tensor_map_sg))
+ # An offset of -1 indicates that the tensor will be allocated online by Tensorflow Lite Micro
+ offsets = [np.int32(-1)] * nbr_tensors_sg
+ # Ensure that the order of the offsets match the order of the tensors
+ for tens, idx in tensor_map_sg.items():
+ # Set offsets for tensor allocated in Tensor Arena or in the scratch_fast area
+ if tens.mem_type in (MemType.Scratch, MemType.Scratch_fast):
+ offsets[idx] = np.int32(tens.address) if tens.address is not None else np.int32(0)
+
+ offlineAlloc += offsets
+
+ self.nng.metadata.append(("OfflineMemoryAllocation", np.array(offlineAlloc)))
metadata_list = []
for name, buffer in self.nng.metadata: