about summary refs log tree commit diff
diff options
context:
space:
mode:
authorJacob Bohlin <jacob.bohlin@arm.com>2020-08-13 13:24:59 +0200
committerJacob Bohlin <jacob.bohlin@arm.com>2020-11-04 08:57:06 +0100
commit268394d797db60d07eeace05a2c57e927da0ea15 (patch)
tree73e23e7b86b442367fcb4b529704e7889779ab2f
parent34359589655e62d0f2cadde53320de71add04e94 (diff)
downloadethos-u-vela-268394d797db60d07eeace05a2c57e927da0ea15.tar.gz
MLBEDSW-1974: Set Scratch buffers size
Set the actual size of the Scratch and Fast Scratch buffer and remove both Scratch buffers from the subgraph inputs. Signed-off-by: Jacob Bohlin <jacob.bohlin@arm.com> Change-Id: I9e4213f48289d9136cdd4cd43c668d37c6af8530
-rw-r--r--ethosu/vela/compiler_driver.py6
-rw-r--r--ethosu/vela/npu_serialisation.py5
-rw-r--r--ethosu/vela/tflite_writer.py23
3 files changed, 9 insertions, 25 deletions
diff --git a/ethosu/vela/compiler_driver.py b/ethosu/vela/compiler_driver.py
index 05bf65a..1d7521b 100644
--- a/ethosu/vela/compiler_driver.py
+++ b/ethosu/vela/compiler_driver.py
@@ -291,6 +291,12 @@ def compiler_driver(nng, arch, options, scheduler_options):
npu_serialisation.rewrite_npu_call_ops(nng, root_sg, arch)
+ # Set Scratch and Fast_scratch Tensor size
+ if scratch_tens is not None:
+ scratch_tens.set_all_shapes([root_sg.memory_used_per_type.get(MemType.Scratch, 0)])
+ if scratch_fast_tens is not None:
+ scratch_fast_tens.set_all_shapes([root_sg.memory_used_per_type.get(MemType.Scratch_fast, 0)])
+
# Allocate all Cpu constant tensors, this is done last because the Npu-ops
# have to be serialized into flash and scratch tensors first
tensor_allocation.allocate_tensors(
diff --git a/ethosu/vela/npu_serialisation.py b/ethosu/vela/npu_serialisation.py
index 7989fa9..0bd0300 100644
--- a/ethosu/vela/npu_serialisation.py
+++ b/ethosu/vela/npu_serialisation.py
@@ -103,11 +103,8 @@ def serialise_npu_subgraph_into_tensors(nng, sg, arch, scratch_tens, scratch_fas
sg.flash_tensor = make_memory_tensor(
sg.name + "_flash", flash_area, MemType.Permanent_CPU, flash_size, True, arch
)
- # Scratch fast tensor size set to 0. This forces a minimal allocation in the tensor arena
- # which causes a slot in the basep registers to be reserved, so that the scratch fast tensor
- # address can be overridden.
sg.scratch_fast_tensor = make_memory_tensor(
- sg.name + "_scratch_fast", scratch_fast_area, MemType.Scratch, 0, False, arch
+ sg.name + "_scratch_fast", scratch_fast_area, MemType.Scratch_fast, 0, False, arch
)
sg.scratch_fast_tensor.purpose = TensorPurpose.Scratch
else:
diff --git a/ethosu/vela/tflite_writer.py b/ethosu/vela/tflite_writer.py
index e24aa28..0f20878 100644
--- a/ethosu/vela/tflite_writer.py
+++ b/ethosu/vela/tflite_writer.py
@@ -24,7 +24,6 @@ from flatbuffers.builder import UOffsetTFlags
from .nn_graph import PassPlacement
from .operation import Op
from .tensor import MemType
-from .tensor import TensorPurpose
from .tflite import Buffer
from .tflite import Metadata
from .tflite import Model
@@ -234,9 +233,6 @@ class TFLiteSerialiser:
tens_shape = [tens_shape[idx] for idx in reorder]
values = values.transpose(reorder)
- if tens.purpose == TensorPurpose.Scratch:
- tens_shape = [0]
-
buf_id = self.buffer_map[tens]
self.buffers_to_write[buf_id] = values.flatten().view(np.uint8)
@@ -327,11 +323,6 @@ class TFLiteSerialiser:
scratch_tensors = [tens for tens in all_tensors if tens.name.endswith("scratch")]
- scratch_fast_tensor = None
- for tens in all_tensors:
- if tens.name.endswith("scratch_fast"):
- scratch_fast_tensor = tens
-
if len(scratch_tensors) == 0:
scratch_tensor = None
else:
@@ -347,16 +338,6 @@ class TFLiteSerialiser:
assert all(inp in sg.original_inputs for inp in sg.input_tensors)
inputs = [self.tensor_map[tens] for tens in sg.original_inputs if tens in self.tensor_map]
- # Add the Scratch Tensors as input to the NPU subgraph to get them allocated by TensorFlow Lite Micro
- scratch_tensor_idx = self.tensor_map.get(scratch_tensor, None)
- scratch_fast_tensor_idx = self.tensor_map.get(scratch_fast_tensor, None)
-
- if scratch_tensor_idx is not None and scratch_tensor_idx not in inputs:
- inputs.append(scratch_tensor_idx)
-
- if scratch_fast_tensor_idx is not None and scratch_fast_tensor_idx not in inputs:
- inputs.append(scratch_fast_tensor_idx)
-
inputs_offset = self.write_int_vector(inputs)
outputs_offset = self.write_int_vector(
[self.tensor_map[tens] for tens in sg.output_tensors if tens in self.tensor_map]
@@ -424,8 +405,8 @@ class TFLiteSerialiser:
# Ensure that the order of the offsets match the order of the tensors
for tens, idx in self.tensor_map.items():
# Set offsets for tensor allocated in Tensor Arena or in the scratch_fast area
- if tens.mem_type in set((MemType.Scratch, MemType.Scratch_fast)) and tens.address is not None:
- offsets[idx] = np.int32(tens.address)
+ if tens.mem_type in set((MemType.Scratch, MemType.Scratch_fast)):
+ offsets[idx] = np.int32(tens.address) if tens.address is not None else np.int32(0)
self.nng.metadata.append(("OfflineMemoryAllocation", np.array([version, subgraph_idx, nbr_tensors] + offsets)))