From 268394d797db60d07eeace05a2c57e927da0ea15 Mon Sep 17 00:00:00 2001 From: Jacob Bohlin Date: Thu, 13 Aug 2020 13:24:59 +0200 Subject: MLBEDSW-1974: Set Scratch buffers size Set the actual size of the Scratch and Fast Scratch buffer and remove both Scratch buffers from the subgraph inputs. Signed-off-by: Jacob Bohlin Change-Id: I9e4213f48289d9136cdd4cd43c668d37c6af8530 --- ethosu/vela/compiler_driver.py | 6 ++++++ ethosu/vela/npu_serialisation.py | 5 +---- ethosu/vela/tflite_writer.py | 23 ++--------------------- 3 files changed, 9 insertions(+), 25 deletions(-) diff --git a/ethosu/vela/compiler_driver.py b/ethosu/vela/compiler_driver.py index 05bf65a4..1d7521b1 100644 --- a/ethosu/vela/compiler_driver.py +++ b/ethosu/vela/compiler_driver.py @@ -291,6 +291,12 @@ def compiler_driver(nng, arch, options, scheduler_options): npu_serialisation.rewrite_npu_call_ops(nng, root_sg, arch) + # Set Scratch and Fast_scratch Tensor size + if scratch_tens is not None: + scratch_tens.set_all_shapes([root_sg.memory_used_per_type.get(MemType.Scratch, 0)]) + if scratch_fast_tens is not None: + scratch_fast_tens.set_all_shapes([root_sg.memory_used_per_type.get(MemType.Scratch_fast, 0)]) + # Allocate all Cpu constant tensors, this is done last because the Npu-ops # have to be serialized into flash and scratch tensors first tensor_allocation.allocate_tensors( diff --git a/ethosu/vela/npu_serialisation.py b/ethosu/vela/npu_serialisation.py index 7989fa90..0bd03001 100644 --- a/ethosu/vela/npu_serialisation.py +++ b/ethosu/vela/npu_serialisation.py @@ -103,11 +103,8 @@ def serialise_npu_subgraph_into_tensors(nng, sg, arch, scratch_tens, scratch_fas sg.flash_tensor = make_memory_tensor( sg.name + "_flash", flash_area, MemType.Permanent_CPU, flash_size, True, arch ) - # Scratch fast tensor size set to 0. This forces a minimal allocation in the tensor arena - # which causes a slot in the basep registers to be reserved, so that the scratch fast tensor - # address can be overridden. sg.scratch_fast_tensor = make_memory_tensor( - sg.name + "_scratch_fast", scratch_fast_area, MemType.Scratch, 0, False, arch + sg.name + "_scratch_fast", scratch_fast_area, MemType.Scratch_fast, 0, False, arch ) sg.scratch_fast_tensor.purpose = TensorPurpose.Scratch else: diff --git a/ethosu/vela/tflite_writer.py b/ethosu/vela/tflite_writer.py index e24aa282..0f20878d 100644 --- a/ethosu/vela/tflite_writer.py +++ b/ethosu/vela/tflite_writer.py @@ -24,7 +24,6 @@ from flatbuffers.builder import UOffsetTFlags from .nn_graph import PassPlacement from .operation import Op from .tensor import MemType -from .tensor import TensorPurpose from .tflite import Buffer from .tflite import Metadata from .tflite import Model @@ -234,9 +233,6 @@ class TFLiteSerialiser: tens_shape = [tens_shape[idx] for idx in reorder] values = values.transpose(reorder) - if tens.purpose == TensorPurpose.Scratch: - tens_shape = [0] - buf_id = self.buffer_map[tens] self.buffers_to_write[buf_id] = values.flatten().view(np.uint8) @@ -327,11 +323,6 @@ class TFLiteSerialiser: scratch_tensors = [tens for tens in all_tensors if tens.name.endswith("scratch")] - scratch_fast_tensor = None - for tens in all_tensors: - if tens.name.endswith("scratch_fast"): - scratch_fast_tensor = tens - if len(scratch_tensors) == 0: scratch_tensor = None else: @@ -347,16 +338,6 @@ class TFLiteSerialiser: assert all(inp in sg.original_inputs for inp in sg.input_tensors) inputs = [self.tensor_map[tens] for tens in sg.original_inputs if tens in self.tensor_map] - # Add the Scratch Tensors as input to the NPU subgraph to get them allocated by TensorFlow Lite Micro - scratch_tensor_idx = self.tensor_map.get(scratch_tensor, None) - scratch_fast_tensor_idx = self.tensor_map.get(scratch_fast_tensor, None) - - if scratch_tensor_idx is not None and scratch_tensor_idx not in inputs: - inputs.append(scratch_tensor_idx) - - if scratch_fast_tensor_idx is not None and scratch_fast_tensor_idx not in inputs: - inputs.append(scratch_fast_tensor_idx) - inputs_offset = self.write_int_vector(inputs) outputs_offset = self.write_int_vector( [self.tensor_map[tens] for tens in sg.output_tensors if tens in self.tensor_map] @@ -424,8 +405,8 @@ class TFLiteSerialiser: # Ensure that the order of the offsets match the order of the tensors for tens, idx in self.tensor_map.items(): # Set offsets for tensor allocated in Tensor Arena or in the scratch_fast area - if tens.mem_type in set((MemType.Scratch, MemType.Scratch_fast)) and tens.address is not None: - offsets[idx] = np.int32(tens.address) + if tens.mem_type in set((MemType.Scratch, MemType.Scratch_fast)): + offsets[idx] = np.int32(tens.address) if tens.address is not None else np.int32(0) self.nng.metadata.append(("OfflineMemoryAllocation", np.array([version, subgraph_idx, nbr_tensors] + offsets))) -- cgit v1.2.1