diff options
author | Jacob Bohlin <jacob.bohlin@arm.com> | 2020-08-13 13:24:59 +0200 |
---|---|---|
committer | Jacob Bohlin <jacob.bohlin@arm.com> | 2020-11-04 08:57:06 +0100 |
commit | 268394d797db60d07eeace05a2c57e927da0ea15 (patch) | |
tree | 73e23e7b86b442367fcb4b529704e7889779ab2f /ethosu | |
parent | 34359589655e62d0f2cadde53320de71add04e94 (diff) | |
download | ethos-u-vela-268394d797db60d07eeace05a2c57e927da0ea15.tar.gz |
MLBEDSW-1974: Set Scratch buffers size
Set the actual sizes of the Scratch and Fast Scratch buffers and remove both
Scratch buffers from the subgraph inputs.
Signed-off-by: Jacob Bohlin <jacob.bohlin@arm.com>
Change-Id: I9e4213f48289d9136cdd4cd43c668d37c6af8530
Diffstat (limited to 'ethosu')
-rw-r--r-- | ethosu/vela/compiler_driver.py | 6 | ||||
-rw-r--r-- | ethosu/vela/npu_serialisation.py | 5 | ||||
-rw-r--r-- | ethosu/vela/tflite_writer.py | 23 |
3 files changed, 9 insertions, 25 deletions
diff --git a/ethosu/vela/compiler_driver.py b/ethosu/vela/compiler_driver.py index 05bf65a4..1d7521b1 100644 --- a/ethosu/vela/compiler_driver.py +++ b/ethosu/vela/compiler_driver.py @@ -291,6 +291,12 @@ def compiler_driver(nng, arch, options, scheduler_options): npu_serialisation.rewrite_npu_call_ops(nng, root_sg, arch) + # Set Scratch and Fast_scratch Tensor size + if scratch_tens is not None: + scratch_tens.set_all_shapes([root_sg.memory_used_per_type.get(MemType.Scratch, 0)]) + if scratch_fast_tens is not None: + scratch_fast_tens.set_all_shapes([root_sg.memory_used_per_type.get(MemType.Scratch_fast, 0)]) + # Allocate all Cpu constant tensors, this is done last because the Npu-ops # have to be serialized into flash and scratch tensors first tensor_allocation.allocate_tensors( diff --git a/ethosu/vela/npu_serialisation.py b/ethosu/vela/npu_serialisation.py index 7989fa90..0bd03001 100644 --- a/ethosu/vela/npu_serialisation.py +++ b/ethosu/vela/npu_serialisation.py @@ -103,11 +103,8 @@ def serialise_npu_subgraph_into_tensors(nng, sg, arch, scratch_tens, scratch_fas sg.flash_tensor = make_memory_tensor( sg.name + "_flash", flash_area, MemType.Permanent_CPU, flash_size, True, arch ) - # Scratch fast tensor size set to 0. This forces a minimal allocation in the tensor arena - # which causes a slot in the basep registers to be reserved, so that the scratch fast tensor - # address can be overridden. 
sg.scratch_fast_tensor = make_memory_tensor( - sg.name + "_scratch_fast", scratch_fast_area, MemType.Scratch, 0, False, arch + sg.name + "_scratch_fast", scratch_fast_area, MemType.Scratch_fast, 0, False, arch ) sg.scratch_fast_tensor.purpose = TensorPurpose.Scratch else: diff --git a/ethosu/vela/tflite_writer.py b/ethosu/vela/tflite_writer.py index e24aa282..0f20878d 100644 --- a/ethosu/vela/tflite_writer.py +++ b/ethosu/vela/tflite_writer.py @@ -24,7 +24,6 @@ from flatbuffers.builder import UOffsetTFlags from .nn_graph import PassPlacement from .operation import Op from .tensor import MemType -from .tensor import TensorPurpose from .tflite import Buffer from .tflite import Metadata from .tflite import Model @@ -234,9 +233,6 @@ class TFLiteSerialiser: tens_shape = [tens_shape[idx] for idx in reorder] values = values.transpose(reorder) - if tens.purpose == TensorPurpose.Scratch: - tens_shape = [0] - buf_id = self.buffer_map[tens] self.buffers_to_write[buf_id] = values.flatten().view(np.uint8) @@ -327,11 +323,6 @@ class TFLiteSerialiser: scratch_tensors = [tens for tens in all_tensors if tens.name.endswith("scratch")] - scratch_fast_tensor = None - for tens in all_tensors: - if tens.name.endswith("scratch_fast"): - scratch_fast_tensor = tens - if len(scratch_tensors) == 0: scratch_tensor = None else: @@ -347,16 +338,6 @@ class TFLiteSerialiser: assert all(inp in sg.original_inputs for inp in sg.input_tensors) inputs = [self.tensor_map[tens] for tens in sg.original_inputs if tens in self.tensor_map] - # Add the Scratch Tensors as input to the NPU subgraph to get them allocated by TensorFlow Lite Micro - scratch_tensor_idx = self.tensor_map.get(scratch_tensor, None) - scratch_fast_tensor_idx = self.tensor_map.get(scratch_fast_tensor, None) - - if scratch_tensor_idx is not None and scratch_tensor_idx not in inputs: - inputs.append(scratch_tensor_idx) - - if scratch_fast_tensor_idx is not None and scratch_fast_tensor_idx not in inputs: - 
inputs.append(scratch_fast_tensor_idx) - inputs_offset = self.write_int_vector(inputs) outputs_offset = self.write_int_vector( [self.tensor_map[tens] for tens in sg.output_tensors if tens in self.tensor_map] @@ -424,8 +405,8 @@ class TFLiteSerialiser: # Ensure that the order of the offsets match the order of the tensors for tens, idx in self.tensor_map.items(): # Set offsets for tensor allocated in Tensor Arena or in the scratch_fast area - if tens.mem_type in set((MemType.Scratch, MemType.Scratch_fast)) and tens.address is not None: - offsets[idx] = np.int32(tens.address) + if tens.mem_type in set((MemType.Scratch, MemType.Scratch_fast)): + offsets[idx] = np.int32(tens.address) if tens.address is not None else np.int32(0) self.nng.metadata.append(("OfflineMemoryAllocation", np.array([version, subgraph_idx, nbr_tensors] + offsets))) |