From fd8b500085d1ac1cca54a71631d21713a3c21f09 Mon Sep 17 00:00:00 2001 From: Rickard Bolin Date: Mon, 16 May 2022 09:11:06 +0000 Subject: MLBEDSW-6263: Use separate tensors for double buffering Uses separate tensors for the individual weight buffers in case of weight double buffering. Each weight buffer tensor gets its own individual live range. This patch is a clone of a previously reverted patch, but with some additional bug fixes applied. Signed-off-by: Rickard Bolin Change-Id: I868c70d15821eb9f1399186f2da6e7345f6ee343 --- ethosu/vela/weight_compressor.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'ethosu/vela/weight_compressor.py') diff --git a/ethosu/vela/weight_compressor.py b/ethosu/vela/weight_compressor.py index 86b424a4..78c43511 100644 --- a/ethosu/vela/weight_compressor.py +++ b/ethosu/vela/weight_compressor.py @@ -68,12 +68,19 @@ class NpuWeightTensor(Tensor): def __init__(self, name): Tensor.__init__(self, None, None, name + "_npu_encoded_weights") self.buffer = [] - self.max_range_bytes = 0 + self.double_buffer_sizes = [0, 0] # Required sizes if double buffering is used self.encoded_ranges = OrderedDict() self.hw_traversal = NpuBlockTraversal.DEPTH_FIRST self.dtype = DataType.uint8 self.scale_compression_config = None + def max_range_bytes(self): + return max(self.double_buffer_sizes) + + def double_buffer_size(self): + """Return total required size for double buffering""" + return sum(self.double_buffer_sizes) + class CompressedWeightCache: """Global tensor weight compression cache""" @@ -357,7 +364,7 @@ def encode_weight_and_scale_tensor( weights = np.flip(weights, axis=(0, 1)) encoded_stream = bytearray() - max_single_buffer_len = 0 + double_buffer_sizes = [0, 0] is_depthwise = npu_block_type == NpuBlockType.ConvolutionDepthWise # Bias & scale @@ -435,11 +442,11 @@ def encode_weight_and_scale_tensor( npu_tensor.encoded_ranges[key] = weight_range # Remember maximum encoded length for DoubleBuffering - max_single_buffer_len = max(max_single_buffer_len, len(encoded_stream) - buffer_start_offset) + double_buffer_sizes[idx % 2] = max(double_buffer_sizes[idx % 2], len(encoded_stream) - buffer_start_offset) # Attach buffer to tensor npu_tensor.buffer = encoded_stream - npu_tensor.max_range_bytes = max_single_buffer_len + npu_tensor.double_buffer_sizes = double_buffer_sizes npu_tensor.set_all_shapes([1, 1, 1, len(encoded_stream)]) npu_tensor.format = TensorFormat.WeightsCompressed -- cgit v1.2.1