From 3c07c97e0202c1cf01eba06c24b37a8f15ff7a7c Mon Sep 17 00:00:00 2001
From: Louis Verhaard
Date: Thu, 7 May 2020 08:12:58 +0200
Subject: MLBEDSW-1941: Bug fix shared weights

If same weight tensor was used with different block configs, errors
would occur. Fixed by always cloning weight tensors, using a global
weight compression cache and modifying the linear allocator to detect
multiple usage of same weight compression.

Change-Id: I91ca59176e1c59c66e0ac7a4227f2b5f0b47053f
Signed-off-by: Louis Verhaard
---
 ethosu/vela/tensor.py | 34 +++++++++++++++++++++++-----------
 1 file changed, 23 insertions(+), 11 deletions(-)

diff --git a/ethosu/vela/tensor.py b/ethosu/vela/tensor.py
index 160cf630..2f91f61c 100644
--- a/ethosu/vela/tensor.py
+++ b/ethosu/vela/tensor.py
@@ -225,7 +225,6 @@ class Tensor:
         "quantization",
         "weight_compressed_offsets",
         "element_size_bytes",
-        "reshaped",
         "block_traversal",
         "offset",
         "cpu_tensor",
@@ -273,8 +272,6 @@ class Tensor:
 
         # quantization parameters
         self.quantization = None
-
-        self.reshaped = False
         self.block_traversal = TensorBlockTraversal.Default
         self.resampling_mode = resampling_mode.NONE
 
@@ -294,20 +291,13 @@ class Tensor:
 
         res.values = self.values
         res.quant_values = self.quant_values
-        res.compressed_values = self.compressed_values
         res.mem_area = self.mem_area
         res.format = self.format
         res.purpose = self.purpose
         res.sub_purpose = self.sub_purpose
         res.alignment = self.alignment
-        res.weight_transpose_depthwise = self.weight_transpose_depthwise
-
-        res.storage_compression_scale = self.storage_compression_scale
         res.bandwidth_compression_scale = self.bandwidth_compression_scale
-        res.compression_scale_for_worst_weight_stream = self.compression_scale_for_worst_weight_stream
-        res.weight_compression_scales = self.weight_compression_scales
         res.storage_rounding_quantum = self.storage_rounding_quantum
-        res.brick_size = self.brick_size
         res.address = 0
 
         if self.quantization is not None:
@@ -317,6 +307,7 @@ class Tensor:
 
         res.resampling_mode = self.resampling_mode
 
+        res.copy_compressed_weight_info(self)
         return res
 
     def clone_into_fast_storage(self, arch):
@@ -324,6 +315,19 @@ class Tensor:
         res.mem_area = arch.fast_storage_mem_area
         return res
 
+    def copy_compressed_weight_info(self, src_tens):
+        # Copies compressed values + all related weight compression info from the given tensor
+        self.compressed_values = src_tens.compressed_values
+        self.storage_shape = src_tens.storage_shape
+        self.brick_size = src_tens.brick_size
+        self.weight_compression_scales = src_tens.weight_compression_scales
+        self.weight_compressed_offsets = src_tens.weight_compressed_offsets
+        self.weight_transpose_depthwise = src_tens.weight_transpose_depthwise
+        self.compression_scale_for_worst_weight_stream = src_tens.compression_scale_for_worst_weight_stream
+        self.storage_compression_scale = src_tens.storage_compression_scale
+        self.block_traversal = src_tens.block_traversal
+        self.weight_compression_config = src_tens.weight_compression_config
+
     def set_format(self, fmt, arch):
         self.format = fmt
         shape_len = 0
@@ -527,6 +531,14 @@ class Tensor:
 
         return strides
 
+    def needs_dma(self):
+        return len(self.ops) == 1 and self.ops[0].type == "DMA"
+
+    def get_dma_src_tensor(self):
+        # For weight tensors that need DMA: returns the source tensor in Flash, else None
+        # Note: for DMA ops, Pass.weight_tensor is referring to the SRAM weight tensor
+        return self.ops[0].inputs[0] if self.needs_dma() else None
+
     def compressed_stream_index_from_coord(self, coord):
         assert self.format == TensorFormat.WeightsCompressed
         assert len(self.compressed_values) > 0
@@ -575,7 +587,7 @@ class Tensor:
         if len(self.weight_compressed_offsets) == 0:
             return 0
 
-        if len(self.ops) == 1 and self.ops[0].type == "DMA" and self.sub_purpose == TensorSubPurpose.DoubleBuffer:
+        if self.needs_dma() and self.sub_purpose == TensorSubPurpose.DoubleBuffer:
             depth = orig_coord[-1]
             brick_depth = self.brick_size[-1]
             # Clamp position at final element index
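
The commit message refers to two mechanisms that live outside this file (the diffstat shows only tensor.py is touched): a global weight compression cache and a linear-allocator check for tensors sharing a compression. The sketch below illustrates the caching idea only; it is a minimal, self-contained illustration, not Vela's actual implementation, and the names used (WeightCompressionConfig, CompressedWeightCache, get_or_compress, block_depth, dilation) are assumptions, not taken from the sources shown here.

    from dataclasses import dataclass, field
    from typing import Callable, Dict

    @dataclass(frozen=True)
    class WeightCompressionConfig:
        # Hypothetical cache key: everything that influences the encoded
        # stream must be part of it, including the block config that
        # triggered the original bug.
        equivalence_id: int
        block_depth: int
        dilation: tuple

    @dataclass
    class CompressedWeightCache:
        # A single process-wide instance would play the role of the
        # "global weight compression cache" from the commit message.
        cache: Dict[WeightCompressionConfig, bytes] = field(default_factory=dict)

        def get_or_compress(
            self,
            cfg: WeightCompressionConfig,
            encode: Callable[[WeightCompressionConfig], bytes],
        ) -> bytes:
            # Compress once per distinct config; later clones with the same
            # config get the identical stream back instead of re-encoding.
            if cfg not in self.cache:
                self.cache[cfg] = encode(cfg)
            return self.cache[cfg]

    # Usage: the same weights encoded under two block configs yield two
    # cache entries; repeating an earlier config is a hit (same object).
    cache = CompressedWeightCache()
    encode = lambda cfg: ("stream@block_depth=%d" % cfg.block_depth).encode()

    a = cache.get_or_compress(WeightCompressionConfig(1, 8, (1, 1)), encode)
    b = cache.get_or_compress(WeightCompressionConfig(1, 16, (1, 1)), encode)
    c = cache.get_or_compress(WeightCompressionConfig(1, 8, (1, 1)), encode)
    assert a is c and a != b

The allocator half of the fix can follow the same key-based idea: before reserving space for a compressed weight tensor, a linear allocator can check whether a tensor with the same compression has already been placed and reuse that address rather than allocating the stream a second time.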