MLBEDSW-6263: Use separate tensors for double buffering

Uses separate tensors for the individual weight buffers when weight double buffering is used. Each weight buffer tensor gets its own live range.

Change-Id: I724a8c61a7045615fbd2ed9535663076ac8edd13
Signed-off-by: Louis Verhaard <louis.verhaard@arm.com>
author: Louis Verhaard <louis.verhaard@arm.com> 2022-03-01 11:26:58 +0100
committer: Fredrik Svedberg <fredrik.svedberg@arm.com> 2022-03-30 13:00:15 +0000
commit: cc5f4de1c35ba44fca7ff6295c6ae846f8242344 (patch)
tree: 68c4f8124a3ee6ec6f7fceb32a1d8aec11ac9a86 /ethosu/vela/npu_performance.py
parent: a19b4671dd0594181a2789930cc98bf5dc41ded4 (diff)
download: ethos-u-vela-cc5f4de1c35ba44fca7ff6295c6ae846f8242344.tar.gz
1 files changed, 7 insertions, 7 deletions
diff --git a/ethosu/vela/npu_performance.py b/ethosu/vela/npu_performance.py
index 8c4aee63..4ffca496 100644
--- a/ethosu/vela/npu_performance.py
+++ b/ethosu/vela/npu_performance.py
@@ -608,8 +608,8 @@ def estimate_full_op_performance(
     prev_cost = schedule.cost_map[prev_op] if prev_op else None
     if op.parent_op.bias:
         query.const_shape = Shape4D(1, 1, 1, op.ofm.shape.depth)
-        if cost.buffered_weight_tensor:
-            query.const_memory_area = cost.buffered_weight_tensor.mem_area
+        if cost.buffered_weight_tensors:
+            query.const_memory_area = cost.buffered_weight_tensors[0].mem_area
         else:
             query.const_memory_area = cost.npu_weights_tensor.mem_area
 
@@ -637,7 +637,7 @@ def estimate_full_op_performance(
             # LUT read from SHRAM TODO remove?
             scaled_bws[lut_tensor.mem_area][lut_tensor.purpose][BandwidthDirection.Read] += bw
 
-    if cost.npu_weights_tensor and cost.buffered_weight_tensor:
+    if cost.npu_weights_tensor and cost.buffered_weight_tensors:
         # DMA Weight Transfer
         sz = 0
         # Get the size of the first DMA
@@ -649,10 +649,10 @@ def estimate_full_op_performance(
 
         total_sz = len(cost.npu_weights_tensor.buffer)
         bws[cost.npu_weights_tensor.mem_area][TensorPurpose.Weights][BandwidthDirection.Read] += total_sz
-        bws[cost.buffered_weight_tensor.mem_area][TensorPurpose.Weights][BandwidthDirection.Write] += total_sz
+        bws[cost.buffered_weight_tensors[0].mem_area][TensorPurpose.Weights][BandwidthDirection.Write] += total_sz
 
         ws_first_transfer_cycles = measure_mem2mem_cycles(
-            arch, cost.npu_weights_tensor.mem_area, cost.buffered_weight_tensor.mem_area, sz
+            arch, cost.npu_weights_tensor.mem_area, cost.buffered_weight_tensors[0].mem_area, sz
         )
 
         # Add cycles for Weight + Scale Transfer
@@ -708,7 +708,7 @@ def estimate_full_op_performance(
         bw = access.const_read[0] * bandwidth_compression_scale_approx
         bws[query.const_memory_area][TensorPurpose.Weights][BandwidthDirection.Read] += bw
 
-        if not cost.buffered_weight_tensor:
+        if not cost.buffered_weight_tensors:
             scaled_bws[query.const_memory_area][TensorPurpose.Weights][BandwidthDirection.Read] += bw
 
     if access.const_read[1] > 0:
@@ -716,7 +716,7 @@ def estimate_full_op_performance(
         bw = access.const_read[1] * op.parent_op.bias.element_size()
         bws[query.const_memory_area][TensorPurpose.FSBias][BandwidthDirection.Read] += bw
 
-        if not cost.buffered_weight_tensor:
+        if not cost.buffered_weight_tensors:
             scaled_bws[query.const_memory_area][TensorPurpose.FSBias][BandwidthDirection.Read] += bw
 
     update_summary_cycles(arch, scaled_bws, cycles_a)
author	Louis Verhaard <louis.verhaard@arm.com>	2022-03-01 11:26:58 +0100
committer	Fredrik Svedberg <fredrik.svedberg@arm.com>	2022-03-30 13:00:15 +0000
commit	cc5f4de1c35ba44fca7ff6295c6ae846f8242344 (patch)
tree	68c4f8124a3ee6ec6f7fceb32a1d8aec11ac9a86 /ethosu/vela/npu_performance.py
parent	a19b4671dd0594181a2789930cc98bf5dc41ded4 (diff)
download	ethos-u-vela-cc5f4de1c35ba44fca7ff6295c6ae846f8242344.tar.gz