aboutsummaryrefslogtreecommitdiff
path: root/ethosu/vela/npu_performance.py
diff options
context:
space:
mode:
authorLouis Verhaard <louis.verhaard@arm.com>2022-03-01 11:26:58 +0100
committerFredrik Svedberg <fredrik.svedberg@arm.com>2022-03-30 13:00:15 +0000
commitcc5f4de1c35ba44fca7ff6295c6ae846f8242344 (patch)
tree68c4f8124a3ee6ec6f7fceb32a1d8aec11ac9a86 /ethosu/vela/npu_performance.py
parenta19b4671dd0594181a2789930cc98bf5dc41ded4 (diff)
downloadethos-u-vela-cc5f4de1c35ba44fca7ff6295c6ae846f8242344.tar.gz
MLBEDSW-6263: Use separate tensors for double buffering
Uses separate tensors for the individual weight buffers in case of weight double buffering. Each weight buffer tensor gets its own individual live range. Change-Id: I724a8c61a7045615fbd2ed9535663076ac8edd13 Signed-off-by: Louis Verhaard <louis.verhaard@arm.com>
Diffstat (limited to 'ethosu/vela/npu_performance.py')
-rw-r--r--ethosu/vela/npu_performance.py14
1 files changed, 7 insertions, 7 deletions
diff --git a/ethosu/vela/npu_performance.py b/ethosu/vela/npu_performance.py
index 8c4aee63..4ffca496 100644
--- a/ethosu/vela/npu_performance.py
+++ b/ethosu/vela/npu_performance.py
@@ -608,8 +608,8 @@ def estimate_full_op_performance(
prev_cost = schedule.cost_map[prev_op] if prev_op else None
if op.parent_op.bias:
query.const_shape = Shape4D(1, 1, 1, op.ofm.shape.depth)
- if cost.buffered_weight_tensor:
- query.const_memory_area = cost.buffered_weight_tensor.mem_area
+ if cost.buffered_weight_tensors:
+ query.const_memory_area = cost.buffered_weight_tensors[0].mem_area
else:
query.const_memory_area = cost.npu_weights_tensor.mem_area
@@ -637,7 +637,7 @@ def estimate_full_op_performance(
# LUT read from SHRAM TODO remove?
scaled_bws[lut_tensor.mem_area][lut_tensor.purpose][BandwidthDirection.Read] += bw
- if cost.npu_weights_tensor and cost.buffered_weight_tensor:
+ if cost.npu_weights_tensor and cost.buffered_weight_tensors:
# DMA Weight Transfer
sz = 0
# Get the size of the first DMA
@@ -649,10 +649,10 @@ def estimate_full_op_performance(
total_sz = len(cost.npu_weights_tensor.buffer)
bws[cost.npu_weights_tensor.mem_area][TensorPurpose.Weights][BandwidthDirection.Read] += total_sz
- bws[cost.buffered_weight_tensor.mem_area][TensorPurpose.Weights][BandwidthDirection.Write] += total_sz
+ bws[cost.buffered_weight_tensors[0].mem_area][TensorPurpose.Weights][BandwidthDirection.Write] += total_sz
ws_first_transfer_cycles = measure_mem2mem_cycles(
- arch, cost.npu_weights_tensor.mem_area, cost.buffered_weight_tensor.mem_area, sz
+ arch, cost.npu_weights_tensor.mem_area, cost.buffered_weight_tensors[0].mem_area, sz
)
# Add cycles for Weight + Scale Transfer
@@ -708,7 +708,7 @@ def estimate_full_op_performance(
bw = access.const_read[0] * bandwidth_compression_scale_approx
bws[query.const_memory_area][TensorPurpose.Weights][BandwidthDirection.Read] += bw
- if not cost.buffered_weight_tensor:
+ if not cost.buffered_weight_tensors:
scaled_bws[query.const_memory_area][TensorPurpose.Weights][BandwidthDirection.Read] += bw
if access.const_read[1] > 0:
@@ -716,7 +716,7 @@ def estimate_full_op_performance(
bw = access.const_read[1] * op.parent_op.bias.element_size()
bws[query.const_memory_area][TensorPurpose.FSBias][BandwidthDirection.Read] += bw
- if not cost.buffered_weight_tensor:
+ if not cost.buffered_weight_tensors:
scaled_bws[query.const_memory_area][TensorPurpose.FSBias][BandwidthDirection.Read] += bw
update_summary_cycles(arch, scaled_bws, cycles_a)