diff options
Diffstat (limited to 'ethosu/vela/npu_performance.py')
-rw-r--r-- | ethosu/vela/npu_performance.py | 14 |
1 files changed, 7 insertions, 7 deletions
diff --git a/ethosu/vela/npu_performance.py b/ethosu/vela/npu_performance.py index 0c8a9073..81d0be7e 100644 --- a/ethosu/vela/npu_performance.py +++ b/ethosu/vela/npu_performance.py @@ -620,8 +620,8 @@ def estimate_full_op_performance( prev_cost = schedule.cost_map[prev_op] if prev_op else None if op.parent_op.bias: query.const_shape = Shape4D(1, 1, 1, op.ofm.shape.depth) - if cost.buffered_weight_tensors: - query.const_memory_area = cost.buffered_weight_tensors[0].mem_area + if cost.buffered_weight_tensor: + query.const_memory_area = cost.buffered_weight_tensor.mem_area else: query.const_memory_area = cost.npu_weights_tensor.mem_area @@ -649,7 +649,7 @@ def estimate_full_op_performance( # LUT read from SHRAM TODO remove? scaled_bws[lut_tensor.mem_area][lut_tensor.purpose][BandwidthDirection.Read] += bw - if cost.npu_weights_tensor and cost.buffered_weight_tensors: + if cost.npu_weights_tensor and cost.buffered_weight_tensor: # DMA Weight Transfer sz = 0 # Get the size of the first DMA @@ -661,10 +661,10 @@ def estimate_full_op_performance( total_sz = len(cost.npu_weights_tensor.buffer) bws[cost.npu_weights_tensor.mem_area][TensorPurpose.Weights][BandwidthDirection.Read] += total_sz - bws[cost.buffered_weight_tensors[0].mem_area][TensorPurpose.Weights][BandwidthDirection.Write] += total_sz + bws[cost.buffered_weight_tensor.mem_area][TensorPurpose.Weights][BandwidthDirection.Write] += total_sz ws_first_transfer_cycles = measure_mem2mem_cycles( - arch, cost.npu_weights_tensor.mem_area, cost.buffered_weight_tensors[0].mem_area, sz + arch, cost.npu_weights_tensor.mem_area, cost.buffered_weight_tensor.mem_area, sz ) # Add cycles for Weight + Scale Transfer @@ -720,7 +720,7 @@ def estimate_full_op_performance( bw = access.const_read[0] * bandwidth_compression_scale_approx bws[query.const_memory_area][TensorPurpose.Weights][BandwidthDirection.Read] += bw - if not cost.buffered_weight_tensors: + if not cost.buffered_weight_tensor: scaled_bws[query.const_memory_area][TensorPurpose.Weights][BandwidthDirection.Read] += bw if access.const_read[1] > 0: @@ -728,7 +728,7 @@ def estimate_full_op_performance( bw = access.const_read[1] * op.parent_op.bias.element_size() bws[query.const_memory_area][TensorPurpose.FSBias][BandwidthDirection.Read] += bw - if not cost.buffered_weight_tensors: + if not cost.buffered_weight_tensor: scaled_bws[query.const_memory_area][TensorPurpose.FSBias][BandwidthDirection.Read] += bw update_summary_cycles(arch, scaled_bws, cycles_a) |