From 0f98de67b2f929a1297326721eb421f0a44ef216 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johan=20Alfv=C3=A9n?= Date: Sun, 15 May 2022 14:54:51 +0200 Subject: MLBEDSW-6384: Updated weight buffering cycle calculation - The npu cycles are not correctly calculated when only one weight buffer is used, since weights cannot be fetched in parallel. - Added new calculation in the single buffer case. Signed-off-by: Johan Alfven Change-Id: I8568912d11d137a298225ab77b8b3272613c76f6 --- ethosu/vela/npu_performance.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/ethosu/vela/npu_performance.py b/ethosu/vela/npu_performance.py index b7607e6d..0e2e3ca2 100644 --- a/ethosu/vela/npu_performance.py +++ b/ethosu/vela/npu_performance.py @@ -50,6 +50,7 @@ from .shape4d import Shape4D from .tensor import BandwidthDirection from .tensor import MemArea from .tensor import TensorPurpose +from .tensor import TensorSubPurpose from .tflite_mapping import optype_to_builtintype as tflite_optype_to_builtintype from .tosa_mapping import optype_to_tosa_op_type as tosa_optype_to_tosa_op_type from .weight_compressor import WeightKey @@ -674,10 +675,18 @@ def estimate_full_op_performance( ) # Add cycles for Weight + Scale Transfer - cycles_a[PassCycles.Npu] = max( - cost.full_weight_transfer_cycles - slack_cycles + cost.slack_buffering_cycles, - cycles.op_cycles + max(ws_first_transfer_cycles - slack_cycles, 0), - ) + if cost.buffered_weight_tensors[0].sub_purpose == TensorSubPurpose.DoubleBuffer: + # Double buffer - weights can be fetched in parallel + cycles_a[PassCycles.Npu] = max( + cost.full_weight_transfer_cycles - slack_cycles + cost.slack_buffering_cycles, + cycles.op_cycles + max(ws_first_transfer_cycles - slack_cycles, 0), + ) + else: + # Standard buffer - weights can not be fetched in parallel so weight transfer + # must be included in the result + cycles_a[PassCycles.Npu] = ( + cycles.op_cycles + cost.full_weight_transfer_cycles - 
min(ws_first_transfer_cycles, slack_cycles) + ) # Add cycles for LUT Transfer cycles_a[PassCycles.Npu] += lut_transfer_cycles -- cgit v1.2.1