MLBEDSW-6384: Updated weight buffering cycle calculation

- The NPU cycles are not correctly calculated when only one weight buffer is used, since weights cannot be fetched in parallel. - Added a new calculation for the single-buffer case. Signed-off-by: Johan Alfven <johan.alfven@arm.com> Change-Id: I8568912d11d137a298225ab77b8b3272613c76f6
author: Johan Alfvén <johan.alfven@arm.com> 2022-05-15 14:54:51 +0200
committer: tim.hall <tim.hall@arm.com> 2022-05-19 14:44:01 +0000
commit: 0f98de67b2f929a1297326721eb421f0a44ef216 (patch)
tree: 3f76d7776aa391c42ed5900e5aff96c23d519c34
parent: 1e363b10a6d4ce0fc062e34df0182b847b08850d (diff)
download: ethos-u-vela-0f98de67b2f929a1297326721eb421f0a44ef216.tar.gz
1 files changed, 13 insertions, 4 deletions
diff --git a/ethosu/vela/npu_performance.py b/ethosu/vela/npu_performance.py
index b7607e6..0e2e3ca 100644
--- a/ethosu/vela/npu_performance.py
+++ b/ethosu/vela/npu_performance.py
@@ -50,6 +50,7 @@ from .shape4d import Shape4D
 from .tensor import BandwidthDirection
 from .tensor import MemArea
 from .tensor import TensorPurpose
+from .tensor import TensorSubPurpose
 from .tflite_mapping import optype_to_builtintype as tflite_optype_to_builtintype
 from .tosa_mapping import optype_to_tosa_op_type as tosa_optype_to_tosa_op_type
 from .weight_compressor import WeightKey
@@ -674,10 +675,18 @@ def estimate_full_op_performance(
         )
 
         # Add cycles for Weight + Scale Transfer
-        cycles_a[PassCycles.Npu] = max(
-            cost.full_weight_transfer_cycles - slack_cycles + cost.slack_buffering_cycles,
-            cycles.op_cycles + max(ws_first_transfer_cycles - slack_cycles, 0),
-        )
+        if cost.buffered_weight_tensors[0].sub_purpose == TensorSubPurpose.DoubleBuffer:
+            # Double buffer - weights can be fetched in parallel
+            cycles_a[PassCycles.Npu] = max(
+                cost.full_weight_transfer_cycles - slack_cycles + cost.slack_buffering_cycles,
+                cycles.op_cycles + max(ws_first_transfer_cycles - slack_cycles, 0),
+            )
+        else:
+            # Standard buffer - weights can not be fetched in parallel so weight transfer
+            # must be included in the result
+            cycles_a[PassCycles.Npu] = (
+                cycles.op_cycles + cost.full_weight_transfer_cycles - min(ws_first_transfer_cycles, slack_cycles)
+            )
 
         # Add cycles for LUT Transfer
         cycles_a[PassCycles.Npu] += lut_transfer_cycles
author	Johan Alfvén <johan.alfven@arm.com>	2022-05-15 14:54:51 +0200
committer	tim.hall <tim.hall@arm.com>	2022-05-19 14:44:01 +0000
commit	0f98de67b2f929a1297326721eb421f0a44ef216 (patch)
tree	3f76d7776aa391c42ed5900e5aff96c23d519c34
parent	1e363b10a6d4ce0fc062e34df0182b847b08850d (diff)
download	ethos-u-vela-0f98de67b2f929a1297326721eb421f0a44ef216.tar.gz