path: root/src/cpu/operators/CpuGemmConv2d.cpp
author    Gunes Bayir <gunes.bayir@arm.com>    2024-03-04 14:55:24 +0000
committer Gunes Bayir <gunes.bayir@arm.com>    2024-03-04 16:37:14 +0000
commit    bf053734c468ebc9fd2e535ab8c357b55fdaad43 (patch)
tree      85a292b8a31c3bec2c3f4d32a5cec6ac48741a36 /src/cpu/operators/CpuGemmConv2d.cpp
parent    6fe9eafe0707387e65f9b3c188f4145f64415ce3 (diff)
download  ComputeLibrary-bf053734c468ebc9fd2e535ab8c357b55fdaad43.tar.gz
Fix performance regression in fixed-format kernels
Fix the performance regression in CpuGemmConv2d caused by importing memory at every run for fixed-format kernels. This is done by adding a bypass_import parameter to the auxiliary tensor handler class (CpuAuxTensorHandler) and using it in CpuGemmConv2d so that the memory import happens only when the associated tensor is actually used in the gemm pack. Also improve the documentation of CpuAuxTensorHandler.

Resolves: ARMCL-1126
Co-authored by: SiCong Li <sicong.li@arm.com>
Change-Id: Idb26bdb2d19419074a6e7f2497a1741ae200603f
Signed-off-by: Gunes Bayir <gunes.bayir@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/11240
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Pablo Marquez Tello <pablo.tello@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
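The sketch below is a minimal, self-contained illustration of the bypass_import idea described above: an RAII handler that imports externally owned memory into an auxiliary tensor only when the caller knows that tensor will be consumed. The types and names here (Tensor, AuxTensorHandlerSketch) are illustrative assumptions, not the real CpuAuxTensorHandler API or its constructor signature; see the diff below for the actual change.

// Minimal sketch of the bypass_import mechanism (assumed types, not the library API).
#include <cstddef>
#include <iostream>

struct Tensor
{
    void       *buffer = nullptr;
    std::size_t bytes  = 0;
};

class AuxTensorHandlerSketch
{
public:
    // Import 'external' memory into 'aux' unless bypass_import is true. When the
    // chosen weight-transform path does not use this tensor, bypassing avoids a
    // per-run import (the source of the regression this commit fixes).
    AuxTensorHandlerSketch(Tensor &aux, const Tensor &external, bool bypass_import)
        : _aux(aux), _imported(false)
    {
        if (!bypass_import)
        {
            _aux.buffer = external.buffer; // re-interpret the same memory, no copy
            _aux.bytes  = external.bytes;
            _imported   = true;
        }
    }

    ~AuxTensorHandlerSketch()
    {
        if (_imported)
        {
            _aux.buffer = nullptr; // release the borrowed view on destruction
            _aux.bytes  = 0;
        }
    }

    Tensor *get() { return &_aux; }

private:
    Tensor &_aux;
    bool    _imported;
};

int main()
{
    Tensor weights{reinterpret_cast<void *>(0x1000), 256};
    Tensor reinterpreted;

    // e.g. a different weight-transform path was chosen, so this tensor is unused.
    const bool use_reinterpreted_wei = false;

    // With bypass_import == true the handler does nothing, mirroring the fix:
    // the import happens only when this tensor is the one added to the gemm pack.
    AuxTensorHandlerSketch handler(reinterpreted, weights, /* bypass_import */ !use_reinterpreted_wei);
    std::cout << "imported: " << (handler.get()->buffer != nullptr) << "\n";
    return 0;
}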
Diffstat (limited to 'src/cpu/operators/CpuGemmConv2d.cpp')
-rw-r--r--  src/cpu/operators/CpuGemmConv2d.cpp  |  29
1 file changed, 16 insertions, 13 deletions
diff --git a/src/cpu/operators/CpuGemmConv2d.cpp b/src/cpu/operators/CpuGemmConv2d.cpp
index 31c873c2ba..7460f2020c 100644
--- a/src/cpu/operators/CpuGemmConv2d.cpp
+++ b/src/cpu/operators/CpuGemmConv2d.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021-2023 Arm Limited.
+ * Copyright (c) 2021-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -839,23 +839,26 @@ void CpuGemmConv2d::run(ITensorPack &tensors)
auto weights = gemm_pack.get_const_tensor(TensorType::ACL_SRC_1);
ARM_COMPUTE_ERROR_ON_NULLPTR(weights);
// Re-interpreted weights. Only tensor shape is changed. Only memory import, no allocation
+ const bool use_reinterpreted_wei = (_run_wt && _wt_method == WeightTransformMethod::ReinterpretThenTranspose);
CpuAuxTensorHandler reinterpreted_wei(
_weights_reshaped, *weights,
/* import only if we chose the ReinterpretThenTranspose path, because otherwise the weight may have been freed */
- !(_run_wt && _wt_method == WeightTransformMethod::ReinterpretThenTranspose));
- CpuAuxTensorHandler reshaped_wei(offset_int_vec(WeightsReshaped), _weights_reshaped, tensors);
+ !use_reinterpreted_wei);
+
+ const bool use_reshaped_wei = (_run_wt && (_wt_method == WeightTransformMethod::ReshapeThenTranspose ||
+ _wt_method == WeightTransformMethod::FusedReshapeAndTranspose));
+ CpuAuxTensorHandler reshaped_wei(offset_int_vec(WeightsReshaped), _weights_reshaped, tensors,
+ false /* pack_inject */, !use_reshaped_wei /* bypass_alloc */,
+ !use_reshaped_wei /* bypass_import */
+ );
// Update the weights to use if it has been reshaped
- if (_run_wt)
+ if (use_reinterpreted_wei)
{
- if (_wt_method == WeightTransformMethod::ReinterpretThenTranspose)
- {
- gemm_pack.add_const_tensor(TensorType::ACL_SRC_1, reinterpreted_wei.get());
- }
- else if (_wt_method == WeightTransformMethod::ReshapeThenTranspose ||
- _wt_method == WeightTransformMethod::FusedReshapeAndTranspose)
- {
- gemm_pack.add_const_tensor(TensorType::ACL_SRC_1, reshaped_wei.get());
- }
+ gemm_pack.add_const_tensor(TensorType::ACL_SRC_1, reinterpreted_wei.get());
+ }
+ else if (use_reshaped_wei)
+ {
+ gemm_pack.add_const_tensor(TensorType::ACL_SRC_1, reshaped_wei.get());
}
// Runs CpuGemm or CpuGemmLowpMatrixMultiplyCore functions