aboutsummaryrefslogtreecommitdiff
path: root/src/cpu/utils/CpuAuxTensorHandler.h
diff options
context:
space:
mode:
authorSiCong Li <sicong.li@arm.com>2023-10-17 17:38:57 +0100
committerSiCong Li <sicong.li@arm.com>2023-11-08 09:49:56 +0000
commitc5ab4df0c11dc66db47f2070edc719923af3367e (patch)
treec04bdac32528e628b2a9b9a1c1653e300328fc1b /src/cpu/utils/CpuAuxTensorHandler.h
parent4a9dbedfbfa66c2612c7461e60cd867b8aea825b (diff)
downloadComputeLibrary-c5ab4df0c11dc66db47f2070edc719923af3367e.tar.gz
Optimize CpuGemmConv2d start-up time
When weight has no holes, we can replace CpuWeightsReshapeKernel with: - Collapse by reinterpreting weight's 3 spatial dimensions - Perform CpuTranspose. For more details, see the documentation in src/cpu/operators/CpuGemmConv2d.cpp. This is one optimization, since CpuTranspose performs better than CpuWeightsReshapeKernel. A second optimization is to fuse this transpose with other weight transformations (e.g. pretranspose_B_array in CpuGemmAssemblyDispatch). However, this second optimization depends on how the underlying gemm methods (the fallback path: CpuGemmMatrixMultiplyKernel or the assembly path: CpuGemmAssemblyDispatch) choose to fuse the transpose. Therefore, this patch moves the transpose down from CpuGemmConv2d to the individual gemm operators where the fusion decision needs to be made, by passing an extra "transpose_b" flag to CpuGemm. New transpose_b flag in different scopes (they are all the same, but with different names because pretranspose_b has a different meaning in GemmAssemblyDispatch): GEMMInfo::pretranspose_B -> AsmGemmInfo::transpose_b. New auxiliary tensors holding the transposed b result: - CpuGemm optimized path: CpuGemmAssemblyDispatch::PrePretransposedB - CpuGemm fallback path: CpuGemm::PreTransposedRHS. Note that this patch does not yet have the second optimization (COMPMID-6595), but it prepares for it. Relates to COMPMID-6595 Resolves COMPMID-6499 Change-Id: I999a2da9da4b2b15369a3cc06d7872c86e0190ea Signed-off-by: SiCong Li <sicong.li@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10526 Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Anitha Raj <Anitha.Raj@arm.com> Reviewed-by: Gunes Bayir <gunes.bayir@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Benchmark: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/cpu/utils/CpuAuxTensorHandler.h')
-rw-r--r--src/cpu/utils/CpuAuxTensorHandler.h16
1 files changed, 11 insertions, 5 deletions
diff --git a/src/cpu/utils/CpuAuxTensorHandler.h b/src/cpu/utils/CpuAuxTensorHandler.h
index e23b88a777..627216837b 100644
--- a/src/cpu/utils/CpuAuxTensorHandler.h
+++ b/src/cpu/utils/CpuAuxTensorHandler.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021 Arm Limited.
+ * Copyright (c) 2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,8 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_CPU_UTILS_CPU_AUX_TENSOR_HANDLER_H
-#define ARM_COMPUTE_CPU_UTILS_CPU_AUX_TENSOR_HANDLER_H
+#ifndef ACL_SRC_CPU_UTILS_CPUAUXTENSORHANDLER_H
+#define ACL_SRC_CPU_UTILS_CPUAUXTENSORHANDLER_H
#include "arm_compute/core/ITensorPack.h"
#include "arm_compute/core/TensorInfo.h"
@@ -71,7 +71,13 @@ public:
}
}
- CpuAuxTensorHandler(TensorInfo &info, ITensor &tensor) : _tensor()
+ /** Create a temporary handle to the original tensor with a new @ref TensorInfo
+ * This is useful if we want to change a tensor's tensor info at run time without modifying the original tensor
+ *
+ * @param[in] info New tensor info to "assign" to @p tensor
+ * @param[in] tensor Tensor to be assigned a new @ref TensorInfo
+ */
+ CpuAuxTensorHandler(TensorInfo &info, const ITensor &tensor) : _tensor()
{
_tensor.allocator()->soft_init(info);
if (info.total_size() <= tensor.info()->total_size())
@@ -108,4 +114,4 @@ private:
};
} // namespace cpu
} // namespace arm_compute
-#endif /* ARM_COMPUTE_CPU_UTILS_CPU_AUX_TENSOR_HANDLER_H */
+#endif // ACL_SRC_CPU_UTILS_CPUAUXTENSORHANDLER_H