aboutsummaryrefslogtreecommitdiff
path: root/src/core/NEON/kernels/arm_gemm/std_transforms_fixed.hpp
diff options
context:
space:
mode:
authorGunes Bayir <gunes.bayir@arm.com>2024-02-12 21:32:51 +0000
committerGunes Bayir <gunes.bayir@arm.com>2024-02-21 10:36:22 +0000
commitef637398a8c2060e15de438020c53331da8bd6dd (patch)
treeb1a1738736c9b6b49e76767e44bf4b77bf732876 /src/core/NEON/kernels/arm_gemm/std_transforms_fixed.hpp
parent0a48c4c83b598991b4d4235f870c24d9e6634b20 (diff)
downloadComputeLibrary-ef637398a8c2060e15de438020c53331da8bd6dd.tar.gz
Integrate new pretranspose_b_array with extra fused transpose of B
This patch fuses the transposition taking place in ACL with the transformations done in arm_gemm (called pretranspose_b_array) if the underlying kernel and transform support it. This should improve start-up time (as it's for constant Rhs matrices) and memory footprint. The transformations in arm_gemm are kernel specific. The Rhs matrix is transformed into certain layouts to improve the performance. Resolves: COMPMID-6595 Change-Id: Id2932dd966e59f903c279417bebcea83d9a42464 Signed-off-by: Gunes Bayir <gunes.bayir@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/11144 Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Viet-Hoa Do <viet-hoa.do@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Benchmark: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/NEON/kernels/arm_gemm/std_transforms_fixed.hpp')
-rw-r--r--src/core/NEON/kernels/arm_gemm/std_transforms_fixed.hpp9
1 files changed, 7 insertions, 2 deletions
diff --git a/src/core/NEON/kernels/arm_gemm/std_transforms_fixed.hpp b/src/core/NEON/kernels/arm_gemm/std_transforms_fixed.hpp
index 4669be9993..a9cbf4ec8d 100644
--- a/src/core/NEON/kernels/arm_gemm/std_transforms_fixed.hpp
+++ b/src/core/NEON/kernels/arm_gemm/std_transforms_fixed.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2018 Arm Limited.
+ * Copyright (c) 2018-2020, 2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -63,9 +63,14 @@ public:
ConvolutionInterleave<height, block, VLType::None>(out, ptr, stride, conv, rounded_stringlen, y0, ymax, k0, kmax, integrate_sums, row_sum_multiplier);
}
+ bool PrepareB_supports_transpose() const {
+ return false;
+ }
+
template<typename TIn>
void PrepareB(TOperand *out, const TIn *in, const int stride, const int x0,
- const int xmax, const int k0, const int kmax) const {
+ const int xmax, const int k0, const int kmax, bool transposed) const {
+ assert(!transposed);
Transform<width, block, true>(out, in, stride, x0, xmax, k0, kmax);
}