diff options
Diffstat (limited to 'src/core/NEON/kernels/arm_gemm/gemm_interleaved.hpp')
-rw-r--r-- | src/core/NEON/kernels/arm_gemm/gemm_interleaved.hpp | 20 |
1 files changed, 14 insertions, 6 deletions
diff --git a/src/core/NEON/kernels/arm_gemm/gemm_interleaved.hpp b/src/core/NEON/kernels/arm_gemm/gemm_interleaved.hpp index 362a3e30ea..4f732f7d94 100644 --- a/src/core/NEON/kernels/arm_gemm/gemm_interleaved.hpp +++ b/src/core/NEON/kernels/arm_gemm/gemm_interleaved.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2023 Arm Limited. + * Copyright (c) 2017-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -1067,11 +1067,18 @@ public: } } - void pretranspose_B_array(void *in_buffer, const To *B, const int ldb, const int B_multi_stride) override { - pretranspose_B_array_part(in_buffer, B, ldb, B_multi_stride, 0, get_B_pretranspose_window_size()); + // Support for transposed B is a property of the strategy::transpose type + bool B_pretranspose_supports_transpose() const override { + typename transform_type<strategy, MergeStep && std::is_same<OutputStage, Requantize32>::value>::type transforms; + + return transforms.PrepareB_supports_transpose(); + } + + void pretranspose_B_array(void *in_buffer, const To *B, const int ldb, const int B_multi_stride, const bool transposed) override { + pretranspose_B_array_part(in_buffer, B, ldb, B_multi_stride, transposed, 0, get_B_pretranspose_window_size()); } - void pretranspose_B_array_part(void *in_buffer, const To *B, const int ldb, const int B_multi_stride, size_t start, size_t end) override { + void pretranspose_B_array_part(void *in_buffer, const To *B, const int ldb, const int B_multi_stride, const bool transposed, size_t start, size_t end) override { // Perform column sums etc as part of the last block. if (end >= get_B_pretranspose_window_size()) { requantize_bias(in_buffer, B, ldb, B_multi_stride); @@ -1134,7 +1141,8 @@ public: strat.transforms.PrepareB(buffer, B + (current.multi() * B_multi_stride), ldb, x0, xmax, (k_section_base * _Ksize) + k_offset, // K starting point - compute row to read based on our section and the true section length. - (k_section_base * _Ksize) + k_offset + k_length); // K end point - starting point plus length computed above. + (k_section_base * _Ksize) + k_offset + k_length, // K end point - starting point plus length computed above. + transposed); // We need to modify our position based on the ROUNDED version of what we just did. unsigned int padded_length = roundup(k_length, strategy::k_unroll()); @@ -1149,7 +1157,7 @@ public: // In the single K section case, can process the whole lot in one go. // Caution: 'blockwalker::kmax()' rounds up, so clamp to valid _Ksize. strat.transforms.PrepareB(buffer, B + (current.multi() * B_multi_stride), ldb, - current.x0(), current.xmax(), current.k0(), std::min(current.kmax(), _Ksize)); + current.x0(), current.xmax(), current.k0(), std::min(current.kmax(), _Ksize), transposed); buffer += roundup(current.xmax() - current.x0(), strategy::out_width()) * roundup(current.kmax() - current.k0(), strategy::k_unroll()); } |