From fb9c25d27791d934300581596cce7c5875a79a80 Mon Sep 17 00:00:00 2001 From: David Mansell Date: Tue, 19 Sep 2023 15:49:10 +0100 Subject: arm_gemm: fix 2D threading mode for SME2 "2D" threading mode was not setting the result pointer correctly for SME2 kernels with K blocking - for non-final blocks the result pointer should be NULL so that the intermediate results get written in the accumulator buffer by the kernel. Signed-off-by: David Mansell Change-Id: Idefa538e190a086e1e44a91998ab7e949e3989e4 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10342 Reviewed-by: Gunes Bayir Comments-Addressed: Arm Jenkins Tested-by: Arm Jenkins Benchmark: Arm Jenkins --- src/core/NEON/kernels/arm_gemm/gemm_interleaved.hpp | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'src/core/NEON/kernels') diff --git a/src/core/NEON/kernels/arm_gemm/gemm_interleaved.hpp b/src/core/NEON/kernels/arm_gemm/gemm_interleaved.hpp index 13f548e39e..362a3e30ea 100644 --- a/src/core/NEON/kernels/arm_gemm/gemm_interleaved.hpp +++ b/src/core/NEON/kernels/arm_gemm/gemm_interleaved.hpp @@ -802,6 +802,13 @@ public: } } + Tr *result_ptr = this->_Cptr + (batch * this->_C_batch_stride) + (multi * this->_C_multi_stride); + + // If we are using an accumulation buffer and this isn't the last pass, don't pass a result pointer. + if (_accumulation_buffer && !last_pass) { + result_ptr = nullptr; + } + // Perform the kernel and merge step, either separately or together as required. kernel_and_merge::run( #ifdef CYCLE_PROFILING @@ -810,7 +817,7 @@ public: // Strategy and panel pointers strat, a_panel, b_ptr, this->_ldb, c_panel, // Result buffer pointers - this->_Cptr + (batch * this->_C_batch_stride) + (multi * this->_C_multi_stride), this->_ldc, + result_ptr, this->_ldc, // K size, and M/N ranges kern_k, start_row, end_row, start_x, end_x, // Only do bias on the first pass -- cgit v1.2.1