From b71322dc037267219f95da406affd42e880a0cc6 Mon Sep 17 00:00:00 2001 From: Sheri Zhang Date: Wed, 7 Apr 2021 20:01:18 +0100 Subject: Fix convolution with bias segmentation fault issue Indirect hybrid kernels read the full width of the bias. So we need to detect the case where we are writing a partial block and pad the bias for that block. Resolves: COMPMID-4321 Signed-off-by: Sheri Zhang Change-Id: Ib8d8637724e34d1eae6cc22223df8d81a6d0ded6 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5380 Reviewed-by: Michele Di Giorgio Reviewed-by: Georgios Pinitas Comments-Addressed: Arm Jenkins Tested-by: Arm Jenkins --- .../NEON/kernels/arm_gemm/gemm_hybrid_indirect.hpp | 31 +++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) (limited to 'src/core/NEON/kernels') diff --git a/src/core/NEON/kernels/arm_gemm/gemm_hybrid_indirect.hpp b/src/core/NEON/kernels/arm_gemm/gemm_hybrid_indirect.hpp index 5d5f21507f..41fecc6bec 100644 --- a/src/core/NEON/kernels/arm_gemm/gemm_hybrid_indirect.hpp +++ b/src/core/NEON/kernels/arm_gemm/gemm_hybrid_indirect.hpp @@ -79,7 +79,36 @@ void run_hybrid_kernel::run( #endif UNUSED(kern_k); - strat.kernel(num_strings, string_ptr, A_arg, M, N, b_ptr, output_arg, bias_ptr, act, accumulate); + /* Indirect hybrid kernels read the full width of the bias. So we need to detect the case where we are writing + * a partial block and pad the bias for that block. */ + if (bias_ptr && !accumulate && (N % strategy::out_width() != 0)) { + /* Break N into "N_bulk" (a multiple of output width) and "N_remainder" */ + unsigned int N_remainder = N % strategy::out_width(); + unsigned int N_bulk = N - N_remainder; + + /* Output argument to be used for the tail */ + IndirectOutputArg offset_output = output_arg; + + /* If there is a "bulk" to be processed, handle that and update "offset_output" appropriately. 
*/ + if (N_bulk > 0) { + strat.kernel(num_strings, string_ptr, A_arg, M, N_bulk, b_ptr, output_arg, bias_ptr, act, accumulate); + + if (output_arg.is_indirect) { + offset_output = IndirectOutputArg(output_arg.indirect.ptr, output_arg.indirect.offset + N_bulk); + } else { + offset_output = IndirectOutputArg(output_arg.direct.base + N_bulk, output_arg.direct.stride); + } + } + + /* Pad the bias buffer for the remainder */ + Tr *bias_pad_buffer = reinterpret_cast<Tr *>(alloca(strategy::out_width() * sizeof(Tr))); + memcpy(bias_pad_buffer, bias_ptr + N_bulk, N_remainder * sizeof(Tr)); + + /* Process the remainder, offsetting the B pointer as needed. */ + strat.kernel(num_strings, string_ptr, A_arg, M, N_remainder, b_ptr + (N_bulk * kern_k), offset_output, bias_pad_buffer, act, accumulate); + } else { + strat.kernel(num_strings, string_ptr, A_arg, M, N, b_ptr, output_arg, bias_ptr, act, accumulate); + } } template<> -- cgit v1.2.1