Fix convolution with bias segmentation fault issue

Indirect hybrid kernels read the full width of the bias, so we need to detect the case where we are writing a partial block and pad the bias for that block.

Resolves: COMPMID-4321
Signed-off-by: Sheri Zhang <sheri.zhang@arm.com>
Change-Id: Ib8d8637724e34d1eae6cc22223df8d81a6d0ded6
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5380
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
author: Sheri Zhang <sheri.zhang@arm.com> 2021-04-07 20:01:18 +0100
committer: Georgios Pinitas <georgios.pinitas@arm.com> 2021-04-08 11:22:18 +0000
commit: b71322dc037267219f95da406affd42e880a0cc6 (patch)
tree: b4698d11414e0e38ada1d279e5ec4af5126781fb
parent: 534b889482967a4b4e7d6443bad4e4bdcb4999d4 (diff)
download: ComputeLibrary-b71322dc037267219f95da406affd42e880a0cc6.tar.gz
1 files changed, 30 insertions, 1 deletions
diff --git a/src/core/NEON/kernels/arm_gemm/gemm_hybrid_indirect.hpp b/src/core/NEON/kernels/arm_gemm/gemm_hybrid_indirect.hpp
index 5d5f21507f..41fecc6bec 100644
--- a/src/core/NEON/kernels/arm_gemm/gemm_hybrid_indirect.hpp
+++ b/src/core/NEON/kernels/arm_gemm/gemm_hybrid_indirect.hpp
@@ -79,7 +79,36 @@ void run_hybrid_kernel<Nothing, false>::run(
 #endif
     UNUSED(kern_k);
 
-    strat.kernel(num_strings, string_ptr, A_arg, M, N, b_ptr, output_arg, bias_ptr, act, accumulate);
+    /* Indirect hybrid kernels read the full width of the bias. So we need to detect the case where we are writing
+     * a partial block and pad the bias for that block. */
+    if (bias_ptr && !accumulate && (N % strategy::out_width() != 0)) {
+        /* Break N into "N_bulk" (a multiple of output width) and "N_remainder" */
+        unsigned int N_remainder = N % strategy::out_width();
+        unsigned int N_bulk = N - N_remainder;
+
+        /* Output argument to be used for the tail */
+        IndirectOutputArg<Tr> offset_output = output_arg;
+
+        /* If there is a "bulk" to be processed, handle that and update "offset_output" appropriately. */
+        if (N_bulk > 0) {
+            strat.kernel(num_strings, string_ptr, A_arg, M, N_bulk, b_ptr, output_arg, bias_ptr, act, accumulate);
+
+            if (output_arg.is_indirect) {
+                offset_output = IndirectOutputArg<Tr>(output_arg.indirect.ptr, output_arg.indirect.offset + N_bulk);
+            } else {
+                offset_output = IndirectOutputArg<Tr>(output_arg.direct.base + N_bulk, output_arg.direct.stride);
+            }
+        }
+
+        /* Pad the bias buffer for the remainder */
+        Tr *bias_pad_buffer = reinterpret_cast<Tr *>(alloca(strategy::out_width() * sizeof(Tr)));
+        memcpy(bias_pad_buffer, bias_ptr + N_bulk, N_remainder * sizeof(Tr));
+
+        /* Process the remainder, offsetting the B pointer as needed. */
+        strat.kernel(num_strings, string_ptr, A_arg, M, N_remainder, b_ptr + (N_bulk * kern_k), offset_output, bias_pad_buffer, act, accumulate);
+    } else {
+        strat.kernel(num_strings, string_ptr, A_arg, M, N, b_ptr, output_arg, bias_ptr, act, accumulate);
+    }
 }
 
 template<>
author	Sheri Zhang <sheri.zhang@arm.com>	2021-04-07 20:01:18 +0100
committer	Georgios Pinitas <georgios.pinitas@arm.com>	2021-04-08 11:22:18 +0000
commit	b71322dc037267219f95da406affd42e880a0cc6 (patch)
tree	b4698d11414e0e38ada1d279e5ec4af5126781fb
parent	534b889482967a4b4e7d6443bad4e4bdcb4999d4 (diff)
download	ComputeLibrary-b71322dc037267219f95da406affd42e880a0cc6.tar.gz