about | summary | refs | log | tree | commit | diff
path: root/src/core/NEON/kernels
diff options
context:
space:
mode:
author: David Mansell <David.Mansell@arm.com> 2023-08-23 16:00:13 +0100
committer: Viet-Hoa Do <viet-hoa.do@arm.com> 2023-08-29 10:14:10 +0000
commit: b7aefd71d07d56b001e795410700cae71a518eca (patch)
tree: d913041260c04ad56872a4fb45d39c28a18a9528 /src/core/NEON/kernels
parent: cea7060684ae6c33fc8e16affc1c7998d17815ae (diff)
download: ComputeLibrary-b7aefd71d07d56b001e795410700cae71a518eca.tar.gz
GEMM: AArch32: Split assembler block in a32_merge_float_8x6.hpp
Inline assembler blocks attempting to bind 8 integer registers don't compile in certain configurations (notably GCC 13.2 debug builds with -O0 -g). Fix this by splitting the offending block into two separate parts (straightforward as there is no flow control in the block).

Fixes: COMPMID-6532
Signed-off-by: David Mansell <David.Mansell@arm.com>
Change-Id: I80e9a10e6a91574176d50e63c45fab055aefa659
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10197
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Emanuele Rocca <ema@linux.it>
Reviewed-by: Viet-Hoa Do <viet-hoa.do@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/NEON/kernels')
-rw-r--r-- src/core/NEON/kernels/arm_gemm/merges/a32_merge_float_8x6.hpp | 10
1 file changed, 8 insertions, 2 deletions
diff --git a/src/core/NEON/kernels/arm_gemm/merges/a32_merge_float_8x6.hpp b/src/core/NEON/kernels/arm_gemm/merges/a32_merge_float_8x6.hpp
index bea455ca67..989bb17dfb 100644
--- a/src/core/NEON/kernels/arm_gemm/merges/a32_merge_float_8x6.hpp
+++ b/src/core/NEON/kernels/arm_gemm/merges/a32_merge_float_8x6.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2018 Arm Limited.
+ * Copyright (c) 2017-2018,2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -274,7 +274,13 @@ void MergeResults<8, 6, false>(float *out, const float *in, const int ldout, con
"VMIN.f32 q6, q6, %q[maxv]\n"
"VMIN.f32 q7, q7, %q[maxv]\n"
"VST1.32 {d12-d15}, [%[outptr3]]!\n"
+ : [outptr0] "+r" (outptr0), [outptr1] "+r" (outptr1), [outptr2] "+r" (outptr2), [outptr3] "+r" (outptr3),
+ [inptr] "+r" (inptr)
+ : [minv] "w" (minv), [maxv] "w" (maxv), [biasptr] "r" (biasptr)
+ : "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "memory"
+ );
+ __asm __volatile (
// Rows 4-5
"VLD1.32 {d8-d11}, [%[inptr]]!\n"
"VLD1.32 {d12-d15}, [%[inptr]]!\n"
@@ -296,7 +302,7 @@ void MergeResults<8, 6, false>(float *out, const float *in, const int ldout, con
"VMIN.f32 q6, q6, %q[maxv]\n"
"VMIN.f32 q7, q7, %q[maxv]\n"
"VST1.32 {d12-d15}, [%[outptr5]]!\n"
- : [outptr0] "+r" (outptr0), [outptr1] "+r" (outptr1), [outptr2] "+r" (outptr2), [outptr3] "+r" (outptr3),
+ : [outptr3] "+r" (outptr3),
[outptr4] "+r" (outptr4), [outptr5] "+r" (outptr5), [inptr] "+r" (inptr)
: [minv] "w" (minv), [maxv] "w" (maxv), [biasptr] "r" (biasptr)
: "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "memory"