aboutsummaryrefslogtreecommitdiff
path: root/src/core/NEON/kernels/arm_gemm/transforms/a32_interleave_6way_32bit.hpp
diff options
context:
space:
mode:
author: Georgios Pinitas <georgios.pinitas@arm.com> 2018-11-07 16:35:35 +0000
committer: Georgios Pinitas <georgios.pinitas@arm.com> 2018-11-07 16:35:35 +0000
commit: d636bc5bdf8b319a5c0f301e0c6125c0268b36cf (patch)
tree: ca031e04c9ad68ca43208823334dda25ec081c18 /src/core/NEON/kernels/arm_gemm/transforms/a32_interleave_6way_32bit.hpp
parent: dd2619a777d8faaa17a7cd7c8f20c036903947ad (diff)
download: ComputeLibrary-d636bc5bdf8b319a5c0f301e0c6125c0268b36cf.tar.gz
COMPMID-1451: Fixed zerobuff sizes and clobbers in interleave transforms.
Change-Id: If8fbd04d0817b9e654ffa9715879a2521de66963
Diffstat (limited to 'src/core/NEON/kernels/arm_gemm/transforms/a32_interleave_6way_32bit.hpp')
-rw-r--r-- src/core/NEON/kernels/arm_gemm/transforms/a32_interleave_6way_32bit.hpp 4
1 files changed, 2 insertions, 2 deletions
diff --git a/src/core/NEON/kernels/arm_gemm/transforms/a32_interleave_6way_32bit.hpp b/src/core/NEON/kernels/arm_gemm/transforms/a32_interleave_6way_32bit.hpp
index 492abe51ed..1ccdf60a77 100644
--- a/src/core/NEON/kernels/arm_gemm/transforms/a32_interleave_6way_32bit.hpp
+++ b/src/core/NEON/kernels/arm_gemm/transforms/a32_interleave_6way_32bit.hpp
@@ -35,7 +35,7 @@ inline void TransformImpl<6, 1, false, 4, 4, false>::Transform(T *out, const T *
uint32_t *outptr = reinterpret_cast<uint32_t *>(out);
const uint32_t *inptr = reinterpret_cast<const uint32_t *>(in);
- uint32_t zerobuff[8];
+ uint32_t zerobuff[16]; // 8 for asm loop plus up to 7 for overflow loop
for (int y=y0; y<ymax; y+=6) {
const uint32_t *inptr0 = inptr + y * ldin + k0;
@@ -137,7 +137,7 @@ inline void TransformImpl<6, 1, false, 4, 4, false>::Transform(T *out, const T *
: [inptr0] "+r" (inptr0), [inptr1] "+r" (inptr1), [inptr2] "+r" (inptr2), [inptr3] "+r" (inptr3),
[inptr4] "+r" (inptr4), [inptr5] "+r" (inptr5), [outptr] "+r" (outptr)
:
- : "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9", "q10", "q11", "q12"
+ : "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9", "q10", "q11", "q12", "memory"
);
}