aboutsummaryrefslogtreecommitdiff
path: root/src/core/NEON/kernels/arm_gemm/transforms/a64_interleave_8way_32bit.hpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/core/NEON/kernels/arm_gemm/transforms/a64_interleave_8way_32bit.hpp')
-rw-r--r--src/core/NEON/kernels/arm_gemm/transforms/a64_interleave_8way_32bit.hpp4
1 files changed, 2 insertions, 2 deletions
diff --git a/src/core/NEON/kernels/arm_gemm/transforms/a64_interleave_8way_32bit.hpp b/src/core/NEON/kernels/arm_gemm/transforms/a64_interleave_8way_32bit.hpp
index 7a32f331ea..347eafb56a 100644
--- a/src/core/NEON/kernels/arm_gemm/transforms/a64_interleave_8way_32bit.hpp
+++ b/src/core/NEON/kernels/arm_gemm/transforms/a64_interleave_8way_32bit.hpp
@@ -35,7 +35,7 @@ inline void TransformImpl<8, 1, false, 4, 4, false>::Transform(T *out, const T *
uint32_t *outptr = (uint32_t *)out;
const uint32_t *inptr = (uint32_t *)in;
- uint32_t zerobuff[8];
+ uint32_t zerobuff[16]; // 8 for asm loop plus up to 7 for overflow loop
for (int y=y0; y<ymax; y+=8) {
const uint32_t *inptr0 = inptr + y * ldin + k0;
@@ -156,7 +156,7 @@ inline void TransformImpl<8, 1, false, 4, 4, false>::Transform(T *out, const T *
[inptr4] "+r" (inptr4), [inptr5] "+r" (inptr5), [inptr6] "+r" (inptr6), [inptr7] "+r" (inptr7), [outptr] "+r" (outptr)
:
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12",
- "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23"
+ "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "memory"
);
}