aboutsummaryrefslogtreecommitdiff
path: root/src/core/NEON/kernels/arm_gemm/transforms/a64_interleave_8way_16bit.hpp
diff options
context:
space:
mode:
author Georgios Pinitas <georgios.pinitas@arm.com> 2018-11-07 16:35:35 +0000
committer Georgios Pinitas <georgios.pinitas@arm.com> 2018-11-07 16:35:35 +0000
commit d636bc5bdf8b319a5c0f301e0c6125c0268b36cf (patch)
tree ca031e04c9ad68ca43208823334dda25ec081c18 /src/core/NEON/kernels/arm_gemm/transforms/a64_interleave_8way_16bit.hpp
parent dd2619a777d8faaa17a7cd7c8f20c036903947ad (diff)
download ComputeLibrary-d636bc5bdf8b319a5c0f301e0c6125c0268b36cf.tar.gz
COMPMID-1451: Fixed zerobuff sizes and clobbers in interleave transforms.
Change-Id: If8fbd04d0817b9e654ffa9715879a2521de66963
Diffstat (limited to 'src/core/NEON/kernels/arm_gemm/transforms/a64_interleave_8way_16bit.hpp')
-rw-r--r-- src/core/NEON/kernels/arm_gemm/transforms/a64_interleave_8way_16bit.hpp 4
1 files changed, 2 insertions, 2 deletions
diff --git a/src/core/NEON/kernels/arm_gemm/transforms/a64_interleave_8way_16bit.hpp b/src/core/NEON/kernels/arm_gemm/transforms/a64_interleave_8way_16bit.hpp
index 91ee49229b..500ed787e3 100644
--- a/src/core/NEON/kernels/arm_gemm/transforms/a64_interleave_8way_16bit.hpp
+++ b/src/core/NEON/kernels/arm_gemm/transforms/a64_interleave_8way_16bit.hpp
@@ -35,7 +35,7 @@ void TransformImpl<8, 1, false, 2, 2, false>::Transform(T *out, const T *in, int
uint16_t *outptr = (uint16_t *)out;
const uint16_t *inptr = (const uint16_t *)in;
- uint16_t zerobuff[24];
+ uint16_t zerobuff[16]; // 8 for asm loop plus up to 7 for overflow loop
for (int y=y0; y<ymax; y+=8) {
const uint16_t *inptr0 = inptr + y * ldin + k0;
@@ -147,7 +147,7 @@ void TransformImpl<8, 1, false, 2, 2, false>::Transform(T *out, const T *in, int
: [skippf] "r" (skippf)
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12",
"v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24",
- "v25", "v26", "v27", "v28", "v29", "v30", "v31"
+ "v25", "v26", "v27", "v28", "v29", "v30", "v31", "memory"
);
}