diff options
author | Georgios Pinitas <georgios.pinitas@arm.com> | 2018-11-07 16:35:35 +0000 |
---|---|---|
committer | Georgios Pinitas <georgios.pinitas@arm.com> | 2018-11-07 16:35:35 +0000 |
commit | d636bc5bdf8b319a5c0f301e0c6125c0268b36cf (patch) | |
tree | ca031e04c9ad68ca43208823334dda25ec081c18 /src | |
parent | dd2619a777d8faaa17a7cd7c8f20c036903947ad (diff) | |
download | ComputeLibrary-d636bc5bdf8b319a5c0f301e0c6125c0268b36cf.tar.gz |
COMPMID-1451: Fixed zerobuff sizes and clobbers in interleave transforms.
Change-Id: If8fbd04d0817b9e654ffa9715879a2521de66963
Diffstat (limited to 'src')
4 files changed, 8 insertions, 8 deletions
diff --git a/src/core/NEON/kernels/arm_gemm/transforms/a32_interleave_6way_32bit.hpp b/src/core/NEON/kernels/arm_gemm/transforms/a32_interleave_6way_32bit.hpp
index 492abe51ed..1ccdf60a77 100644
--- a/src/core/NEON/kernels/arm_gemm/transforms/a32_interleave_6way_32bit.hpp
+++ b/src/core/NEON/kernels/arm_gemm/transforms/a32_interleave_6way_32bit.hpp
@@ -35,7 +35,7 @@ inline void TransformImpl<6, 1, false, 4, 4, false>::Transform(T *out, const T *
     uint32_t *outptr = reinterpret_cast<uint32_t *>(out);
     const uint32_t *inptr = reinterpret_cast<const uint32_t *>(in);
 
-    uint32_t zerobuff[8];
+    uint32_t zerobuff[16]; // 8 for asm loop plus up to 7 for overflow loop
 
     for (int y=y0; y<ymax; y+=6) {
         const uint32_t *inptr0 = inptr + y * ldin + k0;
@@ -137,7 +137,7 @@ inline void TransformImpl<6, 1, false, 4, 4, false>::Transform(T *out, const T *
                 : [inptr0] "+r" (inptr0), [inptr1] "+r" (inptr1), [inptr2] "+r" (inptr2), [inptr3] "+r" (inptr3),
                   [inptr4] "+r" (inptr4), [inptr5] "+r" (inptr5), [outptr] "+r" (outptr)
                 :
-                : "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9", "q10", "q11", "q12"
+                : "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9", "q10", "q11", "q12", "memory"
             );
         }
 
diff --git a/src/core/NEON/kernels/arm_gemm/transforms/a64_interleave_8way_16bit.hpp b/src/core/NEON/kernels/arm_gemm/transforms/a64_interleave_8way_16bit.hpp
index 91ee49229b..500ed787e3 100644
--- a/src/core/NEON/kernels/arm_gemm/transforms/a64_interleave_8way_16bit.hpp
+++ b/src/core/NEON/kernels/arm_gemm/transforms/a64_interleave_8way_16bit.hpp
@@ -35,7 +35,7 @@ void TransformImpl<8, 1, false, 2, 2, false>::Transform(T *out, const T *in, int
     uint16_t *outptr = (uint16_t *)out;
     const uint16_t *inptr = (const uint16_t *)in;
 
-    uint16_t zerobuff[24];
+    uint16_t zerobuff[16]; // 8 for asm loop plus up to 7 for overflow loop
 
     for (int y=y0; y<ymax; y+=8) {
         const uint16_t *inptr0 = inptr + y * ldin + k0;
@@ -147,7 +147,7 @@ void TransformImpl<8, 1, false, 2, 2, false>::Transform(T *out, const T *in, int
                 : [skippf] "r" (skippf)
                 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12",
                   "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24",
-                  "v25", "v26", "v27", "v28", "v29", "v30", "v31"
+                  "v25", "v26", "v27", "v28", "v29", "v30", "v31", "memory"
             );
         }
 
diff --git a/src/core/NEON/kernels/arm_gemm/transforms/a64_interleave_8way_32bit.hpp b/src/core/NEON/kernels/arm_gemm/transforms/a64_interleave_8way_32bit.hpp
index 7a32f331ea..347eafb56a 100644
--- a/src/core/NEON/kernels/arm_gemm/transforms/a64_interleave_8way_32bit.hpp
+++ b/src/core/NEON/kernels/arm_gemm/transforms/a64_interleave_8way_32bit.hpp
@@ -35,7 +35,7 @@ inline void TransformImpl<8, 1, false, 4, 4, false>::Transform(T *out, const T *
     uint32_t *outptr = (uint32_t *)out;
     const uint32_t *inptr = (uint32_t *)in;
 
-    uint32_t zerobuff[8];
+    uint32_t zerobuff[16]; // 8 for asm loop plus up to 7 for overflow loop
 
     for (int y=y0; y<ymax; y+=8) {
         const uint32_t *inptr0 = inptr + y * ldin + k0;
@@ -156,7 +156,7 @@ inline void TransformImpl<8, 1, false, 4, 4, false>::Transform(T *out, const T *
                   [inptr4] "+r" (inptr4), [inptr5] "+r" (inptr5), [inptr6] "+r" (inptr6), [inptr7] "+r" (inptr7), [outptr] "+r" (outptr)
                 :
                 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12",
-                  "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23"
+                  "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "memory"
             );
         }
 
diff --git a/src/core/NEON/kernels/arm_gemm/transforms/a64_interleave_8way_half_to_float.hpp b/src/core/NEON/kernels/arm_gemm/transforms/a64_interleave_8way_half_to_float.hpp
index 773d56d913..88b40d7c1e 100644
--- a/src/core/NEON/kernels/arm_gemm/transforms/a64_interleave_8way_half_to_float.hpp
+++ b/src/core/NEON/kernels/arm_gemm/transforms/a64_interleave_8way_half_to_float.hpp
@@ -35,7 +35,7 @@ inline void TransformImpl<8, 1, false, 4, 2, false>::Transform(float *out, const
     float *outptr = out;
     const __fp16 *inptr = in;
 
-    __fp16 zerobuff[8];
+    __fp16 zerobuff[16]; // 8 for asm loop plus up to 7 for overflow loop
 
     for (int y=y0; y<ymax; y+=8) {
         const __fp16 *inptr0 = inptr + y * ldin + k0;
@@ -172,7 +172,7 @@ inline void TransformImpl<8, 1, false, 4, 2, false>::Transform(float *out, const
                   [inptr4] "+r" (inptr4), [inptr5] "+r" (inptr5), [inptr6] "+r" (inptr6), [inptr7] "+r" (inptr7), [outptr] "+r" (outptr)
                 :
                 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12",
-                  "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23"
+                  "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "memory"
             );
         }
 