From d636bc5bdf8b319a5c0f301e0c6125c0268b36cf Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Wed, 7 Nov 2018 16:35:35 +0000 Subject: COMPMID-1451: Fixed zerobuff sizes and clobbers in interleave transforms. Change-Id: If8fbd04d0817b9e654ffa9715879a2521de66963 --- .../kernels/arm_gemm/transforms/a64_interleave_8way_half_to_float.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/core/NEON/kernels/arm_gemm/transforms/a64_interleave_8way_half_to_float.hpp') diff --git a/src/core/NEON/kernels/arm_gemm/transforms/a64_interleave_8way_half_to_float.hpp b/src/core/NEON/kernels/arm_gemm/transforms/a64_interleave_8way_half_to_float.hpp index 773d56d913..88b40d7c1e 100644 --- a/src/core/NEON/kernels/arm_gemm/transforms/a64_interleave_8way_half_to_float.hpp +++ b/src/core/NEON/kernels/arm_gemm/transforms/a64_interleave_8way_half_to_float.hpp @@ -35,7 +35,7 @@ inline void TransformImpl<8, 1, false, 4, 2, false>::Transform(float *out, const float *outptr = out; const __fp16 *inptr = in; - __fp16 zerobuff[8]; + __fp16 zerobuff[16]; // 8 for asm loop plus up to 7 for overflow loop for (int y=y0; y<ymax; y+=8) { [...] @@ [...] @@ inline void TransformImpl<8, 1, false, 4, 2, false>::Transform(float *out, const [inptr4] "+r" (inptr4), [inptr5] "+r" (inptr5), [inptr6] "+r" (inptr6), [inptr7] "+r" (inptr7), [outptr] "+r" (outptr) : : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", - "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23" + "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "memory" ); } -- cgit v1.2.1