From 37d080f2f11cfd734104b76512e1fb191486216e Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Fri, 21 Jun 2019 18:43:12 +0100 Subject: COMPMID-2378: Sanitize GEMM configuration for NEON Change-Id: I7859b82b2059e14685f8792424648ac5eacd67f1 Signed-off-by: Georgios Pinitas Reviewed-on: https://review.mlplatform.org/c/1418 Comments-Addressed: Arm Jenkins Reviewed-by: Michele Di Giorgio Reviewed-by: Michalis Spyrou Tested-by: Arm Jenkins --- src/core/NEON/kernels/arm_gemm/merges/a32_merge_float_8x6.hpp | 4 ++++ src/core/NEON/kernels/arm_gemm/merges/a64_merge_float_12x8.hpp | 6 ++++++ .../NEON/kernels/arm_gemm/merges/a64_merge_float_to_half_12x8.hpp | 6 ++++++ src/core/NEON/kernels/arm_gemm/merges/a64_merge_half_24x8.hpp | 6 ++++++ src/core/NEON/kernels/arm_gemm/merges/a64_merge_int32_12x8.hpp | 6 ++++++ .../NEON/kernels/arm_gemm/transforms/a32_interleave_6way_32bit.hpp | 4 ++++ .../kernels/arm_gemm/transforms/a64_block16_interleave4_8bit.hpp | 4 ++++ .../NEON/kernels/arm_gemm/transforms/a64_interleave_8way_16bit.hpp | 6 ++++++ .../NEON/kernels/arm_gemm/transforms/a64_interleave_8way_32bit.hpp | 6 ++++++ .../arm_gemm/transforms/a64_interleave_8way_half_to_float.hpp | 6 ++++++ 10 files changed, 54 insertions(+) (limited to 'src/core/NEON/kernels/arm_gemm') diff --git a/src/core/NEON/kernels/arm_gemm/merges/a32_merge_float_8x6.hpp b/src/core/NEON/kernels/arm_gemm/merges/a32_merge_float_8x6.hpp index f4485bcbb1..e1af2d4490 100644 --- a/src/core/NEON/kernels/arm_gemm/merges/a32_merge_float_8x6.hpp +++ b/src/core/NEON/kernels/arm_gemm/merges/a32_merge_float_8x6.hpp @@ -61,12 +61,16 @@ inline void MergeResults<8, 6, false>(float *out, const float *in, const int ldo switch ((y + 5) - ymax) { case 4: outptr1 = dummyres; + // fall through case 3: outptr2 = dummyres; + // fall through case 2: outptr3 = dummyres; + // fall through case 1: outptr4 = dummyres; + // fall through case 0: outptr5 = dummyres; break; diff --git a/src/core/NEON/kernels/arm_gemm/merges/a64_merge_float_12x8.hpp b/src/core/NEON/kernels/arm_gemm/merges/a64_merge_float_12x8.hpp index be23978b80..9fca4e3a84 100644 --- a/src/core/NEON/kernels/arm_gemm/merges/a64_merge_float_12x8.hpp +++ b/src/core/NEON/kernels/arm_gemm/merges/a64_merge_float_12x8.hpp @@ -63,16 +63,22 @@ inline void MergeResults<12, 8, false>(float *out, const float *in, const int ld switch ((y + 7) - ymax) { case 6: outptr1 = dummyres; + // fall through case 5: outptr2 = dummyres; + // fall through case 4: outptr3 = dummyres; + // fall through case 3: outptr4 = dummyres; + // fall through case 2: outptr5 = dummyres; + // fall through case 1: outptr6 = dummyres; + // fall through case 0: outptr7 = dummyres; break; diff --git a/src/core/NEON/kernels/arm_gemm/merges/a64_merge_float_to_half_12x8.hpp b/src/core/NEON/kernels/arm_gemm/merges/a64_merge_float_to_half_12x8.hpp index 9e5eb88dc1..0e638eef1c 100644 --- a/src/core/NEON/kernels/arm_gemm/merges/a64_merge_float_to_half_12x8.hpp +++ b/src/core/NEON/kernels/arm_gemm/merges/a64_merge_float_to_half_12x8.hpp @@ -66,16 +66,22 @@ inline void MergeResults<12,8,false>(__fp16 *out, const float *in, int ldout, in switch ((y + 7) - ymax) { case 6: outptr1 = dummyres; + // fall through case 5: outptr2 = dummyres; + // fall through case 4: outptr3 = dummyres; + // fall through case 3: outptr4 = dummyres; + // fall through case 2: outptr5 = dummyres; + // fall through case 1: outptr6 = dummyres; + // fall through case 0: outptr7 = dummyres; break; diff --git a/src/core/NEON/kernels/arm_gemm/merges/a64_merge_half_24x8.hpp b/src/core/NEON/kernels/arm_gemm/merges/a64_merge_half_24x8.hpp index 3ed43b10bd..60cc2f32da 100644 --- a/src/core/NEON/kernels/arm_gemm/merges/a64_merge_half_24x8.hpp +++ b/src/core/NEON/kernels/arm_gemm/merges/a64_merge_half_24x8.hpp @@ -65,16 +65,22 @@ inline void MergeResults<24, 8>(__fp16 *out, const __fp16 *in, const int ldout, switch ((y + 7) - ymax) { case 6: outptr1 = dummyres; + // fall through case 5: outptr2 = dummyres; + // fall through case 4: outptr3 = dummyres; + // fall through case 3: outptr4 = dummyres; + // fall through case 2: outptr5 = dummyres; + // fall through case 1: outptr6 = dummyres; + // fall through case 0: outptr7 = dummyres; break; diff --git a/src/core/NEON/kernels/arm_gemm/merges/a64_merge_int32_12x8.hpp b/src/core/NEON/kernels/arm_gemm/merges/a64_merge_int32_12x8.hpp index 35d4cc5d73..0212dfdbb6 100644 --- a/src/core/NEON/kernels/arm_gemm/merges/a64_merge_int32_12x8.hpp +++ b/src/core/NEON/kernels/arm_gemm/merges/a64_merge_int32_12x8.hpp @@ -63,16 +63,22 @@ inline void MergeResults<12, 8, false>(int32_t *out, const int32_t *in, const in switch ((y + 7) - ymax) { case 6: outptr1 = dummyres; + // fall through case 5: outptr2 = dummyres; + // fall through case 4: outptr3 = dummyres; + // fall through case 3: outptr4 = dummyres; + // fall through case 2: outptr5 = dummyres; + // fall through case 1: outptr6 = dummyres; + // fall through case 0: outptr7 = dummyres; break; diff --git a/src/core/NEON/kernels/arm_gemm/transforms/a32_interleave_6way_32bit.hpp b/src/core/NEON/kernels/arm_gemm/transforms/a32_interleave_6way_32bit.hpp index 20ad301a18..a460fdfcf4 100644 --- a/src/core/NEON/kernels/arm_gemm/transforms/a32_interleave_6way_32bit.hpp +++ b/src/core/NEON/kernels/arm_gemm/transforms/a32_interleave_6way_32bit.hpp @@ -60,12 +60,16 @@ inline void TransformImpl<6, 1, false, 4, 4, false>::Transform(T *out, const T * /* Everything falls through in here */ case 4: inptr1 = zerobuff; + // fall through case 3: inptr2 = zerobuff; + // fall through case 2: inptr3 = zerobuff; + // fall through case 1: inptr4 = zerobuff; + // fall through case 0: inptr5 = zerobuff; break; diff --git a/src/core/NEON/kernels/arm_gemm/transforms/a64_block16_interleave4_8bit.hpp b/src/core/NEON/kernels/arm_gemm/transforms/a64_block16_interleave4_8bit.hpp index 2f513a6118..6a15fc42e4 100644 --- a/src/core/NEON/kernels/arm_gemm/transforms/a64_block16_interleave4_8bit.hpp +++ b/src/core/NEON/kernels/arm_gemm/transforms/a64_block16_interleave4_8bit.hpp @@ -57,8 +57,10 @@ void TransformImpl<4, 16, false, 1, 1, false>::Transform(T *out, const T *in, in /* Everything falls through in here */ case 2: inptr1 = zerobuff; + // fall through case 1: inptr2 = zerobuff; + // fall through case 0: inptr3 = zerobuff; break; @@ -93,8 +95,10 @@ void TransformImpl<4, 16, false, 1, 1, false>::Transform(T *out, const T *in, in /* Everything falls through in here */ case 2: inptr1 = zerobuff; + // fall through case 1: inptr2 = zerobuff; + // fall through case 0: inptr3 = zerobuff; break; diff --git a/src/core/NEON/kernels/arm_gemm/transforms/a64_interleave_8way_16bit.hpp b/src/core/NEON/kernels/arm_gemm/transforms/a64_interleave_8way_16bit.hpp index 27136d144a..0028ab08a9 100644 --- a/src/core/NEON/kernels/arm_gemm/transforms/a64_interleave_8way_16bit.hpp +++ b/src/core/NEON/kernels/arm_gemm/transforms/a64_interleave_8way_16bit.hpp @@ -64,16 +64,22 @@ void TransformImpl<8, 1, false, 2, 2, false>::Transform(T *out, const T *in, int /* Everything falls through in here */ case 6: inptr1 = zerobuff; + // fall through case 5: inptr2 = zerobuff; + // fall through case 4: inptr3 = zerobuff; + // fall through case 3: inptr4 = zerobuff; + // fall through case 2: inptr5 = zerobuff; + // fall through case 1: inptr6 = zerobuff; + // fall through case 0: inptr7 = zerobuff; break; diff --git a/src/core/NEON/kernels/arm_gemm/transforms/a64_interleave_8way_32bit.hpp b/src/core/NEON/kernels/arm_gemm/transforms/a64_interleave_8way_32bit.hpp index 54822c81b0..758c084a46 100644 --- a/src/core/NEON/kernels/arm_gemm/transforms/a64_interleave_8way_32bit.hpp +++ b/src/core/NEON/kernels/arm_gemm/transforms/a64_interleave_8way_32bit.hpp @@ -64,16 +64,22 @@ inline void TransformImpl<8, 1, false, 4, 4, false>::Transform(T *out, const T * /* Everything falls through in here */ case 6: inptr1 = zerobuff; + // fall through case 5: inptr2 = zerobuff; + // fall through case 4: inptr3 = zerobuff; + // fall through case 3: inptr4 = zerobuff; + // fall through case 2: inptr5 = zerobuff; + // fall through case 1: inptr6 = zerobuff; + // fall through case 0: inptr7 = zerobuff; break; diff --git a/src/core/NEON/kernels/arm_gemm/transforms/a64_interleave_8way_half_to_float.hpp b/src/core/NEON/kernels/arm_gemm/transforms/a64_interleave_8way_half_to_float.hpp index 0606330d27..de8e95a6d7 100644 --- a/src/core/NEON/kernels/arm_gemm/transforms/a64_interleave_8way_half_to_float.hpp +++ b/src/core/NEON/kernels/arm_gemm/transforms/a64_interleave_8way_half_to_float.hpp @@ -64,16 +64,22 @@ inline void TransformImpl<8, 1, false, 4, 2, false>::Transform(float *out, const /* Everything falls through in here */ case 6: inptr1 = zerobuff; + // fall through case 5: inptr2 = zerobuff; + // fall through case 4: inptr3 = zerobuff; + // fall through case 3: inptr4 = zerobuff; + // fall through case 2: inptr5 = zerobuff; + // fall through case 1: inptr6 = zerobuff; + // fall through case 0: inptr7 = zerobuff; break; -- cgit v1.2.1