diff options
Diffstat (limited to 'src/core/NEON/kernels/arm_gemm/mergeresults.cpp')
-rw-r--r-- | src/core/NEON/kernels/arm_gemm/mergeresults.cpp | 9 |
1 files changed, 7 insertions, 2 deletions
diff --git a/src/core/NEON/kernels/arm_gemm/mergeresults.cpp b/src/core/NEON/kernels/arm_gemm/mergeresults.cpp
index 8ca947a6dc..adcdc88fd9 100644
--- a/src/core/NEON/kernels/arm_gemm/mergeresults.cpp
+++ b/src/core/NEON/kernels/arm_gemm/mergeresults.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 Arm Limited.
+ * Copyright (c) 2017-2018, 2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -25,7 +25,6 @@
 /* As some of the merges need these headers, but are all included in the
  * arm_gemm namespace, put these headers here. */
 #include <algorithm>
-#include <limits>
 
 #include <arm_neon.h>
 
@@ -97,6 +96,12 @@ void MergeResults(Tout * out, const Tin * in, int ldc, int y0, int ymax, int x0,
 
 #include "merges/list.hpp"
 
+/* Cortex-A53 8x6 SGEMM kernel uses a templated merge as the optimized merge
+ * generator cannot cope with the width (6) not being a multiple of VL (4). */
+#ifdef __aarch64__
+template void MergeResults<6u, 8u, false, float, float>(float *, float const*, int, int, int, int, int, float const *, Activation, bool);
+#endif
+
 #if defined(__aarch64__) && defined(__ARM_FP16_ARGS)
 template void MergeResults<12u, 8u, false, float, __fp16>(__fp16*, float const*, int, int, int, int, int, __fp16 const*, Activation, bool);
 #endif