about | summary | refs | log | tree | commit | diff
path: root/src/core/NEON/kernels/arm_gemm/mergeresults.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/core/NEON/kernels/arm_gemm/mergeresults.cpp')
-rw-r--r-- src/core/NEON/kernels/arm_gemm/mergeresults.cpp | 9
1 files changed, 7 insertions, 2 deletions
diff --git a/src/core/NEON/kernels/arm_gemm/mergeresults.cpp b/src/core/NEON/kernels/arm_gemm/mergeresults.cpp
index 8ca947a6dc..adcdc88fd9 100644
--- a/src/core/NEON/kernels/arm_gemm/mergeresults.cpp
+++ b/src/core/NEON/kernels/arm_gemm/mergeresults.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2018 Arm Limited.
+ * Copyright (c) 2017-2018, 2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -25,7 +25,6 @@
/* As some of the merges need these headers, but are all included in the
* arm_gemm namespace, put these headers here. */
#include <algorithm>
-#include <limits>
#include <arm_neon.h>
@@ -97,6 +96,12 @@ void MergeResults(Tout * out, const Tin * in, int ldc, int y0, int ymax, int x0,
#include "merges/list.hpp"
+/* Cortex-A53 8x6 SGEMM kernel uses a templated merge as the optimized merge
+ * generator cannot cope with the width (6) not being a multiple of VL (4). */
+#ifdef __aarch64__
+template void MergeResults<6u, 8u, false, float, float>(float *, float const*, int, int, int, int, int, float const *, Activation, bool);
+#endif
+
#if defined(__aarch64__) && defined(__ARM_FP16_ARGS)
template void MergeResults<12u, 8u, false, float, __fp16>(__fp16*, float const*, int, int, int, int, int, __fp16 const*, Activation, bool);
#endif