aboutsummaryrefslogtreecommitdiff
path: root/src/core/NEON/kernels/arm_gemm/interleave_indirect-sve.cpp
diff options
context:
space:
mode:
authorDavid Mansell <David.Mansell@arm.com>2023-03-10 13:48:50 +0000
committerViet-Hoa Do <viet-hoa.do@arm.com>2023-03-13 16:40:36 +0000
commitaaa9da1efa83911c7a67d50811ad669a92a7d12f (patch)
tree457c72960724b6e9b2e50e8b039735515c629216 /src/core/NEON/kernels/arm_gemm/interleave_indirect-sve.cpp
parent0ffc88b7ae8f73fc66338a4eee5348bab634edf7 (diff)
downloadComputeLibrary-aaa9da1efa83911c7a67d50811ad669a92a7d12f.tar.gz
arm_gemm: Add SME2 FP16 kernels.
Resolves: COMPMID-5966 Change-Id: Ic0d694493178da029a297643855bd0cff01b174f Signed-off-by: David Mansell <David.Mansell@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9302 Benchmark: Arm Jenkins <bsgcomp@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Viet-Hoa Do <viet-hoa.do@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/NEON/kernels/arm_gemm/interleave_indirect-sve.cpp')
-rw-r--r--src/core/NEON/kernels/arm_gemm/interleave_indirect-sve.cpp15
1 files changed, 14 insertions, 1 deletions
diff --git a/src/core/NEON/kernels/arm_gemm/interleave_indirect-sve.cpp b/src/core/NEON/kernels/arm_gemm/interleave_indirect-sve.cpp
index 468915a046..7ed8af9b45 100644
--- a/src/core/NEON/kernels/arm_gemm/interleave_indirect-sve.cpp
+++ b/src/core/NEON/kernels/arm_gemm/interleave_indirect-sve.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022 Arm Limited.
+ * Copyright (c) 2022-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -97,6 +97,19 @@ template void IndirectInterleave<1, 1, VLType::SME>(__fp16 *, const __fp16 * con
template void ConvolutionInterleave<1, 1, VLType::SME>(__fp16 *, const __fp16 *, size_t, const convolver<__fp16> &, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
template void Interleave<1, 1, VLType::SME>(__fp16 *, const __fp16 *, size_t, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
+/* FP16: SME implementations with dot-product type outer product */
+template void IndirectInterleave<2, 2, VLType::SME>(__fp16 *, const __fp16 * const * const *, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
+template void ConvolutionInterleave<2, 2, VLType::SME>(__fp16 *, const __fp16 *, size_t, const convolver<__fp16> &, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
+template void Interleave<2, 2, VLType::SME>(__fp16 *, const __fp16 *, size_t, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
+
+template void IndirectInterleave<1, 2, VLType::SME>(__fp16 *, const __fp16 * const * const *, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
+template void ConvolutionInterleave<1, 2, VLType::SME>(__fp16 *, const __fp16 *, size_t, const convolver<__fp16> &, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
+template void Interleave<1, 2, VLType::SME>(__fp16 *, const __fp16 *, size_t, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
+
+template void IndirectInterleave<4, 2, VLType::SME>(__fp16 *, const __fp16 * const * const *, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
+template void ConvolutionInterleave<4, 2, VLType::SME>(__fp16 *, const __fp16 *, size_t, const convolver<__fp16> &, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
+template void Interleave<4, 2, VLType::SME>(__fp16 *, const __fp16 *, size_t, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
+
/* FP32 fast-mode: SME implementations */
template void IndirectInterleave<1, 2, VLType::SME>(bfloat16 *, const float * const * const *, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
template void ConvolutionInterleave<1, 2, VLType::SME>(bfloat16 *, const float *, size_t, const convolver<float> &, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);