diff options
author | David Mansell <David.Mansell@arm.com> | 2023-03-10 13:48:50 +0000 |
---|---|---|
committer | Viet-Hoa Do <viet-hoa.do@arm.com> | 2023-03-13 16:40:36 +0000 |
commit | aaa9da1efa83911c7a67d50811ad669a92a7d12f (patch) | |
tree | 457c72960724b6e9b2e50e8b039735515c629216 /src/core/NEON/kernels/arm_gemm/interleave_indirect-sve.cpp | |
parent | 0ffc88b7ae8f73fc66338a4eee5348bab634edf7 (diff) | |
download | ComputeLibrary-aaa9da1efa83911c7a67d50811ad669a92a7d12f.tar.gz |
arm_gemm: Add SME2 FP16 kernels.
Resolves: COMPMID-5966
Change-Id: Ic0d694493178da029a297643855bd0cff01b174f
Signed-off-by: David Mansell <David.Mansell@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9302
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Viet-Hoa Do <viet-hoa.do@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/NEON/kernels/arm_gemm/interleave_indirect-sve.cpp')
-rw-r--r-- | src/core/NEON/kernels/arm_gemm/interleave_indirect-sve.cpp | 15 |
1 files changed, 14 insertions, 1 deletions
diff --git a/src/core/NEON/kernels/arm_gemm/interleave_indirect-sve.cpp b/src/core/NEON/kernels/arm_gemm/interleave_indirect-sve.cpp index 468915a046..7ed8af9b45 100644 --- a/src/core/NEON/kernels/arm_gemm/interleave_indirect-sve.cpp +++ b/src/core/NEON/kernels/arm_gemm/interleave_indirect-sve.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 Arm Limited. + * Copyright (c) 2022-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -97,6 +97,19 @@ template void IndirectInterleave<1, 1, VLType::SME>(__fp16 *, const __fp16 * con template void ConvolutionInterleave<1, 1, VLType::SME>(__fp16 *, const __fp16 *, size_t, const convolver<__fp16> &, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t); template void Interleave<1, 1, VLType::SME>(__fp16 *, const __fp16 *, size_t, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t); +/* FP16: SME implementations with dot-product type outer product */ +template void IndirectInterleave<2, 2, VLType::SME>(__fp16 *, const __fp16 * const * const *, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t); +template void ConvolutionInterleave<2, 2, VLType::SME>(__fp16 *, const __fp16 *, size_t, const convolver<__fp16> &, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t); +template void Interleave<2, 2, VLType::SME>(__fp16 *, const __fp16 *, size_t, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t); + +template void IndirectInterleave<1, 2, VLType::SME>(__fp16 *, const __fp16 * const * const *, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t); +template void ConvolutionInterleave<1, 2, VLType::SME>(__fp16 *, const __fp16 *, size_t, const convolver<__fp16> &, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t); +template void Interleave<1, 2, VLType::SME>(__fp16 *, const __fp16 *, size_t, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t); + +template void IndirectInterleave<4, 2, VLType::SME>(__fp16 *, const __fp16 * const * const *, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t); +template void ConvolutionInterleave<4, 2, VLType::SME>(__fp16 *, const __fp16 *, size_t, const convolver<__fp16> &, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t); +template void Interleave<4, 2, VLType::SME>(__fp16 *, const __fp16 *, size_t, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t); + /* FP32 fast-mode: SME implementations */ template void IndirectInterleave<1, 2, VLType::SME>(bfloat16 *, const float * const * const *, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t); template void ConvolutionInterleave<1, 2, VLType::SME>(bfloat16 *, const float *, size_t, const convolver<float> &, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t); |