From 421405b6a21b124288a750e2da26dc01eb7391cb Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Fri, 26 Oct 2018 19:05:32 +0100 Subject: COMPMID-1675: Add SVE support Change-Id: I86679adff556b6ffc9929b35cbf1b59b3958bdb1 --- .../kernels/sve_interleaved_fp16_mla_3VLx8.hpp | 72 ++++ .../sve_interleaved_fp16_mla_3VLx8/generic.cpp | 324 ++++++++++++++++++ .../kernels/sve_interleaved_fp32_mla_3VLx8.hpp | 72 ++++ .../sve_interleaved_fp32_mla_3VLx8/generic.cpp | 333 +++++++++++++++++++ .../kernels/sve_interleaved_s8s32_dot_3VLx8.hpp | 72 ++++ .../sve_interleaved_s8s32_dot_3VLx8/generic.cpp | 334 +++++++++++++++++++ .../kernels/sve_interleaved_u8u32_dot_3VLx8.hpp | 72 ++++ .../sve_interleaved_u8u32_dot_3VLx8/generic.cpp | 328 ++++++++++++++++++ .../kernels/arm_gemm/kernels/sve_sgemm_3VLx8.hpp | 75 +++++ .../arm_gemm/kernels/sve_sgemm_3VLx8/generic.cpp | 366 +++++++++++++++++++++ 10 files changed, 2048 insertions(+) create mode 100644 src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp16_mla_3VLx8.hpp create mode 100644 src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp16_mla_3VLx8/generic.cpp create mode 100644 src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mla_3VLx8.hpp create mode 100644 src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mla_3VLx8/generic.cpp create mode 100644 src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_dot_3VLx8.hpp create mode 100644 src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_dot_3VLx8/generic.cpp create mode 100644 src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_dot_3VLx8.hpp create mode 100644 src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_dot_3VLx8/generic.cpp create mode 100644 src/core/NEON/kernels/arm_gemm/kernels/sve_sgemm_3VLx8.hpp create mode 100644 src/core/NEON/kernels/arm_gemm/kernels/sve_sgemm_3VLx8/generic.cpp (limited to 'src/core/NEON/kernels/arm_gemm/kernels') diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp16_mla_3VLx8.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp16_mla_3VLx8.hpp new file mode 100644 index 0000000000..3fd738e673 --- /dev/null +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp16_mla_3VLx8.hpp @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2018 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#pragma once + +#ifdef __ARM_FEATURE_SVE + + +#include "../std_transforms_sve.hpp" + +namespace arm_gemm { + +// Actual kernel implementations +void sve_interleaved_fp16_mla_3VLx8(const __fp16 *, const __fp16 *, __fp16 *, int, int, int); + +class interleaved_fp16_mla_3VLx8 { +public: + typedef __fp16 operand_type; + typedef __fp16 result_type; + + typedef void (*kern_type)(const __fp16 *, const __fp16 *, __fp16 *, int, int, int); + + /* Kernel blocking parameters */ + static int out_width() + { + return svcnth() * 3; + } + + static int out_height() + { + return 8; + } + + static int k_unroll() + { + return 1; + } + + // Use the standard fixed size transforms. + StdTransformsSVE transforms = {}; + + kern_type kernel=sve_interleaved_fp16_mla_3VLx8; + + interleaved_fp16_mla_3VLx8(const CPUInfo *ci) + { + + } +}; + +} // namespace arm_gemm + +#endif // __ARM_FEATURE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp16_mla_3VLx8/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp16_mla_3VLx8/generic.cpp new file mode 100644 index 0000000000..92ec888244 --- /dev/null +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp16_mla_3VLx8/generic.cpp @@ -0,0 +1,324 @@ +/* + * Copyright (c) 2018 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifdef __ARM_FEATURE_SVE + + +#include "../../asmlib.hpp" + +namespace arm_gemm { + +void sve_interleaved_fp16_mla_3VLx8(const __fp16 *Apanel, const __fp16 *Bpanel, __fp16 *Cpanel, int ablocks, int bblocks, int K) { + const __fp16 *a_ptr = Apanel; + __fp16 *c_ptr = Cpanel; + + const long loops_count = (K / 2) - 1; + const long tails_count = K % 2; + + for (int yb=0; yb transforms = {}; + + kern_type kernel=sve_interleaved_fp32_mla_3VLx8; + + interleaved_fp32_mla_3VLx8(const CPUInfo *ci) + { + + } +}; + +} // namespace arm_gemm + +#endif // __ARM_FEATURE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mla_3VLx8/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mla_3VLx8/generic.cpp new file mode 100644 index 0000000000..bb08fc7cb0 --- /dev/null +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mla_3VLx8/generic.cpp @@ -0,0 +1,333 @@ +/* + * Copyright (c) 2018 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifdef __ARM_FEATURE_SVE + + +#include "../../asmlib.hpp" + +namespace arm_gemm { + +void sve_interleaved_fp32_mla_3VLx8(const float *Apanel, const float *Bpanel, float *Cpanel, int ablocks, int bblocks, int K) { + const float *a_ptr = Apanel; + float *c_ptr = Cpanel; + + const long loops_count = (K / 2) - 1; + const long tails_count = K % 2; + + for (int yb=0; yb +#include "../std_transforms_sve.hpp" + +namespace arm_gemm { + +// Actual kernel implementations +void sve_interleaved_s8s32_dot_3VLx8(const int8_t *, const int8_t *, int32_t *, int, int, int); + +class interleaved_s8s32_dot_3VLx8 { +public: + typedef int8_t operand_type; + typedef int32_t result_type; + + typedef void (*kern_type)(const int8_t *, const int8_t *, int32_t *, int, int, int); + + /* Kernel blocking parameters */ + static int out_width() + { + return svcntw() * 3; + } + + static int out_height() + { + return 8; + } + + static int k_unroll() + { + return 4; + } + + // Use the standard fixed size transforms. + StdTransformsSVE transforms = {}; + + kern_type kernel=sve_interleaved_s8s32_dot_3VLx8; + + interleaved_s8s32_dot_3VLx8(const CPUInfo *ci) + { + + } +}; + +} // namespace arm_gemm + +#endif // __ARM_FEATURE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_dot_3VLx8/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_dot_3VLx8/generic.cpp new file mode 100644 index 0000000000..2e994a13f3 --- /dev/null +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_dot_3VLx8/generic.cpp @@ -0,0 +1,334 @@ +/* + * Copyright (c) 2018 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifdef __ARM_FEATURE_SVE + +#include +#include "../../asmlib.hpp" + +namespace arm_gemm { + +void sve_interleaved_s8s32_dot_3VLx8(const int8_t *Apanel, const int8_t *Bpanel, int32_t *Cpanel, int ablocks, int bblocks, int K) { + const int8_t *a_ptr = Apanel; + int32_t *c_ptr = Cpanel; + + K /= 4; + const long loops_count = (K / 2) - 1; + const long tails_count = K % 2; + + for (int yb=0; yb +#include "../std_transforms_sve.hpp" + +namespace arm_gemm { + +// Actual kernel implementations +void sve_interleaved_u8u32_dot_3VLx8(const uint8_t *, const uint8_t *, uint32_t *, int, int, int); + +class interleaved_u8u32_dot_3VLx8 { +public: + typedef uint8_t operand_type; + typedef uint32_t result_type; + + typedef void (*kern_type)(const uint8_t *, const uint8_t *, uint32_t *, int, int, int); + + /* Kernel blocking parameters */ + static int out_width() + { + return svcntw() * 3; + } + + static int out_height() + { + return 8; + } + + static int k_unroll() + { + return 4; + } + + // Use the standard fixed size transforms. + StdTransformsSVE transforms = {}; + + kern_type kernel=sve_interleaved_u8u32_dot_3VLx8; + + interleaved_u8u32_dot_3VLx8(const CPUInfo *ci) + { + + } +}; + +} // namespace arm_gemm + +#endif // __ARM_FEATURE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_dot_3VLx8/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_dot_3VLx8/generic.cpp new file mode 100644 index 0000000000..f4d33a9efa --- /dev/null +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_dot_3VLx8/generic.cpp @@ -0,0 +1,328 @@ +/* + * Copyright (c) 2018 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifdef __ARM_FEATURE_SVE + +#include +#include "../../asmlib.hpp" + +namespace arm_gemm { + +void sve_interleaved_u8u32_dot_3VLx8(const uint8_t *Apanel, const uint8_t *Bpanel, uint32_t *Cpanel, int ablocks, int bblocks, int K) { + const uint8_t *a_ptr = Apanel; + uint32_t *c_ptr = Cpanel; + + K /= 4; + const long loops_count = (K / 2) - 1; + const long tails_count = K % 2; + + for (int yb=0; yb transforms; + + kern_type kernel=sve_sgemm_3VLx8; + + sgemm_3VLx8(const CPUInfo *ci) { } +}; + +} // namespace arm_gemm + +#endif // __ARM_FEATURE_SVE diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_sgemm_3VLx8/generic.cpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_sgemm_3VLx8/generic.cpp new file mode 100644 index 0000000000..fd6f0b7f98 --- /dev/null +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_sgemm_3VLx8/generic.cpp @@ -0,0 +1,366 @@ +/* + * Copyright (c) 2017-2018 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifdef __ARM_FEATURE_SVE + +#include +#include + +#include "../../asmlib.hpp" + +// Kernel implementation. +// +// Assume that "Apanel" points to a chunk of A blocks (each size 8xK) in read-order. +// Assume that "Bpanel" points to a chunk of B blocks (each size 3VLxK) in read-order. +// Assume that "Cpanel" points to a chunk of C output blocks (each size +// 3VLx8), the chunks being arranged in a row major fashion. +// +// Note that the intent of this is that either ablocks or bblocks will be 1 +// - this construction allows the output loop to proceed in either order. + +namespace arm_gemm { + +void sve_sgemm_3VLx8(const float *Apanel, const float *Bpanel, float *Cpanel, int ablocks, int bblocks, int K) { + const float *a_ptr = Apanel; + float *c_ptr = Cpanel; + + // There's no predication inside the kernel, so get a true predicate to use everywhere. + svbool_t ptrue = svptrue_b32(); + + for (int yb=0; yb