From 1d480652b820317fc97ccbc3cb517e3b9e8be197 Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Wed, 23 Jan 2019 11:24:50 +0000 Subject: COMPMID-1867: Add u8 and s8 hybrid assembly kernels. Change-Id: Ifeb005f9d18d19feff11949474cce84d9e03749c Reviewed-on: https://review.mlplatform.org/565 Reviewed-by: Michalis Spyrou Tested-by: Arm Jenkins --- .../kernels/arm_gemm/kernels/a32_sgemm_8x6.hpp | 8 +- .../kernels/arm_gemm/kernels/a64_gemm_s16_12x8.hpp | 8 +- .../kernels/arm_gemm/kernels/a64_gemm_s8_12x8.hpp | 8 +- .../kernels/arm_gemm/kernels/a64_gemm_s8_4x4.hpp | 8 +- .../kernels/arm_gemm/kernels/a64_gemm_u16_12x8.hpp | 8 +- .../kernels/arm_gemm/kernels/a64_gemm_u8_12x8.hpp | 8 +- .../kernels/arm_gemm/kernels/a64_gemm_u8_4x4.hpp | 8 +- .../kernels/arm_gemm/kernels/a64_hgemm_24x8.hpp | 8 +- .../arm_gemm/kernels/a64_hybrid_s8s32_dot_16x4.hpp | 77 + .../kernels/a64_hybrid_s8s32_dot_16x4/a55.cpp | 2271 ++++++++++++++++++++ .../kernels/a64_hybrid_s8s32_dot_16x4/generic.cpp | 1605 ++++++++++++++ .../arm_gemm/kernels/a64_hybrid_u8u32_dot_16x4.hpp | 77 + .../kernels/a64_hybrid_u8u32_dot_16x4/a55.cpp | 2271 ++++++++++++++++++++ .../kernels/a64_hybrid_u8u32_dot_16x4/generic.cpp | 1605 ++++++++++++++ .../kernels/arm_gemm/kernels/a64_sgemm_12x8.hpp | 8 +- .../a64_sgemm_nativeA_pretransposeB_16x4.hpp | 8 +- .../arm_gemm/kernels/a64_sgemm_native_16x4.hpp | 8 +- .../arm_gemm/kernels/a64_sgemv_pretransposed.hpp | 25 +- .../kernels/arm_gemm/kernels/a64_sgemv_trans.hpp | 11 +- .../arm_gemm/kernels/sve_hybrid_fp32_mla_4VLx4.hpp | 8 +- .../kernels/sve_interleaved_fp16_mla_3VLx8.hpp | 6 +- .../kernels/sve_interleaved_fp32_mla_3VLx8.hpp | 6 +- .../kernels/sve_interleaved_s8s32_dot_3VLx8.hpp | 6 +- .../kernels/sve_interleaved_u8u32_dot_3VLx8.hpp | 6 +- .../arm_gemm/kernels/sve_native_fp32_mla_4VLx4.hpp | 8 +- .../kernels/sve_native_s8s32_dot_4VLx4.hpp | 8 +- .../kernels/sve_native_u8u32_dot_4VLx4.hpp | 8 +- .../arm_gemm/kernels/sve_smallK_fp32_mla_1VLx4.hpp | 8 +- .../kernels/sve_smallK_hybrid_fp32_mla_1VLx4.hpp | 8 +- 29 files changed, 8013 insertions(+), 89 deletions(-) create mode 100644 src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8s32_dot_16x4.hpp create mode 100644 src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8s32_dot_16x4/a55.cpp create mode 100644 src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8s32_dot_16x4/generic.cpp create mode 100644 src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8u32_dot_16x4.hpp create mode 100644 src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8u32_dot_16x4/a55.cpp create mode 100644 src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8u32_dot_16x4/generic.cpp (limited to 'src/core/NEON/kernels/arm_gemm/kernels') diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a32_sgemm_8x6.hpp b/src/core/NEON/kernels/arm_gemm/kernels/a32_sgemm_8x6.hpp index 06e62456dc..234972270c 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a32_sgemm_8x6.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a32_sgemm_8x6.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 ARM Limited. + * Copyright (c) 2017-2019 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -50,15 +50,15 @@ public: typedef void (*kern_type)(const float *, const float *, float *, int, int, int); /* Kernel blocking parameters */ - static int out_width() { + static unsigned int out_width() { return 8; } - static int out_height() { + static unsigned int out_height() { return 6; } - static int k_unroll() { + static unsigned int k_unroll() { return 1; } diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s16_12x8.hpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s16_12x8.hpp index 95a2bc2fbc..2fcb587df1 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s16_12x8.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s16_12x8.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 ARM Limited. + * Copyright (c) 2017-2019 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -48,15 +48,15 @@ public: typedef void (*kern_type)(const int16_t *, const int16_t *, int32_t *, int, int, int); /* Kernel blocking parameters */ - static int out_width() { + static unsigned int out_width() { return 12; } - static int out_height() { + static unsigned int out_height() { return 8; } - static int k_unroll() { + static unsigned int k_unroll() { return 1; } diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_12x8.hpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_12x8.hpp index fdc0200435..cc205dc6e3 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_12x8.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_12x8.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 ARM Limited. + * Copyright (c) 2017-2019 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -43,15 +43,15 @@ public: typedef void (*kern_type)(const int8_t *, const int8_t *, int32_t *, int, int, int); /* Kernel blocking parameters */ - static int out_width() { + static unsigned int out_width() { return 12; } - static int out_height() { + static unsigned int out_height() { return 8; } - static int k_unroll() { + static unsigned int k_unroll() { return 4; } diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_4x4.hpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_4x4.hpp index be7ead9f48..71c666ad00 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_4x4.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_4x4.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 ARM Limited. + * Copyright (c) 2017-2019 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -42,15 +42,15 @@ public: typedef void (*kern_type)(const int8_t *, const int8_t *, int32_t *, int, int, int); /* Kernel blocking parameters */ - static int out_width() { + static unsigned int out_width() { return 4; } - static int out_height() { + static unsigned int out_height() { return 4; } - static int k_unroll() { + static unsigned int k_unroll() { return 16; } diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u16_12x8.hpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u16_12x8.hpp index d2692ba77f..3d5c92c622 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u16_12x8.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u16_12x8.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 ARM Limited. + * Copyright (c) 2017-2019 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -48,15 +48,15 @@ public: typedef void (*kern_type)(const uint16_t *, const uint16_t *, uint32_t *, int, int, int); /* Kernel blocking parameters */ - static int out_width() { + static unsigned int out_width() { return 12; } - static int out_height() { + static unsigned int out_height() { return 8; } - static int k_unroll() { + static unsigned int k_unroll() { return 1; } diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_12x8.hpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_12x8.hpp index a252abfd3e..9032ba67b3 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_12x8.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_12x8.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 ARM Limited. + * Copyright (c) 2017-2019 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -53,15 +53,15 @@ public: static const bool B_transpose = true; /* Kernel blocking parameters */ - static int out_width() { + static unsigned int out_width() { return 12; } - static int out_height() { + static unsigned int out_height() { return 8; } - static int k_unroll() { + static unsigned int k_unroll() { return 4; } diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_4x4.hpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_4x4.hpp index 2da3ecd4f8..fda7657b2b 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_4x4.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_4x4.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 ARM Limited. + * Copyright (c) 2017-2019 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -50,15 +50,15 @@ public: static const bool B_transpose = true; /* Kernel blocking parameters */ - static int out_width() { + static unsigned int out_width() { return 4; } - static int out_height() { + static unsigned int out_height() { return 4; } - static int k_unroll() { + static unsigned int k_unroll() { return 16; } diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_hgemm_24x8.hpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_hgemm_24x8.hpp index 911a4ebb01..5b850b7a20 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/a64_hgemm_24x8.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_hgemm_24x8.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 ARM Limited. + * Copyright (c) 2017-2019 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -47,15 +47,15 @@ public: typedef void (*kern_type)(const __fp16 *, const __fp16 *, __fp16 *, int, int, int); /* Kernel blocking parameters */ - static int out_width() { + static unsigned int out_width() { return 24; } - static int out_height() { + static unsigned int out_height() { return 8; } - static int k_unroll() { + static unsigned int k_unroll() { return 1; } diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8s32_dot_16x4.hpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8s32_dot_16x4.hpp new file mode 100644 index 0000000000..c8934dff8a --- /dev/null +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8s32_dot_16x4.hpp @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2019 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#pragma once + +#ifdef __aarch64__ + +#include +#include "../std_transforms_fixed.hpp" + +namespace arm_gemm +{ + +// Actual kernel implementations +void a64_hybrid_s8s32_dot_16x4(const int8_t *, int, const int8_t *, int32_t *, int, int32_t, int, int, int); +void a64_hybrid_s8s32_dot_16x4_a55(const int8_t *, int, const int8_t *, int32_t *, int, int32_t, int, int, int); + +class hybrid_s8s32_dot_16x4 +{ +public: + typedef int8_t operand_type; + typedef int32_t result_type; + + typedef void (*kern_type)(const int8_t *, int, const int8_t *, int32_t *, int, int32_t, int, int, int); + + /* Kernel blocking parameters */ + static unsigned int out_height() + { + return 4; + } + + static unsigned int out_width() + { + return 16; + } + + static unsigned int k_unroll() + { + return 4; + } + + StdTransformsFixed transforms = {}; + + // Default to the generic kernel + kern_type kernel=a64_hybrid_s8s32_dot_16x4; + + hybrid_s8s32_dot_16x4(const CPUInfo *ci) + { + if (ci->get_cpu_model() == CPUModel::A55r1) { + kernel = a64_hybrid_s8s32_dot_16x4_a55; + } + } +}; + +} // namespace arm_gemm + +#endif // __aarch64__ diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8s32_dot_16x4/a55.cpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8s32_dot_16x4/a55.cpp new file mode 100644 index 0000000000..48bf842ca5 --- /dev/null +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8s32_dot_16x4/a55.cpp @@ -0,0 +1,2271 @@ +/* + * Copyright (c) 2019 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifdef __aarch64__ + +#include + +#include +#include "../../asmlib.hpp" +#include "../../utils.hpp" + +namespace arm_gemm { + +void a64_hybrid_s8s32_dot_16x4_a55(const int8_t *A, int lda, const int8_t *B, int32_t *C, int ldc, int32_t beta, int M, int N, int K) { + const long beta0 = (beta == 0); + const int K_stride = ((K + 3) / 4) * 4; + const long loops_count = ((K + 16) / 32) - 1; + K -= loops_count * 32; + const long regs_count = (K / 16) - 1; + + for (int y=0; y + +#include +#include "../../asmlib.hpp" +#include "../../utils.hpp" + +namespace arm_gemm { + +void a64_hybrid_s8s32_dot_16x4(const int8_t *A, int lda, const int8_t *B, int32_t *C, int ldc, int32_t beta, int M, int N, int K) { + const long beta0 = (beta == 0); + const int K_stride = ((K + 3) / 4) * 4; + const long loops_count = ((K + 16) / 32) - 1; + K -= loops_count * 32; + const long regs_count = (K / 16) - 1; + + for (int y=0; y +#include "../std_transforms_fixed.hpp" + +namespace arm_gemm +{ + +// Actual kernel implementations +void a64_hybrid_u8u32_dot_16x4(const uint8_t *, int, const uint8_t *, uint32_t *, int, uint32_t, int, int, int); +void a64_hybrid_u8u32_dot_16x4_a55(const uint8_t *, int, const uint8_t *, uint32_t *, int, uint32_t, int, int, int); + +class hybrid_u8u32_dot_16x4 +{ +public: + typedef uint8_t operand_type; + typedef uint32_t result_type; + + typedef void (*kern_type)(const uint8_t *, int, const uint8_t *, uint32_t *, int, uint32_t, int, int, int); + + /* Kernel blocking parameters */ + static unsigned int out_height() + { + return 4; + } + + static unsigned int out_width() + { + return 16; + } + + static unsigned int k_unroll() + { + return 4; + } + + StdTransformsFixed transforms = {}; + + // Default to the generic kernel + kern_type kernel=a64_hybrid_u8u32_dot_16x4; + + hybrid_u8u32_dot_16x4(const CPUInfo *ci) + { + if (ci->get_cpu_model() == CPUModel::A55r1) { + kernel = a64_hybrid_u8u32_dot_16x4_a55; + } + } +}; + +} // namespace arm_gemm + +#endif // __aarch64__ diff --git a/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8u32_dot_16x4/a55.cpp b/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8u32_dot_16x4/a55.cpp new file mode 100644 index 0000000000..230ecdce2d --- /dev/null +++ b/src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8u32_dot_16x4/a55.cpp @@ -0,0 +1,2271 @@ +/* + * Copyright (c) 2019 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifdef __aarch64__ + +#include + +#include +#include "../../asmlib.hpp" +#include "../../utils.hpp" + +namespace arm_gemm { + +void a64_hybrid_u8u32_dot_16x4_a55(const uint8_t *A, int lda, const uint8_t *B, uint32_t *C, int ldc, uint32_t beta, int M, int N, int K) { + const long beta0 = (beta == 0u); + const int K_stride = ((K + 3) / 4) * 4; + const long loops_count = ((K + 16) / 32) - 1; + K -= loops_count * 32; + const long regs_count = (K / 16) - 1; + + for (int y=0; y + +#include +#include "../../asmlib.hpp" +#include "../../utils.hpp" + +namespace arm_gemm { + +void a64_hybrid_u8u32_dot_16x4(const uint8_t *A, int lda, const uint8_t *B, uint32_t *C, int ldc, uint32_t beta, int M, int N, int K) { + const long beta0 = (beta == 0u); + const int K_stride = ((K + 3) / 4) * 4; + const long loops_count = ((K + 16) / 32) - 1; + K -= loops_count * 32; + const long regs_count = (K / 16) - 1; + + for (int y=0; y() * 4; } - static int k_unroll() + static unsigned int k_unroll() { return 1; } diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp16_mla_3VLx8.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp16_mla_3VLx8.hpp index 9d88b60cee..2ca4ce25e8 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp16_mla_3VLx8.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp16_mla_3VLx8.hpp @@ -41,17 +41,17 @@ public: typedef void (*kern_type)(const __fp16 *, const __fp16 *, __fp16 *, int, int, int); /* Kernel blocking parameters */ - static int out_width() + static unsigned int out_width() { return get_vector_length<__fp16>() * 3; } - static int out_height() + static unsigned int out_height() { return 8; } - static int k_unroll() + static unsigned int k_unroll() { return 1; } diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mla_3VLx8.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mla_3VLx8.hpp index 2e8f261fe1..8c1fe6d0b6 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mla_3VLx8.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mla_3VLx8.hpp @@ -41,17 +41,17 @@ public: typedef void (*kern_type)(const float *, const float *, float *, int, int, int); /* Kernel blocking parameters */ - static int out_width() + static unsigned int out_width() { return get_vector_length() * 3; } - static int out_height() + static unsigned int out_height() { return 8; } - static int k_unroll() + static unsigned int k_unroll() { return 1; } diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_dot_3VLx8.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_dot_3VLx8.hpp index 67154e6a3f..cbb21387b1 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_dot_3VLx8.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_dot_3VLx8.hpp @@ -41,17 +41,17 @@ public: typedef void (*kern_type)(const int8_t *, const int8_t *, int32_t *, int, int, int); /* Kernel blocking parameters */ - static int out_width() + static unsigned int out_width() { return get_vector_length() * 3; } - static int out_height() + static unsigned int out_height() { return 8; } - static int k_unroll() + static unsigned int k_unroll() { return 4; } diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_dot_3VLx8.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_dot_3VLx8.hpp index 628c5a868e..99c039e121 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_dot_3VLx8.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_dot_3VLx8.hpp @@ -41,17 +41,17 @@ public: typedef void (*kern_type)(const uint8_t *, const uint8_t *, uint32_t *, int, int, int); /* Kernel blocking parameters */ - static int out_width() + static unsigned int out_width() { return get_vector_length() * 3; } - static int out_height() + static unsigned int out_height() { return 8; } - static int k_unroll() + static unsigned int k_unroll() { return 4; } diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_native_fp32_mla_4VLx4.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_native_fp32_mla_4VLx4.hpp index fcc80d9fe5..d7f9f20074 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_native_fp32_mla_4VLx4.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_native_fp32_mla_4VLx4.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 ARM Limited. + * Copyright (c) 2019 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -42,17 +42,17 @@ public: typedef void (*kern_type)(const float *, int, const float *, int ldb, float *, int, float, int, int, int); /* Kernel blocking parameters */ - static int out_height() + static unsigned int out_height() { return 4; } - static int out_width() + static unsigned int out_width() { return get_vector_length() * 4; } - static int k_unroll() + static unsigned int k_unroll() { return 1; } diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_native_s8s32_dot_4VLx4.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_native_s8s32_dot_4VLx4.hpp index f5634e3618..8b98358cd4 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_native_s8s32_dot_4VLx4.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_native_s8s32_dot_4VLx4.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 ARM Limited. + * Copyright (c) 2019 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -42,17 +42,17 @@ public: typedef void (*kern_type)(const int8_t *, int, const int8_t *, int ldb, int32_t *, int, int32_t, int, int, int); /* Kernel blocking parameters */ - static int out_height() + static unsigned int out_height() { return 4; } - static int out_width() + static unsigned int out_width() { return get_vector_length() * 4; } - static int k_unroll() + static unsigned int k_unroll() { return 4; } diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_native_u8u32_dot_4VLx4.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_native_u8u32_dot_4VLx4.hpp index f5ebad8565..bcbd3d35f5 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_native_u8u32_dot_4VLx4.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_native_u8u32_dot_4VLx4.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 ARM Limited. + * Copyright (c) 2019 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -43,17 +43,17 @@ public: typedef void (*kern_type)(const uint8_t *, int, const uint8_t *, int ldb, uint32_t *, int, uint32_t, int, int, int); /* Kernel blocking parameters */ - static int out_height() + static unsigned int out_height() { return 4; } - static int out_width() + static unsigned int out_width() { return get_vector_length() * 4; } - static int k_unroll() + static unsigned int k_unroll() { return 4; } diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_fp32_mla_1VLx4.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_fp32_mla_1VLx4.hpp index 80b216ca14..06622d6f2e 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_fp32_mla_1VLx4.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_fp32_mla_1VLx4.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 ARM Limited. + * Copyright (c) 2019 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -42,17 +42,17 @@ public: typedef void (*kern_type)(const float *, int, const float *, int ldb, float *, int, float, int, int, int); /* Kernel blocking parameters */ - static int out_height() + static unsigned int out_height() { return 4; } - static int out_width() + static unsigned int out_width() { return get_vector_length() * 1; } - static int k_unroll() + static unsigned int k_unroll() { return 1; } diff --git a/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_fp32_mla_1VLx4.hpp b/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_fp32_mla_1VLx4.hpp index aa2c522382..022efdfc26 100644 --- a/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_fp32_mla_1VLx4.hpp +++ b/src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_fp32_mla_1VLx4.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 ARM Limited. + * Copyright (c) 2019 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -42,17 +42,17 @@ public: typedef void (*kern_type)(const float *, int, const float *, float *, int, float, int, int, int); /* Kernel blocking parameters */ - static int out_height() + static unsigned int out_height() { return 4; } - static int out_width() + static unsigned int out_width() { return get_vector_length() * 1; } - static int k_unroll() + static unsigned int k_unroll() { return 1; } -- cgit v1.2.1