From fc94f4d23abd4bc427b701f54ad85282e9ec7872 Mon Sep 17 00:00:00 2001 From: Michael Tyler Date: Tue, 4 Jun 2024 15:47:37 +0100 Subject: Update CPU kernels and add mixed sign GEMM support - Add support for mixed sign quantized convolution. - Add support for mixed sign dequantized GEMM. - Add SME FP16 GEMV kernel. - Change SME vector length function to use RDSVL instead of static variable. - Add GEMM dilation support internally (not exposed yet). - Remove unused "get_default_activation_values" functions. - Add SVE fixed format interleaved BF16 DOT kernel. - Updates and optimizations to assembly kernels. Resolves COMPMID-6926 Change-Id: I227f502502611d4cc4111c89e30c53ce94079544 Signed-off-by: Michael Tyler Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/11570 Tested-by: Arm Jenkins Reviewed-by: Gunes Bayir Comments-Addressed: Arm Jenkins Benchmark: Arm Jenkins --- src/core/NEON/kernels/arm_gemm/utils.hpp | 60 ++++++++------------------------ 1 file changed, 15 insertions(+), 45 deletions(-) (limited to 'src/core/NEON/kernels/arm_gemm/utils.hpp') diff --git a/src/core/NEON/kernels/arm_gemm/utils.hpp b/src/core/NEON/kernels/arm_gemm/utils.hpp index 11b1bd3e05..d0a8635604 100644 --- a/src/core/NEON/kernels/arm_gemm/utils.hpp +++ b/src/core/NEON/kernels/arm_gemm/utils.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2023 Arm Limited. + * Copyright (c) 2017-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -176,7 +176,6 @@ namespace utils { // which then calls SVE kernels (compiled accordingly) iff SVE is detected at runtime. template inline unsigned long get_vector_length() { -#if defined(__aarch64__) uint64_t vl; __asm __volatile ( @@ -188,24 +187,26 @@ inline unsigned long get_vector_length() { ); return vl / sizeof(T); -#else // !defined(__aarch64__) - return 16 / sizeof(T); -#endif // defined(__aarch64__) } -#ifdef ARM_COMPUTE_ENABLE_SME namespace sme { -// function from misc-sve.cpp -extern unsigned int raw_vector_length(); - template -inline unsigned long get_vector_length() { - return raw_vector_length() / sizeof(T); +inline uint64_t get_vector_length() { + uint64_t raw_vector_length; + + __asm __volatile ( + ".inst 0x04bf5821\n" // RDSVL X1, #1 + "mov %0, X1\n" + : "=r" (raw_vector_length) + : + : "x1" + ); + + return raw_vector_length / sizeof(T); } } // namespace sme -#endif // ARM_COMPUTE_ENABLE_SME // get_vector_length(VLType): Returns vector length for type "T". // @@ -214,48 +215,17 @@ inline unsigned long get_vector_length() { template inline unsigned long get_vector_length(VLType vl_type) { switch (vl_type) { -#ifdef ARM_COMPUTE_ENABLE_SME +#ifdef ARM_COMPUTE_ENABLE_SVE case VLType::SME: return sme::get_vector_length(); -#endif // ARM_COMPUTE_ENABLE_SME case VLType::SVE: return get_vector_length(); +#endif default: return 16 / sizeof(T); } } -// get_default_activation_values(): Returns the default values for activation min and max for integer activation. -template -inline std::tuple get_default_activation_values() -{ - const T min = static_cast(std::numeric_limits::min()); - const T max = static_cast(std::numeric_limits::max()); - - return std::make_tuple(min, max); -} - -// get_default_activation_values(): Returns the default values for activation min and max for float activation. -template <> -inline std::tuple get_default_activation_values() -{ - const float min = static_cast(-std::numeric_limits::infinity()); - const float max = static_cast(std::numeric_limits::infinity()); - - return std::make_tuple(min, max); -} - -#if defined(__ARM_FP16_ARGS) -// get_default_activation_values(): Returns the default values for activation min and max for __fp16 activation. -template <> -inline std::tuple<__fp16, __fp16> get_default_activation_values() -{ - const __fp16 min = static_cast<__fp16>(-std::numeric_limits::infinity()); - const __fp16 max = static_cast<__fp16>(std::numeric_limits::infinity()); - - return std::make_tuple(min, max); -} -#endif // defined(__ARM_FP16_ARGS) } // utils namespace } // arm_gemm namespace -- cgit v1.2.1