From cfca87b91def4f455630f2094447dc0500b6256c Mon Sep 17 00:00:00 2001 From: Gunes Bayir Date: Tue, 9 Apr 2024 23:13:04 +0100 Subject: Add SME2 implementation of softmax for FP16 In addition to the softmax kernel, this patch fixes minor issues in the fp32 implementation. Resolves: COMPMID-6920 Change-Id: Ibbd9f0af5f2a93fba0e92d72ba437279c34149d3 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/11402 Benchmark: Arm Jenkins Tested-by: Arm Jenkins Reviewed-by: Viet-Hoa Do Comments-Addressed: Arm Jenkins --- src/core/common/Registrars.h | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'src/core') diff --git a/src/core/common/Registrars.h b/src/core/common/Registrars.h index 50b3fc1284..a74316b486 100644 --- a/src/core/common/Registrars.h +++ b/src/core/common/Registrars.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023 Arm Limited. + * Copyright (c) 2020-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -38,6 +38,12 @@ #define REGISTER_FP16_SVE2(func_name) nullptr #endif /* defined(ARM_COMPUTE_ENABLE_SVE2) */ +#if defined(ARM_COMPUTE_ENABLE_SME2) +#define REGISTER_FP16_SME2(func_name) &(func_name) +#else /* !defined(ARM_COMPUTE_ENABLE_SME2) */ +#define REGISTER_FP16_SME2(func_name) nullptr +#endif /* defined(ARM_COMPUTE_ENABLE_SME2) */ + #if defined(ARM_COMPUTE_ENABLE_NEON) #define REGISTER_FP16_NEON(func_name) &(func_name) #else /* !defined(ARM_COMPUTE_ENABLE_NEON) */ @@ -48,6 +54,7 @@ #define REGISTER_FP16_NEON(func_name) nullptr #define REGISTER_FP16_SVE(func_name) nullptr #define REGISTER_FP16_SVE2(func_name) nullptr +#define REGISTER_FP16_SME2(func_name) nullptr #endif /* defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS) */ #if defined(ENABLE_FP32_KERNELS) @@ -64,6 +71,12 @@ #define REGISTER_FP32_SVE2(func_name) nullptr #endif /* defined(ARM_COMPUTE_ENABLE_SVE2) */ +#if defined(ARM_COMPUTE_ENABLE_SME2) +#define REGISTER_FP32_SME2(func_name) &(func_name) +#else /* !defined(ARM_COMPUTE_ENABLE_SME2) */ +#define REGISTER_FP32_SME2(func_name) nullptr +#endif /* defined(ARM_COMPUTE_ENABLE_SME2) */ + #if defined(ARM_COMPUTE_ENABLE_NEON) #define REGISTER_FP32_NEON(func_name) &(func_name) #else /* !defined(ARM_COMPUTE_ENABLE_NEON) */ @@ -74,6 +87,7 @@ #define REGISTER_FP32_NEON(func_name) nullptr #define REGISTER_FP32_SVE(func_name) nullptr #define REGISTER_FP32_SVE2(func_name) nullptr +#define REGISTER_FP32_SME2(func_name) nullptr #endif /* defined(ENABLE_FP32_KERNELS) */ #if defined(ENABLE_QASYMM8_SIGNED_KERNELS) -- cgit v1.2.1