From 5db75c350ca0c0d8965a894d7e1a371746a2102b Mon Sep 17 00:00:00 2001 From: Sang-Hoon Park Date: Thu, 7 Jan 2021 16:59:32 +0000 Subject: Add SVE support for non-quantized elementwise operations SVE kernels are added to all previously supported arithmetic and comparison operations with the exception of S16 arithmetic operations due to the complexity of widening and narrowing of integer vectors. Partially implements: COMPMID-3872 Change-Id: Ic433eb7227dfcfd0d1429f18acebec2d934ca8bd Signed-off-by: Sang-Hoon Park Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4778 Tested-by: Arm Jenkins Reviewed-by: Michele Di Giorgio Reviewed-by: Michalis Spyrou Comments-Addressed: Arm Jenkins --- .../NEON/kernels/NEElementwiseOperationKernel.cpp | 27 +++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) (limited to 'src/core/NEON/kernels') diff --git a/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp b/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp index 412ae247cb..29ae9037af 100644 --- a/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp +++ b/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -29,6 +29,7 @@ #include "src/core/NEON/NEAsymm.h" #include "src/core/NEON/NEFixedPoint.h" #include "src/core/NEON/wrapper/wrapper.h" +#include "src/core/SVE/kernels/elementwise/impl/elementwise_list.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/core/helpers/WindowHelpers.h" @@ -1135,14 +1136,23 @@ configure_arithm_func(const ITensorInfo *input1, const ITensorInfo *input2, ITen { static std::map map_function = { +#if defined(__ARM_FEATURE_SVE) + { "op_F32_F32_F32", &arm_compute::cpu::sve::elementwise_arithmetic_op }, + { "op_S32_S32_S32", &arm_compute::cpu::sve::elementwise_arithmetic_op }, +#else /* defined(__ARM_FEATURE_SVE) */ { "op_F32_F32_F32", &elementwise_arithm_op> }, - { "op_S16_S16_S16", &elementwise_arithm_op> }, { "op_S32_S32_S32", &elementwise_arithm_op> }, +#endif /* defined(__ARM_FEATURE_SVE) */ + { "op_S16_S16_S16", &elementwise_arithm_op> }, { "op_QASYMM8_QASYMM8_QASYMM8", &elementwise_arithm_op_quantized }, { "op_QASYMM8_SIGNED_QASYMM8_SIGNED_QASYMM8_SIGNED", &elementwise_arithm_op_quantized_signed } }; #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#if defined(__ARM_FEATURE_SVE) + map_function["op_F16_F16_F16"] = &arm_compute::cpu::sve::elementwise_arithmetic_op; +#else /* defined(__ARM_FEATURE_SVE) */ map_function["op_F16_F16_F16"] = &elementwise_arithm_op>; +#endif /* defined(__ARM_FEATURE_SVE) */ #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ return configure_func(input1, input2, output, map_function); @@ -1154,15 +1164,26 @@ configure_comp_func(const ITensorInfo *input1, const ITensorInfo *input2, ITenso { static std::map map_function = { +#if defined(__ARM_FEATURE_SVE) + { "op_U8_U8_U8", &arm_compute::cpu::sve::elementwise_comparison_op }, + { "op_F32_F32_U8", &arm_compute::cpu::sve::elementwise_comparison_op }, + { "op_S16_S16_U8", &arm_compute::cpu::sve::elementwise_comparison_op }, + { "op_S32_S32_U8", &arm_compute::cpu::sve::elementwise_comparison_op }, +#else /* 
defined(__ARM_FEATURE_SVE) */ { "op_U8_U8_U8", &elementwise_comp_op_8 }, { "op_F32_F32_U8", &elementwise_comp_op_32 }, { "op_S16_S16_U8", &elementwise_comp_op_16 }, { "op_S32_S32_U8", &elementwise_comp_op_32 }, +#endif /* defined(__ARM_FEATURE_SVE) */ { "op_QASYMM8_SIGNED_QASYMM8_SIGNED_U8", &elementwise_comp_op_quantized_signed }, { "op_QASYMM8_QASYMM8_U8", &elementwise_comp_op_quantized } }; #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - map_function["op_F16_F16_U8"] = &elementwise_comp_op_16; +#if defined(__ARM_FEATURE_SVE) + map_function["op_F16_F16_U8"] = &arm_compute::cpu::sve::elementwise_comparison_op; +#else /* defined(__ARM_FEATURE_SVE) */ + map_function["op_F16_F16_U8"] = &elementwise_comp_op_16; +#endif /* defined(__ARM_FEATURE_SVE) */ #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ return configure_func(input1, input2, output, map_function); -- cgit v1.2.1