diff options
author | Sang-Hoon Park <sang-hoon.park@arm.com> | 2021-01-07 16:59:32 +0000 |
---|---|---|
committer | Sang-Hoon Park <sang-hoon.park@arm.com> | 2021-01-12 15:51:46 +0000 |
commit | 5db75c350ca0c0d8965a894d7e1a371746a2102b (patch) | |
tree | 82cb706b8966d2f405d48f02d936610d9b2558d6 /src/core/NEON/kernels/NEElementwiseOperationKernel.cpp | |
parent | 23adc4cf40936ffe6c0e126ac83c8085b946bde4 (diff) | |
download | ComputeLibrary-5db75c350ca0c0d8965a894d7e1a371746a2102b.tar.gz |
Add SVE support for non-quantized elementwise operations
SVE kernels are added for all previously supported arithmetic
and comparison operations, with the exception of S16 arithmetic
operations, due to the complexity of widening and narrowing
integer vectors.
Partially implements: COMPMID-3872
Change-Id: Ic433eb7227dfcfd0d1429f18acebec2d934ca8bd
Signed-off-by: Sang-Hoon Park <sang-hoon.park@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4778
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Reviewed-by: Michalis Spyrou <michalis.spyrou@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/NEON/kernels/NEElementwiseOperationKernel.cpp')
-rw-r--r-- | src/core/NEON/kernels/NEElementwiseOperationKernel.cpp | 27 |
1 file changed, 24 insertions, 3 deletions
diff --git a/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp b/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp index 412ae247cb..29ae9037af 100644 --- a/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp +++ b/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -29,6 +29,7 @@ #include "src/core/NEON/NEAsymm.h" #include "src/core/NEON/NEFixedPoint.h" #include "src/core/NEON/wrapper/wrapper.h" +#include "src/core/SVE/kernels/elementwise/impl/elementwise_list.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/core/helpers/WindowHelpers.h" @@ -1135,14 +1136,23 @@ configure_arithm_func(const ITensorInfo *input1, const ITensorInfo *input2, ITen { static std::map<std::string, NEElementwiseOperationKernel::ElementwiseFunction *> map_function = { +#if defined(__ARM_FEATURE_SVE) + { "op_F32_F32_F32", &arm_compute::cpu::sve::elementwise_arithmetic_op<op, float32_t> }, + { "op_S32_S32_S32", &arm_compute::cpu::sve::elementwise_arithmetic_op<op, int32_t> }, +#else /* defined(__ARM_FEATURE_SVE) */ { "op_F32_F32_F32", &elementwise_arithm_op<op, typename wrapper::traits::neon_vector<float, 4>> }, - { "op_S16_S16_S16", &elementwise_arithm_op<op, typename wrapper::traits::neon_vector<int16_t, 8>> }, { "op_S32_S32_S32", &elementwise_arithm_op<op, typename wrapper::traits::neon_vector<int32_t, 4>> }, +#endif /* defined(__ARM_FEATURE_SVE) */ + { "op_S16_S16_S16", &elementwise_arithm_op<op, typename wrapper::traits::neon_vector<int16_t, 8>> }, { "op_QASYMM8_QASYMM8_QASYMM8", &elementwise_arithm_op_quantized<op> }, { "op_QASYMM8_SIGNED_QASYMM8_SIGNED_QASYMM8_SIGNED", &elementwise_arithm_op_quantized_signed<op> } }; #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#if defined(__ARM_FEATURE_SVE) + map_function["op_F16_F16_F16"] = &arm_compute::cpu::sve::elementwise_arithmetic_op<op, float16_t>; +#else /* 
defined(__ARM_FEATURE_SVE) */ map_function["op_F16_F16_F16"] = &elementwise_arithm_op<op, typename wrapper::traits::neon_vector<float16_t, 8>>; +#endif /* defined(__ARM_FEATURE_SVE) */ #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ return configure_func(input1, input2, output, map_function); @@ -1154,15 +1164,26 @@ configure_comp_func(const ITensorInfo *input1, const ITensorInfo *input2, ITenso { static std::map<std::string, NEElementwiseOperationKernel::ElementwiseFunction *> map_function = { +#if defined(__ARM_FEATURE_SVE) + { "op_U8_U8_U8", &arm_compute::cpu::sve::elementwise_comparison_op<op, uint8_t> }, + { "op_F32_F32_U8", &arm_compute::cpu::sve::elementwise_comparison_op<op, float> }, + { "op_S16_S16_U8", &arm_compute::cpu::sve::elementwise_comparison_op<op, int16_t> }, + { "op_S32_S32_U8", &arm_compute::cpu::sve::elementwise_comparison_op<op, int32_t> }, +#else /* defined(__ARM_FEATURE_SVE) */ { "op_U8_U8_U8", &elementwise_comp_op_8<op, uint8_t, uint8x16_t> }, { "op_F32_F32_U8", &elementwise_comp_op_32<op, float, float32x4_t> }, { "op_S16_S16_U8", &elementwise_comp_op_16<op, int16_t, int16x8_t> }, { "op_S32_S32_U8", &elementwise_comp_op_32<op, int32_t, int32x4_t> }, +#endif /* defined(__ARM_FEATURE_SVE) */ { "op_QASYMM8_SIGNED_QASYMM8_SIGNED_U8", &elementwise_comp_op_quantized_signed<op> }, { "op_QASYMM8_QASYMM8_U8", &elementwise_comp_op_quantized<op> } }; #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - map_function["op_F16_F16_U8"] = &elementwise_comp_op_16<op, float16_t, float16x8_t>; +#if defined(__ARM_FEATURE_SVE) + map_function["op_F16_F16_U8"] = &arm_compute::cpu::sve::elementwise_comparison_op<op, float16_t>; +#else /* defined(__ARM_FEATURE_SVE) */ + map_function["op_F16_F16_U8"] = &elementwise_comp_op_16<op, float16_t, float16x8_t>; +#endif /* defined(__ARM_FEATURE_SVE) */ #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ return configure_func(input1, input2, output, map_function); |