From af1870b38bd4f86ccbb4152a506586afd6c64e02 Mon Sep 17 00:00:00 2001 From: Sang-Hoon Park Date: Tue, 8 Dec 2020 18:50:56 +0000 Subject: Add SVE support to elementwise unary kernels It also includes decoupling of kernels using different data types. Partially implements: COMPMID-3872 Change-Id: I226cb9e55a5d9f8a0c63e37631f087af45f2d640 Signed-off-by: Sang-Hoon Park Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4711 Comments-Addressed: Arm Jenkins Tested-by: Arm Jenkins Reviewed-by: Michele Di Giorgio --- src/core/NEON/SVEMath.inl | 6 +- src/core/NEON/kernels/NEElementwiseUnaryKernel.cpp | 148 +++++++++------------ src/core/NEON/kernels/NEElementwiseUnaryKernel.h | 25 ++-- .../elementwise/impl/elementwise_unary_list.h | 116 ++++++++++++++++ .../elementwise/impl/elementwise_unary_list.h | 111 ++++++++++++++++ 5 files changed, 298 insertions(+), 108 deletions(-) create mode 100644 src/core/NEON/kernels/elementwise/impl/elementwise_unary_list.h create mode 100644 src/core/SVE/kernels/elementwise/impl/elementwise_unary_list.h diff --git a/src/core/NEON/SVEMath.inl b/src/core/NEON/SVEMath.inl index fbf90f9b04..f201e92738 100644 --- a/src/core/NEON/SVEMath.inl +++ b/src/core/NEON/SVEMath.inl @@ -308,15 +308,15 @@ inline svfloat16_t svpow_f16_z(svbool_t pg, svfloat16_t a, svfloat16_t b) #if defined(__ARM_FEATURE_SVE2) auto pg_top = pg; auto a_top = svcvtlt_f32_x(pg, a); - auto b_top = svcvtlt_f32_x(pg, b) + auto b_top = svcvtlt_f32_x(pg, b); #else /* defined(__ARM_FEATURE_SVE2) */ auto pg_top = svptrue_b16(); auto a_top = svcvt_f32_z(pg_top, svreinterpret_f16(svrevh_z(svptrue_b16(), svreinterpret_u32(a)))); auto b_top = svcvt_f32_z(pg_top, svreinterpret_f16(svrevh_z(svptrue_b16(), svreinterpret_u32(b)))); #endif /* defined(__ARM_FEATURE_SVE2) */ - auto res_bottom = svpow_f32_z(pg, a_bottom, b_bottom); - auto res_top = svpow_f32_z(pg_top, a_top, b_top); + auto res_bottom = svpow_f32_z(pg, a_bottom, b_bottom); + auto res_top = svpow_f32_z(pg_top, a_top, b_top); #if defined(__ARM_FEATURE_SVE2) return svcvtnt_f16_m(svcvt_f16_z(pg, res_bottom), pg_top, res_top); diff --git a/src/core/NEON/kernels/NEElementwiseUnaryKernel.cpp b/src/core/NEON/kernels/NEElementwiseUnaryKernel.cpp index d899643fdc..ed1cb6fca4 100644 --- a/src/core/NEON/kernels/NEElementwiseUnaryKernel.cpp +++ b/src/core/NEON/kernels/NEElementwiseUnaryKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -28,7 +28,10 @@ #include "arm_compute/core/ITensor.h" #include "arm_compute/core/Validate.h" #include "src/core/CPP/Validate.h" -#include "src/core/NEON/wrapper/wrapper.h" +#include "src/core/NEON/kernels/elementwise/impl/elementwise_unary_list.h" +#include "src/core/SVE/kernels/elementwise/impl/elementwise_unary_list.h" +#include "src/core/common/Registrars.h" +#include "src/core/common/StdTypes.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/core/helpers/WindowHelpers.h" #include "support/ToolchainSupport.h" @@ -37,85 +40,65 @@ namespace arm_compute { namespace { -template -inline ScalarType elementwise_op_scalar_imp(ElementWiseUnary op, const ScalarType &a) +using ElementwiseUnarySelector = std::add_pointer::type; + +struct ElementwiseUnaryKernel { - switch(op) - { - case ElementWiseUnary::RSQRT: - return 1 / sqrt(a); - case ElementWiseUnary::EXP: - return std::exp(a); - case ElementWiseUnary::NEG: - return -a; - case ElementWiseUnary::LOG: - return std::log(a); - case ElementWiseUnary::ABS: - return std::abs(a); - case ElementWiseUnary::ROUND: - return support::cpp11::nearbyint(a); - case ElementWiseUnary::SIN: - return std::sin(a); - default: - ARM_COMPUTE_ERROR("NOT_SUPPORTED!"); - } -} + const char *name; + const ElementwiseUnarySelector is_selected; + NEElementwiseUnaryKernel::ElementwiseUnaryUkernelPtr ukernel; +}; -template -inline VectorType elementwise_op_imp(ElementWiseUnary op, const VectorType &a) +static const ElementwiseUnaryKernel available_kernels[] = { - switch(op) +#if defined(__ARM_FEATURE_SVE) { - case ElementWiseUnary::RSQRT: - return wrapper::vinvsqrt(a); - case ElementWiseUnary::EXP: - return wrapper::vexpq(a); - case ElementWiseUnary::NEG: - return wrapper::vneg(a); - case ElementWiseUnary::LOG: - return wrapper::vlog(a); - case ElementWiseUnary::ABS: - return wrapper::vabs(a); - case ElementWiseUnary::ROUND: - return wrapper::vround(a); - case ElementWiseUnary::SIN: - return wrapper::vsin(a); - default: - ARM_COMPUTE_ERROR("NOT_SUPPORTED!"); - } -} -} // namespace + "fp32_sve_elementwise_unary", + [](DataType dt) { return dt == DataType::F32; }, + REGISTER_FP32_SVE(arm_compute::cpu::elementwise_sve_op), + }, + { + "fp16_sve_elementwise_unary", + [](DataType dt) { return dt == DataType::F16; }, + REGISTER_FP16_SVE(arm_compute::cpu::elementwise_sve_op), + }, + { + "s32_sve_elementwise_unary", + [](DataType dt) { return dt == DataType::S32; }, + REGISTER_INTEGER_SVE(arm_compute::cpu::elementwise_sve_op), + }, +#endif // defined(__ARM_FEATURE_SVE) + { + "fp32_neon_elementwise_unary", + [](DataType dt) { return dt == DataType::F32; }, + REGISTER_FP32_NEON(arm_compute::cpu::elementwise_op), + }, +#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + { + "fp16_neon_elementwise_unary", + [](DataType dt) { return dt == DataType::F16; }, + REGISTER_FP32_NEON(arm_compute::cpu::elementwise_op), + }, +#endif // defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + { + "s32_neon_elementwise_unary", + [](DataType dt) { return dt == DataType::S32; }, + REGISTER_INTEGER_NEON(arm_compute::cpu::elementwise_op), + }, +}; -template -void NEElementwiseUnaryKernel::elementwise_op(const Window &window) +const ElementwiseUnaryKernel *get_implementation(DataType dt) { - const int window_step_x = 16 / sizeof(ScalarType); - const auto window_start_x = static_cast(window.x().start()); - const auto window_end_x = static_cast(window.x().end()); - - Window win = window; - win.set(Window::DimX, Window::Dimension(0, 1, 1)); - - Iterator input(_input, win); - Iterator output(_output, win); - - execute_window_loop(win, [&](const Coordinates &) + for(const auto &uk : available_kernels) { - auto output_ptr = reinterpret_cast(output.ptr()); - const auto input_ptr = reinterpret_cast(input.ptr()); - - int x = window_start_x; - for(; x <= window_end_x - window_step_x; x += window_step_x) + if(uk.is_selected(dt)) { - wrapper::vstore(output_ptr + x, elementwise_op_imp(_op, wrapper::vloadq(input_ptr + x))); + return &uk; } - for(; x < window_end_x; ++x) - { - *(output_ptr + x) = elementwise_op_scalar_imp(_op, *(input_ptr + x)); - } - }, - input, output); + } + return nullptr; } +} // namespace NEElementwiseUnaryKernel::NEElementwiseUnaryKernel() : _func(nullptr), _input(nullptr), _output(nullptr), _op() @@ -143,28 +126,17 @@ void NEElementwiseUnaryKernel::configure(ElementWiseUnary op, const ITensor *inp INEKernel::configure(win); - switch(input->info()->data_type()) - { - case DataType::F32: - _func = &NEElementwiseUnaryKernel::elementwise_op; - break; -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - case DataType::F16: - _func = &NEElementwiseUnaryKernel::elementwise_op; -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ - break; - case DataType::S32: - _func = &NEElementwiseUnaryKernel::elementwise_op; - break; - default: - ARM_COMPUTE_ERROR("DataType not supported"); - } + _func = get_implementation(input->info()->data_type())->ukernel; } Status NEElementwiseUnaryKernel::validate(ElementWiseUnary op, const ITensorInfo *input, const ITensorInfo *output) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input); + + const auto *uk = get_implementation(input->data_type()); + ARM_COMPUTE_RETURN_ERROR_ON(uk == nullptr || uk->ukernel == nullptr); + switch(op) { case ElementWiseUnary::EXP: @@ -196,6 +168,6 @@ void NEElementwiseUnaryKernel::run(const Window &window, const ThreadInfo &info) ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); ARM_COMPUTE_ERROR_ON(_func == nullptr); - (this->*_func)(window); + (*_func)(_input, _output, window, _op); } } // namespace arm_compute diff --git a/src/core/NEON/kernels/NEElementwiseUnaryKernel.h b/src/core/NEON/kernels/NEElementwiseUnaryKernel.h index fcf0aa51c5..b248e821c3 100644 --- a/src/core/NEON/kernels/NEElementwiseUnaryKernel.h +++ b/src/core/NEON/kernels/NEElementwiseUnaryKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -78,26 +78,17 @@ public: // Inherited methods overridden: void run(const Window &window, const ThreadInfo &info) override; -private: - /** Common signature for all the specialised arithmetic functions + /** Common signature for all the specialised elementwise unary micro-kernels * * @param[in] window Region on which to execute the kernel. */ - using ElementwiseUnaryPtr = void (NEElementwiseUnaryKernel::*)(const Window &window); - - /** Template function to run elementwise unary operation - * - * @tparam ScalarType Scalar datatype - * - * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). - */ - template - void elementwise_op(const Window &window); + using ElementwiseUnaryUkernelPtr = std::add_pointer::type; - ElementwiseUnaryPtr _func; - const ITensor *_input; - ITensor *_output; - ElementWiseUnary _op; +private: + ElementwiseUnaryUkernelPtr _func; + const ITensor *_input; + ITensor *_output; + ElementWiseUnary _op; }; } // namespace arm_compute #endif /* ARM_COMPUTE_NEELEMENTWISEUNARYKERNEL_H */ diff --git a/src/core/NEON/kernels/elementwise/impl/elementwise_unary_list.h b/src/core/NEON/kernels/elementwise/impl/elementwise_unary_list.h new file mode 100644 index 0000000000..307e95fae9 --- /dev/null +++ b/src/core/NEON/kernels/elementwise/impl/elementwise_unary_list.h @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2021 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef SRC_CORE_NEON_KERNELS_ELEMENTWISE_UNARY_LIST_H +#define SRC_CORE_NEON_KERNELS_ELEMENTWISE_UNARY_LIST_H + +#include "arm_compute/core/Types.h" +#include "src/core/NEON/wrapper/intrinsics/intrinsics.h" + +namespace arm_compute +{ +namespace cpu +{ +template +inline ScalarType elementwise_op_scalar_imp(ElementWiseUnary op, const ScalarType &a) +{ + switch(op) + { + case ElementWiseUnary::RSQRT: + return 1 / sqrt(a); + case ElementWiseUnary::EXP: + return std::exp(a); + case ElementWiseUnary::NEG: + return -a; + case ElementWiseUnary::LOG: + return std::log(a); + case ElementWiseUnary::ABS: + return std::abs(a); + case ElementWiseUnary::ROUND: + return support::cpp11::nearbyint(a); + case ElementWiseUnary::SIN: + return std::sin(a); + default: + ARM_COMPUTE_ERROR("NOT_SUPPORTED!"); + } +} + +template +inline VectorType elementwise_op_imp(ElementWiseUnary op, const VectorType &a) +{ + switch(op) + { + case ElementWiseUnary::RSQRT: + return wrapper::vinvsqrt(a); + case ElementWiseUnary::EXP: + return wrapper::vexpq(a); + case ElementWiseUnary::NEG: + return wrapper::vneg(a); + case ElementWiseUnary::LOG: + return wrapper::vlog(a); + case ElementWiseUnary::ABS: + return wrapper::vabs(a); + case ElementWiseUnary::ROUND: + return wrapper::vround(a); + case ElementWiseUnary::SIN: + return wrapper::vsin(a); + default: + ARM_COMPUTE_ERROR("NOT_SUPPORTED!"); + } +} + +template +void elementwise_op(const ITensor *in, ITensor *out, const Window &window, ElementWiseUnary op) +{ + const int window_step_x = 16 / sizeof(ScalarType); + const auto window_start_x = static_cast(window.x().start()); + const auto window_end_x = static_cast(window.x().end()); + + Window win = window; + win.set(Window::DimX, Window::Dimension(0, 1, 1)); + + Iterator input(in, win); + Iterator output(out, win); + + execute_window_loop(win, [&](const Coordinates &) + { + auto output_ptr = reinterpret_cast(output.ptr()); + const auto input_ptr = reinterpret_cast(input.ptr()); + + int x = window_start_x; + for(; x <= window_end_x - window_step_x; x += window_step_x) + { + wrapper::vstore(output_ptr + x, elementwise_op_imp(op, wrapper::vloadq(input_ptr + x))); + } + for(; x < window_end_x; ++x) + { + *(output_ptr + x) = elementwise_op_scalar_imp(op, *(input_ptr + x)); + } + }, + input, output); +} + +} // namespace cpu +} // namespace arm_compute + +#endif // SRC_CORE_NEON_KERNELS_ELEMENTWISE_UNARY_LIST_H \ No newline at end of file diff --git a/src/core/SVE/kernels/elementwise/impl/elementwise_unary_list.h b/src/core/SVE/kernels/elementwise/impl/elementwise_unary_list.h new file mode 100644 index 0000000000..23502c71e5 --- /dev/null +++ b/src/core/SVE/kernels/elementwise/impl/elementwise_unary_list.h @@ -0,0 +1,111 @@ +/* + * Copyright (c) 2021 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef SRC_CORE_SVE_KERNELS_ELEMENTWISE_UNARY_LIST_H +#define SRC_CORE_SVE_KERNELS_ELEMENTWISE_UNARY_LIST_H + +#include "arm_compute/core/Types.h" +#include "arm_compute/core/utils/misc/Traits.h" +#include "src/core/NEON/wrapper/intrinsics/intrinsics.h" +#if defined(__ARM_FEATURE_SVE) +#include "src/core/NEON/SVEMath.h" +#include + +namespace arm_compute +{ +namespace cpu +{ +template +inline typename std::enable_if::value, VectorType>::type elementwise_op_sve_imp(svbool_t pg, ElementWiseUnary op, const VectorType &a) +{ + switch(op) + { + case ElementWiseUnary::RSQRT: + return svinvsqrt(pg, a); + case ElementWiseUnary::EXP: + return wrapper::svexp_z(pg, a); + case ElementWiseUnary::NEG: + return svneg_z(pg, a); + case ElementWiseUnary::LOG: + return wrapper::svlog_z(pg, a); + case ElementWiseUnary::ABS: + return svabs_z(pg, a); + case ElementWiseUnary::ROUND: + return svrintn_z(pg, a); + case ElementWiseUnary::SIN: + return wrapper::svsin_z(pg, a); + default: + ARM_COMPUTE_ERROR("NOT_SUPPORTED"); + } +} + +template +inline typename std::enable_if::value, VectorType>::type elementwise_op_sve_imp(svbool_t pg, ElementWiseUnary op, const VectorType &a) +{ + switch(op) + { + case ElementWiseUnary::NEG: + return svneg_z(pg, a); + case ElementWiseUnary::ABS: + return svabs_z(pg, a); + default: + ARM_COMPUTE_ERROR("NOT_SUPPORTED"); + } +} + +template +void elementwise_sve_op(const ITensor *in, ITensor *out, const Window &window, ElementWiseUnary op) +{ + const auto all_true_pg = wrapper::svptrue(); + const auto window_start_x = static_cast(window.x().start()); + const auto window_end_x = static_cast(window.x().end()); + + Window win = window; + win.set(Window::DimX, Window::Dimension(0, 1, 1)); + + Iterator input(in, win); + Iterator output(out, win); + + execute_window_loop(win, [&](const Coordinates &) + { + auto output_ptr = reinterpret_cast(output.ptr()); + const auto input_ptr = reinterpret_cast(input.ptr()); + int x = window_start_x; + + svbool_t pg = wrapper::svwhilelt(x, window_end_x); + do + { + const auto vin = svld1(pg, input_ptr + x); + svst1(pg, output_ptr + x, elementwise_op_sve_imp(pg, op, vin)); + x += wrapper::svcnt(); + pg = wrapper::svwhilelt(x, window_end_x); + } + while(svptest_any(all_true_pg, pg)); + }, + input, output); +} + +} // namespace cpu +} // namespace arm_compute +#endif // defined(__ARM_FEATURE_SVE) +#endif // SRC_CORE_NEON_KERNELS_ELEMENTWISE_UNARY_LIST_H \ No newline at end of file -- cgit v1.2.1