aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSang-Hoon Park <sang-hoon.park@arm.com>2020-12-08 18:50:56 +0000
committerSang-Hoon Park <sang-hoon.park@arm.com>2021-01-07 13:11:57 +0000
commitaf1870b38bd4f86ccbb4152a506586afd6c64e02 (patch)
tree75e99bdda471dd44b986c689b6bd5799bc36b0d8
parent7e5b7bfc06c0bd8aecd809817866733c4fdf07fe (diff)
downloadComputeLibrary-af1870b38bd4f86ccbb4152a506586afd6c64e02.tar.gz
Add SVE support to elementwise unary kernels
It also includes decoupling of kernels using different data types. Partially implements: COMPMID-3872 Change-Id: I226cb9e55a5d9f8a0c63e37631f087af45f2d640 Signed-off-by: Sang-Hoon Park <sang-hoon.park@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4711 Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
-rw-r--r--src/core/NEON/SVEMath.inl6
-rw-r--r--src/core/NEON/kernels/NEElementwiseUnaryKernel.cpp148
-rw-r--r--src/core/NEON/kernels/NEElementwiseUnaryKernel.h25
-rw-r--r--src/core/NEON/kernels/elementwise/impl/elementwise_unary_list.h116
-rw-r--r--src/core/SVE/kernels/elementwise/impl/elementwise_unary_list.h111
5 files changed, 298 insertions, 108 deletions
diff --git a/src/core/NEON/SVEMath.inl b/src/core/NEON/SVEMath.inl
index fbf90f9b04..f201e92738 100644
--- a/src/core/NEON/SVEMath.inl
+++ b/src/core/NEON/SVEMath.inl
@@ -308,15 +308,15 @@ inline svfloat16_t svpow_f16_z(svbool_t pg, svfloat16_t a, svfloat16_t b)
#if defined(__ARM_FEATURE_SVE2)
auto pg_top = pg;
auto a_top = svcvtlt_f32_x(pg, a);
- auto b_top = svcvtlt_f32_x(pg, b)
+ auto b_top = svcvtlt_f32_x(pg, b);
#else /* defined(__ARM_FEATURE_SVE2) */
auto pg_top = svptrue_b16();
auto a_top = svcvt_f32_z(pg_top, svreinterpret_f16(svrevh_z(svptrue_b16(), svreinterpret_u32(a))));
auto b_top = svcvt_f32_z(pg_top, svreinterpret_f16(svrevh_z(svptrue_b16(), svreinterpret_u32(b))));
#endif /* defined(__ARM_FEATURE_SVE2) */
- auto res_bottom = svpow_f32_z(pg, a_bottom, b_bottom);
- auto res_top = svpow_f32_z(pg_top, a_top, b_top);
+ auto res_bottom = svpow_f32_z(pg, a_bottom, b_bottom);
+ auto res_top = svpow_f32_z(pg_top, a_top, b_top);
#if defined(__ARM_FEATURE_SVE2)
return svcvtnt_f16_m(svcvt_f16_z(pg, res_bottom), pg_top, res_top);
diff --git a/src/core/NEON/kernels/NEElementwiseUnaryKernel.cpp b/src/core/NEON/kernels/NEElementwiseUnaryKernel.cpp
index d899643fdc..ed1cb6fca4 100644
--- a/src/core/NEON/kernels/NEElementwiseUnaryKernel.cpp
+++ b/src/core/NEON/kernels/NEElementwiseUnaryKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -28,7 +28,10 @@
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/Validate.h"
#include "src/core/CPP/Validate.h"
-#include "src/core/NEON/wrapper/wrapper.h"
+#include "src/core/NEON/kernels/elementwise/impl/elementwise_unary_list.h"
+#include "src/core/SVE/kernels/elementwise/impl/elementwise_unary_list.h"
+#include "src/core/common/Registrars.h"
+#include "src/core/common/StdTypes.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/helpers/WindowHelpers.h"
#include "support/ToolchainSupport.h"
@@ -37,85 +40,65 @@ namespace arm_compute
{
namespace
{
-template <typename ScalarType>
-inline ScalarType elementwise_op_scalar_imp(ElementWiseUnary op, const ScalarType &a)
+using ElementwiseUnarySelector = std::add_pointer<bool(DataType)>::type;
+
+struct ElementwiseUnaryKernel
{
- switch(op)
- {
- case ElementWiseUnary::RSQRT:
- return 1 / sqrt(a);
- case ElementWiseUnary::EXP:
- return std::exp(a);
- case ElementWiseUnary::NEG:
- return -a;
- case ElementWiseUnary::LOG:
- return std::log(a);
- case ElementWiseUnary::ABS:
- return std::abs(a);
- case ElementWiseUnary::ROUND:
- return support::cpp11::nearbyint(a);
- case ElementWiseUnary::SIN:
- return std::sin(a);
- default:
- ARM_COMPUTE_ERROR("NOT_SUPPORTED!");
- }
-}
+ const char *name;
+ const ElementwiseUnarySelector is_selected;
+ NEElementwiseUnaryKernel::ElementwiseUnaryUkernelPtr ukernel;
+};
-template <typename ScalarType, typename VectorType>
-inline VectorType elementwise_op_imp(ElementWiseUnary op, const VectorType &a)
+static const ElementwiseUnaryKernel available_kernels[] =
{
- switch(op)
+#if defined(__ARM_FEATURE_SVE)
{
- case ElementWiseUnary::RSQRT:
- return wrapper::vinvsqrt(a);
- case ElementWiseUnary::EXP:
- return wrapper::vexpq(a);
- case ElementWiseUnary::NEG:
- return wrapper::vneg(a);
- case ElementWiseUnary::LOG:
- return wrapper::vlog(a);
- case ElementWiseUnary::ABS:
- return wrapper::vabs(a);
- case ElementWiseUnary::ROUND:
- return wrapper::vround(a);
- case ElementWiseUnary::SIN:
- return wrapper::vsin(a);
- default:
- ARM_COMPUTE_ERROR("NOT_SUPPORTED!");
- }
-}
-} // namespace
+ "fp32_sve_elementwise_unary",
+ [](DataType dt) { return dt == DataType::F32; },
+ REGISTER_FP32_SVE(arm_compute::cpu::elementwise_sve_op<f32>),
+ },
+ {
+ "fp16_sve_elementwise_unary",
+ [](DataType dt) { return dt == DataType::F16; },
+ REGISTER_FP16_SVE(arm_compute::cpu::elementwise_sve_op<f16>),
+ },
+ {
+ "s32_sve_elementwise_unary",
+ [](DataType dt) { return dt == DataType::S32; },
+ REGISTER_INTEGER_SVE(arm_compute::cpu::elementwise_sve_op<s32>),
+ },
+#endif // defined(__ARM_FEATURE_SVE)
+ {
+ "fp32_neon_elementwise_unary",
+ [](DataType dt) { return dt == DataType::F32; },
+ REGISTER_FP32_NEON(arm_compute::cpu::elementwise_op<f32>),
+ },
+#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
+ {
+ "fp16_neon_elementwise_unary",
+ [](DataType dt) { return dt == DataType::F16; },
+ REGISTER_FP32_NEON(arm_compute::cpu::elementwise_op<f16>),
+ },
+#endif // defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
+ {
+ "s32_neon_elementwise_unary",
+ [](DataType dt) { return dt == DataType::S32; },
+ REGISTER_INTEGER_NEON(arm_compute::cpu::elementwise_op<s32>),
+ },
+};
-template <typename ScalarType>
-void NEElementwiseUnaryKernel::elementwise_op(const Window &window)
+const ElementwiseUnaryKernel *get_implementation(DataType dt)
{
- const int window_step_x = 16 / sizeof(ScalarType);
- const auto window_start_x = static_cast<int>(window.x().start());
- const auto window_end_x = static_cast<int>(window.x().end());
-
- Window win = window;
- win.set(Window::DimX, Window::Dimension(0, 1, 1));
-
- Iterator input(_input, win);
- Iterator output(_output, win);
-
- execute_window_loop(win, [&](const Coordinates &)
+ for(const auto &uk : available_kernels)
{
- auto output_ptr = reinterpret_cast<ScalarType *>(output.ptr());
- const auto input_ptr = reinterpret_cast<const ScalarType *>(input.ptr());
-
- int x = window_start_x;
- for(; x <= window_end_x - window_step_x; x += window_step_x)
+ if(uk.is_selected(dt))
{
- wrapper::vstore(output_ptr + x, elementwise_op_imp<ScalarType>(_op, wrapper::vloadq(input_ptr + x)));
+ return &uk;
}
- for(; x < window_end_x; ++x)
- {
- *(output_ptr + x) = elementwise_op_scalar_imp(_op, *(input_ptr + x));
- }
- },
- input, output);
+ }
+ return nullptr;
}
+} // namespace
NEElementwiseUnaryKernel::NEElementwiseUnaryKernel()
: _func(nullptr), _input(nullptr), _output(nullptr), _op()
@@ -143,28 +126,17 @@ void NEElementwiseUnaryKernel::configure(ElementWiseUnary op, const ITensor *inp
INEKernel::configure(win);
- switch(input->info()->data_type())
- {
- case DataType::F32:
- _func = &NEElementwiseUnaryKernel::elementwise_op<float>;
- break;
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
- case DataType::F16:
- _func = &NEElementwiseUnaryKernel::elementwise_op<float16_t>;
-#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
- break;
- case DataType::S32:
- _func = &NEElementwiseUnaryKernel::elementwise_op<int32_t>;
- break;
- default:
- ARM_COMPUTE_ERROR("DataType not supported");
- }
+ _func = get_implementation(input->info()->data_type())->ukernel;
}
Status NEElementwiseUnaryKernel::validate(ElementWiseUnary op, const ITensorInfo *input, const ITensorInfo *output)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input);
+
+ const auto *uk = get_implementation(input->data_type());
+ ARM_COMPUTE_RETURN_ERROR_ON(uk == nullptr || uk->ukernel == nullptr);
+
switch(op)
{
case ElementWiseUnary::EXP:
@@ -196,6 +168,6 @@ void NEElementwiseUnaryKernel::run(const Window &window, const ThreadInfo &info)
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
ARM_COMPUTE_ERROR_ON(_func == nullptr);
- (this->*_func)(window);
+ (*_func)(_input, _output, window, _op);
}
} // namespace arm_compute
diff --git a/src/core/NEON/kernels/NEElementwiseUnaryKernel.h b/src/core/NEON/kernels/NEElementwiseUnaryKernel.h
index fcf0aa51c5..b248e821c3 100644
--- a/src/core/NEON/kernels/NEElementwiseUnaryKernel.h
+++ b/src/core/NEON/kernels/NEElementwiseUnaryKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -78,26 +78,17 @@ public:
// Inherited methods overridden:
void run(const Window &window, const ThreadInfo &info) override;
-private:
- /** Common signature for all the specialised arithmetic functions
+ /** Common signature for all the specialised elementwise unary micro-kernels
*
* @param[in] window Region on which to execute the kernel.
*/
- using ElementwiseUnaryPtr = void (NEElementwiseUnaryKernel::*)(const Window &window);
-
- /** Template function to run elementwise unary operation
- *
- * @tparam ScalarType Scalar datatype
- *
- * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
- */
- template <typename ScalarType>
- void elementwise_op(const Window &window);
+ using ElementwiseUnaryUkernelPtr = std::add_pointer<void(const ITensor *, ITensor *, const Window &, ElementWiseUnary)>::type;
- ElementwiseUnaryPtr _func;
- const ITensor *_input;
- ITensor *_output;
- ElementWiseUnary _op;
+private:
+ ElementwiseUnaryUkernelPtr _func;
+ const ITensor *_input;
+ ITensor *_output;
+ ElementWiseUnary _op;
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_NEELEMENTWISEUNARYKERNEL_H */
diff --git a/src/core/NEON/kernels/elementwise/impl/elementwise_unary_list.h b/src/core/NEON/kernels/elementwise/impl/elementwise_unary_list.h
new file mode 100644
index 0000000000..307e95fae9
--- /dev/null
+++ b/src/core/NEON/kernels/elementwise/impl/elementwise_unary_list.h
@@ -0,0 +1,116 @@
+/*
+ * Copyright (c) 2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef SRC_CORE_NEON_KERNELS_ELEMENTWISE_UNARY_LIST_H
+#define SRC_CORE_NEON_KERNELS_ELEMENTWISE_UNARY_LIST_H
+
+#include "arm_compute/core/Types.h"
+#include "src/core/NEON/wrapper/intrinsics/intrinsics.h"
+
+namespace arm_compute
+{
+namespace cpu
+{
+template <typename ScalarType>
+inline ScalarType elementwise_op_scalar_imp(ElementWiseUnary op, const ScalarType &a)
+{
+ switch(op)
+ {
+ case ElementWiseUnary::RSQRT:
+ return 1 / sqrt(a);
+ case ElementWiseUnary::EXP:
+ return std::exp(a);
+ case ElementWiseUnary::NEG:
+ return -a;
+ case ElementWiseUnary::LOG:
+ return std::log(a);
+ case ElementWiseUnary::ABS:
+ return std::abs(a);
+ case ElementWiseUnary::ROUND:
+ return support::cpp11::nearbyint(a);
+ case ElementWiseUnary::SIN:
+ return std::sin(a);
+ default:
+ ARM_COMPUTE_ERROR("NOT_SUPPORTED!");
+ }
+}
+
+template <typename ScalarType, typename VectorType>
+inline VectorType elementwise_op_imp(ElementWiseUnary op, const VectorType &a)
+{
+ switch(op)
+ {
+ case ElementWiseUnary::RSQRT:
+ return wrapper::vinvsqrt(a);
+ case ElementWiseUnary::EXP:
+ return wrapper::vexpq(a);
+ case ElementWiseUnary::NEG:
+ return wrapper::vneg(a);
+ case ElementWiseUnary::LOG:
+ return wrapper::vlog(a);
+ case ElementWiseUnary::ABS:
+ return wrapper::vabs(a);
+ case ElementWiseUnary::ROUND:
+ return wrapper::vround(a);
+ case ElementWiseUnary::SIN:
+ return wrapper::vsin(a);
+ default:
+ ARM_COMPUTE_ERROR("NOT_SUPPORTED!");
+ }
+}
+
+template <typename ScalarType>
+void elementwise_op(const ITensor *in, ITensor *out, const Window &window, ElementWiseUnary op)
+{
+ const int window_step_x = 16 / sizeof(ScalarType);
+ const auto window_start_x = static_cast<int>(window.x().start());
+ const auto window_end_x = static_cast<int>(window.x().end());
+
+ Window win = window;
+ win.set(Window::DimX, Window::Dimension(0, 1, 1));
+
+ Iterator input(in, win);
+ Iterator output(out, win);
+
+ execute_window_loop(win, [&](const Coordinates &)
+ {
+ auto output_ptr = reinterpret_cast<ScalarType *>(output.ptr());
+ const auto input_ptr = reinterpret_cast<const ScalarType *>(input.ptr());
+
+ int x = window_start_x;
+ for(; x <= window_end_x - window_step_x; x += window_step_x)
+ {
+ wrapper::vstore(output_ptr + x, elementwise_op_imp<ScalarType>(op, wrapper::vloadq(input_ptr + x)));
+ }
+ for(; x < window_end_x; ++x)
+ {
+ *(output_ptr + x) = elementwise_op_scalar_imp(op, *(input_ptr + x));
+ }
+ },
+ input, output);
+}
+
+} // namespace cpu
+} // namespace arm_compute
+
+#endif // SRC_CORE_NEON_KERNELS_ELEMENTWISE_UNARY_LIST_H \ No newline at end of file
diff --git a/src/core/SVE/kernels/elementwise/impl/elementwise_unary_list.h b/src/core/SVE/kernels/elementwise/impl/elementwise_unary_list.h
new file mode 100644
index 0000000000..23502c71e5
--- /dev/null
+++ b/src/core/SVE/kernels/elementwise/impl/elementwise_unary_list.h
@@ -0,0 +1,111 @@
+/*
+ * Copyright (c) 2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef SRC_CORE_SVE_KERNELS_ELEMENTWISE_UNARY_LIST_H
+#define SRC_CORE_SVE_KERNELS_ELEMENTWISE_UNARY_LIST_H
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/core/utils/misc/Traits.h"
+#include "src/core/NEON/wrapper/intrinsics/intrinsics.h"
+#if defined(__ARM_FEATURE_SVE)
+#include "src/core/NEON/SVEMath.h"
+#include <arm_sve.h>
+
+namespace arm_compute
+{
+namespace cpu
+{
+template <typename ScalarType, typename VectorType>
+inline typename std::enable_if<utils::traits::is_floating_point<ScalarType>::value, VectorType>::type elementwise_op_sve_imp(svbool_t pg, ElementWiseUnary op, const VectorType &a)
+{
+ switch(op)
+ {
+ case ElementWiseUnary::RSQRT:
+ return svinvsqrt(pg, a);
+ case ElementWiseUnary::EXP:
+ return wrapper::svexp_z(pg, a);
+ case ElementWiseUnary::NEG:
+ return svneg_z(pg, a);
+ case ElementWiseUnary::LOG:
+ return wrapper::svlog_z(pg, a);
+ case ElementWiseUnary::ABS:
+ return svabs_z(pg, a);
+ case ElementWiseUnary::ROUND:
+ return svrintn_z(pg, a);
+ case ElementWiseUnary::SIN:
+ return wrapper::svsin_z(pg, a);
+ default:
+ ARM_COMPUTE_ERROR("NOT_SUPPORTED");
+ }
+}
+
+template <typename ScalarType, typename VectorType>
+inline typename std::enable_if<std::is_integral<ScalarType>::value, VectorType>::type elementwise_op_sve_imp(svbool_t pg, ElementWiseUnary op, const VectorType &a)
+{
+ switch(op)
+ {
+ case ElementWiseUnary::NEG:
+ return svneg_z(pg, a);
+ case ElementWiseUnary::ABS:
+ return svabs_z(pg, a);
+ default:
+ ARM_COMPUTE_ERROR("NOT_SUPPORTED");
+ }
+}
+
+template <typename ScalarType>
+void elementwise_sve_op(const ITensor *in, ITensor *out, const Window &window, ElementWiseUnary op)
+{
+ const auto all_true_pg = wrapper::svptrue<ScalarType>();
+ const auto window_start_x = static_cast<int>(window.x().start());
+ const auto window_end_x = static_cast<int>(window.x().end());
+
+ Window win = window;
+ win.set(Window::DimX, Window::Dimension(0, 1, 1));
+
+ Iterator input(in, win);
+ Iterator output(out, win);
+
+ execute_window_loop(win, [&](const Coordinates &)
+ {
+ auto output_ptr = reinterpret_cast<ScalarType *>(output.ptr());
+ const auto input_ptr = reinterpret_cast<const ScalarType *>(input.ptr());
+ int x = window_start_x;
+
+ svbool_t pg = wrapper::svwhilelt<ScalarType>(x, window_end_x);
+ do
+ {
+ const auto vin = svld1(pg, input_ptr + x);
+ svst1(pg, output_ptr + x, elementwise_op_sve_imp<ScalarType, decltype(vin)>(pg, op, vin));
+ x += wrapper::svcnt<ScalarType>();
+ pg = wrapper::svwhilelt<ScalarType>(x, window_end_x);
+ }
+ while(svptest_any(all_true_pg, pg));
+ },
+ input, output);
+}
+
+} // namespace cpu
+} // namespace arm_compute
+#endif // defined(__ARM_FEATURE_SVE)
+#endif // SRC_CORE_NEON_KERNELS_ELEMENTWISE_UNARY_LIST_H \ No newline at end of file