diff options
Diffstat (limited to 'src/cpu/kernels/elementwise_unary/generic/sve')
4 files changed, 36 insertions, 26 deletions
diff --git a/src/cpu/kernels/elementwise_unary/generic/sve/fp16.cpp b/src/cpu/kernels/elementwise_unary/generic/sve/fp16.cpp index a883309b2e..22ff43c5d9 100644 --- a/src/cpu/kernels/elementwise_unary/generic/sve/fp16.cpp +++ b/src/cpu/kernels/elementwise_unary/generic/sve/fp16.cpp @@ -23,6 +23,7 @@ */ #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS) #include "arm_compute/core/Helpers.h" + #include "src/cpu/CpuTypes.h" #include "src/cpu/kernels/elementwise_unary/generic/sve/impl.h" @@ -30,11 +31,12 @@ namespace arm_compute { namespace cpu { -void sve_fp16_elementwise_unary(const ITensor *in, ITensor *out, const Window &window, ElementWiseUnary op, const uint8_t *lut) +void sve_fp16_elementwise_unary( + const ITensor *in, ITensor *out, const Window &window, ElementWiseUnary op, const uint8_t *lut) { ARM_COMPUTE_UNUSED(lut); return elementwise_sve_op<float16_t>(in, out, window, op); } -} +} // namespace cpu } // namespace arm_compute #endif /* defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS) */ diff --git a/src/cpu/kernels/elementwise_unary/generic/sve/fp32.cpp b/src/cpu/kernels/elementwise_unary/generic/sve/fp32.cpp index b21ed8ddbc..394bd47adf 100644 --- a/src/cpu/kernels/elementwise_unary/generic/sve/fp32.cpp +++ b/src/cpu/kernels/elementwise_unary/generic/sve/fp32.cpp @@ -23,6 +23,7 @@ */ #include "arm_compute/core/Helpers.h" + #include "src/cpu/CpuTypes.h" #include "src/cpu/kernels/elementwise_unary/generic/sve/impl.h" @@ -30,10 +31,11 @@ namespace arm_compute { namespace cpu { -void sve_fp32_elementwise_unary(const ITensor *in, ITensor *out, const Window &window, ElementWiseUnary op, const uint8_t *lut) +void sve_fp32_elementwise_unary( + const ITensor *in, ITensor *out, const Window &window, ElementWiseUnary op, const uint8_t *lut) { ARM_COMPUTE_UNUSED(lut); return elementwise_sve_op<float32_t>(in, out, window, op); } -} +} // namespace cpu } // namespace arm_compute diff --git a/src/cpu/kernels/elementwise_unary/generic/sve/impl.cpp b/src/cpu/kernels/elementwise_unary/generic/sve/impl.cpp index a948862906..5af534d9e7 100644 --- a/src/cpu/kernels/elementwise_unary/generic/sve/impl.cpp +++ b/src/cpu/kernels/elementwise_unary/generic/sve/impl.cpp @@ -24,6 +24,7 @@ #include "arm_compute/core/Helpers.h" #include "arm_compute/core/utils/misc/Traits.h" + #include "src/core/NEON/wrapper/intrinsics/intrinsics.h" namespace arm_compute @@ -31,9 +32,10 @@ namespace arm_compute namespace cpu { template <typename ScalarType, typename VectorType> -inline typename std::enable_if<utils::traits::is_floating_point<ScalarType>::value, VectorType>::type elementwise_op_sve_imp(svbool_t pg, ElementWiseUnary op, const VectorType &a) +inline typename std::enable_if<utils::traits::is_floating_point<ScalarType>::value, VectorType>::type +elementwise_op_sve_imp(svbool_t pg, ElementWiseUnary op, const VectorType &a) { - switch(op) + switch (op) { case ElementWiseUnary::RSQRT: return svinvsqrt(pg, a); @@ -55,9 +57,10 @@ inline typename std::enable_if<utils::traits::is_floating_point<ScalarType>::val } template <typename ScalarType, typename VectorType> -inline typename std::enable_if<std::is_integral<ScalarType>::value, VectorType>::type elementwise_op_sve_imp(svbool_t pg, ElementWiseUnary op, const VectorType &a) +inline typename std::enable_if<std::is_integral<ScalarType>::value, VectorType>::type +elementwise_op_sve_imp(svbool_t pg, ElementWiseUnary op, const VectorType &a) { - switch(op) + switch (op) { case ElementWiseUnary::NEG: return svneg_z(pg, a); @@ -81,23 +84,24 @@ void elementwise_sve_op(const ITensor *in, ITensor *out, const Window &window, E Iterator input(in, win); Iterator output(out, win); - execute_window_loop(win, [&](const Coordinates &) - { - auto output_ptr = reinterpret_cast<ScalarType *>(output.ptr()); - const auto input_ptr = reinterpret_cast<const ScalarType *>(input.ptr()); - int x = window_start_x; - - svbool_t pg = wrapper::svwhilelt<ScalarType>(x, window_end_x); - do + execute_window_loop( + win, + [&](const Coordinates &) { - const auto vin = svld1(pg, input_ptr + x); - svst1(pg, output_ptr + x, elementwise_op_sve_imp<ScalarType, decltype(vin)>(pg, op, vin)); - x += wrapper::svcnt<ScalarType>(); - pg = wrapper::svwhilelt<ScalarType>(x, window_end_x); - } - while(svptest_any(all_true_pg, pg)); - }, - input, output); + auto output_ptr = reinterpret_cast<ScalarType *>(output.ptr()); + const auto input_ptr = reinterpret_cast<const ScalarType *>(input.ptr()); + int x = window_start_x; + + svbool_t pg = wrapper::svwhilelt<ScalarType>(x, window_end_x); + do + { + const auto vin = svld1(pg, input_ptr + x); + svst1(pg, output_ptr + x, elementwise_op_sve_imp<ScalarType, decltype(vin)>(pg, op, vin)); + x += wrapper::svcnt<ScalarType>(); + pg = wrapper::svwhilelt<ScalarType>(x, window_end_x); + } while (svptest_any(all_true_pg, pg)); + }, + input, output); } template void elementwise_sve_op<float16_t>(const ITensor *in, ITensor *out, const Window &window, ElementWiseUnary op); diff --git a/src/cpu/kernels/elementwise_unary/generic/sve/integer.cpp b/src/cpu/kernels/elementwise_unary/generic/sve/integer.cpp index 068c3f7cda..e27fe5a87f 100644 --- a/src/cpu/kernels/elementwise_unary/generic/sve/integer.cpp +++ b/src/cpu/kernels/elementwise_unary/generic/sve/integer.cpp @@ -23,16 +23,18 @@ */ #include "arm_compute/core/Helpers.h" + #include "src/cpu/kernels/elementwise_unary/generic/sve/impl.h" namespace arm_compute { namespace cpu { -void sve_s32_elementwise_unary(const ITensor *in, ITensor *out, const Window &window, ElementWiseUnary op, const uint8_t *lut) +void sve_s32_elementwise_unary( + const ITensor *in, ITensor *out, const Window &window, ElementWiseUnary op, const uint8_t *lut) { ARM_COMPUTE_UNUSED(lut); return elementwise_sve_op<int32_t>(in, out, window, op); } -} +} // namespace cpu } // namespace arm_compute |