Diffstat (limited to 'src/cpu/kernels/activation/generic/neon/impl.h')
 src/cpu/kernels/activation/generic/neon/impl.h | 274
 1 file changed, 145 insertions(+), 129 deletions(-)
diff --git a/src/cpu/kernels/activation/generic/neon/impl.h b/src/cpu/kernels/activation/generic/neon/impl.h
index 05885d8476..afeb6f7f3d 100644
--- a/src/cpu/kernels/activation/generic/neon/impl.h
+++ b/src/cpu/kernels/activation/generic/neon/impl.h
@@ -24,6 +24,7 @@
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/Window.h"
#include "arm_compute/function_info/ActivationLayerInfo.h"
+
#include "src/core/NEON/wrapper/wrapper.h"
namespace arm_compute
{
@@ -56,10 +57,14 @@ inline float16x8_t mask_float_vector(const float16x8_t &in, const uint16x8_t &ma
#endif /* __aarch64__ */
template <typename T, const ActFpImplParams &P>
-void fp_neon_activation_impl(const ITensor *src, ITensor *dst, const ActivationLayerInfo &act_info, const Window &window)
+void fp_neon_activation_impl(const ITensor *src,
+ ITensor *dst,
+ const ActivationLayerInfo &act_info,
+ const Window &window)
{
/** SIMD vector tag type. */
- using ExactTagType = typename arm_compute::wrapper::traits::neon_bitvector_tag_t<T, wrapper::traits::BitWidth::W128>;
+ using ExactTagType =
+ typename arm_compute::wrapper::traits::neon_bitvector_tag_t<T, wrapper::traits::BitWidth::W128>;
constexpr int window_step_x = P.step_x;
const auto window_start_x = static_cast<int>(window.x().start());
const auto window_end_x = static_cast<int>(window.x().end());
@@ -72,12 +77,12 @@ void fp_neon_activation_impl(const ITensor *src, ITensor *dst, const ActivationL
// to prevent NaN values caused by zeros in inputs to SQRT.
// In the aarch64 case, we call vsqrt directly, so we don't use delta.
#ifndef __aarch64__
- const auto delta = wrapper::vdup_n(static_cast<T>(P.delta), ExactTagType {});
+ const auto delta = wrapper::vdup_n(static_cast<T>(P.delta), ExactTagType{});
#else /* #ifndef __aarch64__ */
- const auto const_inv_2 = wrapper::vdup_n(static_cast<T>(0.5f), ExactTagType {});
+ const auto const_inv_2 = wrapper::vdup_n(static_cast<T>(0.5f), ExactTagType{});
const auto const_inv_sqrt_2 = wrapper::vdup_n(static_cast<T>(0.70710678118f), ExactTagType{});
#endif /* __aarch64__ */
- const auto const_1 = wrapper::vdup_n(static_cast<T>(1.f), ExactTagType {});
+ const auto const_1 = wrapper::vdup_n(static_cast<T>(1.f), ExactTagType{});
const auto const_0 = wrapper::vdup_n(static_cast<T>(0.f), ExactTagType{});
const auto const_6 = wrapper::vdup_n(static_cast<T>(6.f), ExactTagType{});
const auto const_3 = wrapper::vdup_n(static_cast<T>(3.f), ExactTagType{});
@@ -88,143 +93,154 @@ void fp_neon_activation_impl(const ITensor *src, ITensor *dst, const ActivationL
const auto vb = wrapper::vdup_n(static_cast<T>(act_info.b()), ExactTagType{});
const auto a = static_cast<T>(act_info.a());
const auto b = static_cast<T>(act_info.b());
- execute_window_loop(win_collapsed, [&](const Coordinates &)
- {
- const auto input_ptr = reinterpret_cast<const T *>(input.ptr());
- const auto output_ptr = reinterpret_cast<T *>(output.ptr());
- wrapper::traits::neon_bitvector_t<T, wrapper::traits::BitWidth::W128> tmp;
- // Compute S elements per iteration
- int x = window_start_x;
- for(; x <= (window_end_x - window_step_x); x += window_step_x)
+ execute_window_loop(
+ win_collapsed,
+ [&](const Coordinates &)
{
- const auto vin = wrapper::vloadq(input_ptr + x);
- switch(act)
+ const auto input_ptr = reinterpret_cast<const T *>(input.ptr());
+ const auto output_ptr = reinterpret_cast<T *>(output.ptr());
+ wrapper::traits::neon_bitvector_t<T, wrapper::traits::BitWidth::W128> tmp;
+ // Compute S elements per iteration
+ int x = window_start_x;
+ for (; x <= (window_end_x - window_step_x); x += window_step_x)
{
- case ActivationLayerInfo::ActivationFunction::ABS:
- tmp = wrapper::vabs(vin);
- break;
- case ActivationLayerInfo::ActivationFunction::LINEAR:
- tmp = wrapper::vmla(vb, va, vin);
- break;
- case ActivationLayerInfo::ActivationFunction::LOGISTIC:
- tmp = wrapper::vinv(wrapper::vadd(const_1, wrapper::vexpq(wrapper::vneg(vin))));
- break;
- case ActivationLayerInfo::ActivationFunction::RELU:
- tmp = wrapper::vmax(const_0, vin);
- break;
- case ActivationLayerInfo::ActivationFunction::BOUNDED_RELU:
- tmp = wrapper::vmin(va, wrapper::vmax(const_0, vin));
- break;
- case ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU:
- tmp = wrapper::vmin(va, wrapper::vmax(vb, vin));
- break;
- case ActivationLayerInfo::ActivationFunction::LEAKY_RELU:
- tmp = wrapper::vbsl(wrapper::vcgt(vin, const_0), vin, wrapper::vmul(va, vin));
- break;
- case ActivationLayerInfo::ActivationFunction::SOFT_RELU:
- tmp = wrapper::vbsl(wrapper::vcgt(vin, vsoft_relu_thresh), vin, wrapper::vlog(wrapper::vadd(const_1, wrapper::vexpq(vin))));
- break;
- case ActivationLayerInfo::ActivationFunction::ELU:
- tmp = wrapper::vbsl(wrapper::vcge(vin, const_0), vin, wrapper::vmul(va, wrapper::vsub(wrapper::vexpq(vin), const_1)));
- break;
- case ActivationLayerInfo::ActivationFunction::SQRT:
+ const auto vin = wrapper::vloadq(input_ptr + x);
+ switch (act)
+ {
+ case ActivationLayerInfo::ActivationFunction::ABS:
+ tmp = wrapper::vabs(vin);
+ break;
+ case ActivationLayerInfo::ActivationFunction::LINEAR:
+ tmp = wrapper::vmla(vb, va, vin);
+ break;
+ case ActivationLayerInfo::ActivationFunction::LOGISTIC:
+ tmp = wrapper::vinv(wrapper::vadd(const_1, wrapper::vexpq(wrapper::vneg(vin))));
+ break;
+ case ActivationLayerInfo::ActivationFunction::RELU:
+ tmp = wrapper::vmax(const_0, vin);
+ break;
+ case ActivationLayerInfo::ActivationFunction::BOUNDED_RELU:
+ tmp = wrapper::vmin(va, wrapper::vmax(const_0, vin));
+ break;
+ case ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU:
+ tmp = wrapper::vmin(va, wrapper::vmax(vb, vin));
+ break;
+ case ActivationLayerInfo::ActivationFunction::LEAKY_RELU:
+ tmp = wrapper::vbsl(wrapper::vcgt(vin, const_0), vin, wrapper::vmul(va, vin));
+ break;
+ case ActivationLayerInfo::ActivationFunction::SOFT_RELU:
+ tmp = wrapper::vbsl(wrapper::vcgt(vin, vsoft_relu_thresh), vin,
+ wrapper::vlog(wrapper::vadd(const_1, wrapper::vexpq(vin))));
+ break;
+ case ActivationLayerInfo::ActivationFunction::ELU:
+ tmp = wrapper::vbsl(wrapper::vcge(vin, const_0), vin,
+ wrapper::vmul(va, wrapper::vsub(wrapper::vexpq(vin), const_1)));
+ break;
+ case ActivationLayerInfo::ActivationFunction::SQRT:
#ifdef __aarch64__
- tmp = wrapper::vsqrt(vin);
+ tmp = wrapper::vsqrt(vin);
#else /* __aarch64__ */
{
const auto bitmask = wrapper::vceq(vin, wrapper::vdup_n(0.f, ExactTagType{}));
- tmp = wrapper::vinv(wrapper::vinvsqrt(wrapper::vadd(vin, mask_float_vector(delta, bitmask))));
- tmp = mask_float_vector(tmp, wrapper::vnot(bitmask));
+ tmp = wrapper::vinv(wrapper::vinvsqrt(wrapper::vadd(vin, mask_float_vector(delta, bitmask))));
+ tmp = mask_float_vector(tmp, wrapper::vnot(bitmask));
}
#endif /* __aarch64__ */
- break;
- case ActivationLayerInfo::ActivationFunction::SQUARE:
- tmp = wrapper::vmul(vin, vin);
- break;
- case ActivationLayerInfo::ActivationFunction::TANH:
- tmp = wrapper::vmul(va, wrapper::vtanh(wrapper::vmul(vb, vin)));
- break;
- case ActivationLayerInfo::ActivationFunction::IDENTITY:
- tmp = vin;
- break;
- case ActivationLayerInfo::ActivationFunction::HARD_SWISH:
- tmp = wrapper::vmul(vin, wrapper::vmul(const_inv_6, wrapper::vmin(const_6, wrapper::vmax(const_0, wrapper::vadd(vin, const_3)))));
- break;
- case ActivationLayerInfo::ActivationFunction::SWISH:
- tmp = wrapper::vmul(vin, wrapper::vinv(wrapper::vadd(const_1, wrapper::vexpq(wrapper::vneg(wrapper::vmul(va, vin))))));
- break;
+ break;
+ case ActivationLayerInfo::ActivationFunction::SQUARE:
+ tmp = wrapper::vmul(vin, vin);
+ break;
+ case ActivationLayerInfo::ActivationFunction::TANH:
+ tmp = wrapper::vmul(va, wrapper::vtanh(wrapper::vmul(vb, vin)));
+ break;
+ case ActivationLayerInfo::ActivationFunction::IDENTITY:
+ tmp = vin;
+ break;
+ case ActivationLayerInfo::ActivationFunction::HARD_SWISH:
+ tmp = wrapper::vmul(
+ vin,
+ wrapper::vmul(const_inv_6,
+ wrapper::vmin(const_6, wrapper::vmax(const_0, wrapper::vadd(vin, const_3)))));
+ break;
+ case ActivationLayerInfo::ActivationFunction::SWISH:
+ tmp = wrapper::vmul(vin, wrapper::vinv(wrapper::vadd(
+ const_1, wrapper::vexpq(wrapper::vneg(wrapper::vmul(va, vin))))));
+ break;
#ifdef __aarch64__
- case ActivationLayerInfo::ActivationFunction::GELU:
- tmp = wrapper::vmul(vin, wrapper::vmul(const_inv_2, wrapper::vadd(const_1, wrapper::verf(wrapper::vmul(vin, const_inv_sqrt_2)))));
- break;
+ case ActivationLayerInfo::ActivationFunction::GELU:
+ tmp = wrapper::vmul(
+ vin,
+ wrapper::vmul(const_inv_2,
+ wrapper::vadd(const_1, wrapper::verf(wrapper::vmul(vin, const_inv_sqrt_2)))));
+ break;
#endif /* __aarch64__ */
- default:
- ARM_COMPUTE_ERROR("Unsupported activation function");
+ default:
+ ARM_COMPUTE_ERROR("Unsupported activation function");
+ }
+ wrapper::vstore(output_ptr + x, tmp);
}
- wrapper::vstore(output_ptr + x, tmp);
- }
- // Compute left-over elements
- for(; x < window_end_x; ++x)
- {
- const T in = *(reinterpret_cast<const T *>(input_ptr + x));
- T tmp;
- switch(act)
+ // Compute left-over elements
+ for (; x < window_end_x; ++x)
{
- case ActivationLayerInfo::ActivationFunction::ABS:
- tmp = std::abs(in);
- break;
- case ActivationLayerInfo::ActivationFunction::LINEAR:
- tmp = a * in + b;
- break;
- case ActivationLayerInfo::ActivationFunction::LOGISTIC:
- tmp = static_cast<T>(1) / (static_cast<T>(1) + std::exp(-in));
- break;
- case ActivationLayerInfo::ActivationFunction::RELU:
- tmp = std::max<T>(static_cast<T>(0), in);
- break;
- case ActivationLayerInfo::ActivationFunction::BOUNDED_RELU:
- tmp = std::min<T>(a, std::max(static_cast<T>(0), in));
- break;
- case ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU:
- tmp = std::min<T>(a, std::max<T>(b, in));
- break;
- case ActivationLayerInfo::ActivationFunction::LEAKY_RELU:
- tmp = (in > 0) ? in : a * in;
- break;
- case ActivationLayerInfo::ActivationFunction::SOFT_RELU:
- tmp = (in > soft_relu_thresh) ? in : std::log(static_cast<T>(1) + std::exp(in));
- break;
- case ActivationLayerInfo::ActivationFunction::ELU:
- tmp = (in >= 0) ? in : a * (std::exp(in) - 1);
- break;
- case ActivationLayerInfo::ActivationFunction::SQRT:
- tmp = std::sqrt(in);
- break;
- case ActivationLayerInfo::ActivationFunction::SQUARE:
- tmp = in * in;
- break;
- case ActivationLayerInfo::ActivationFunction::TANH:
- tmp = a * std::tanh(b * in);
- break;
- case ActivationLayerInfo::ActivationFunction::IDENTITY:
- tmp = in;
- break;
- case ActivationLayerInfo::ActivationFunction::HARD_SWISH:
- tmp = in * ((std::min(std::max((in + 3), 0.0f), 6.0f)) * 0.166666667f);
- break;
- case ActivationLayerInfo::ActivationFunction::SWISH:
- tmp = in / (static_cast<T>(1) + std::exp(-a * in));
- break;
- case ActivationLayerInfo::ActivationFunction::GELU:
- tmp = in * static_cast<T>(0.5f * (1.0f + erff(static_cast<float>(in) / 1.41421356237f)));
- break;
- default:
- ARM_COMPUTE_ERROR("Unsupported activation function");
+ const T in = *(reinterpret_cast<const T *>(input_ptr + x));
+ T tmp;
+ switch (act)
+ {
+ case ActivationLayerInfo::ActivationFunction::ABS:
+ tmp = std::abs(in);
+ break;
+ case ActivationLayerInfo::ActivationFunction::LINEAR:
+ tmp = a * in + b;
+ break;
+ case ActivationLayerInfo::ActivationFunction::LOGISTIC:
+ tmp = static_cast<T>(1) / (static_cast<T>(1) + std::exp(-in));
+ break;
+ case ActivationLayerInfo::ActivationFunction::RELU:
+ tmp = std::max<T>(static_cast<T>(0), in);
+ break;
+ case ActivationLayerInfo::ActivationFunction::BOUNDED_RELU:
+ tmp = std::min<T>(a, std::max(static_cast<T>(0), in));
+ break;
+ case ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU:
+ tmp = std::min<T>(a, std::max<T>(b, in));
+ break;
+ case ActivationLayerInfo::ActivationFunction::LEAKY_RELU:
+ tmp = (in > 0) ? in : a * in;
+ break;
+ case ActivationLayerInfo::ActivationFunction::SOFT_RELU:
+ tmp = (in > soft_relu_thresh) ? in : std::log(static_cast<T>(1) + std::exp(in));
+ break;
+ case ActivationLayerInfo::ActivationFunction::ELU:
+ tmp = (in >= 0) ? in : a * (std::exp(in) - 1);
+ break;
+ case ActivationLayerInfo::ActivationFunction::SQRT:
+ tmp = std::sqrt(in);
+ break;
+ case ActivationLayerInfo::ActivationFunction::SQUARE:
+ tmp = in * in;
+ break;
+ case ActivationLayerInfo::ActivationFunction::TANH:
+ tmp = a * std::tanh(b * in);
+ break;
+ case ActivationLayerInfo::ActivationFunction::IDENTITY:
+ tmp = in;
+ break;
+ case ActivationLayerInfo::ActivationFunction::HARD_SWISH:
+ tmp = in * ((std::min(std::max((in + 3), 0.0f), 6.0f)) * 0.166666667f);
+ break;
+ case ActivationLayerInfo::ActivationFunction::SWISH:
+ tmp = in / (static_cast<T>(1) + std::exp(-a * in));
+ break;
+ case ActivationLayerInfo::ActivationFunction::GELU:
+ tmp = in * static_cast<T>(0.5f * (1.0f + erff(static_cast<float>(in) / 1.41421356237f)));
+ break;
+ default:
+ ARM_COMPUTE_ERROR("Unsupported activation function");
+ }
+ *(output_ptr + x) = tmp;
}
- *(output_ptr + x) = tmp;
- }
- },
- input, output);
+ },
+ input, output);
}
} // namespace cpu
} // namespace arm_compute
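
For reference, the armv7 SQRT fallback that the reformatted hunk preserves can be sketched as a standalone function. This is a minimal illustration, not library code: it uses raw NEON intrinsics instead of the wrapper:: layer, and sqrt_no_nan_f32 and its delta constant are hypothetical stand-ins for the templated kernel and P.delta above. The idea is to compute sqrt(x) as 1/rsqrt(x + delta), adding delta only in lanes where the input is exactly zero (rsqrt(0) is +inf, which the reciprocal chain would turn into NaN), then forcing those lanes back to exact zero.

// Sketch only: assumes armv7 NEON (no vsqrtq_f32) and fp32 lanes.
#include <arm_neon.h>

static inline float32x4_t sqrt_no_nan_f32(float32x4_t x)
{
    const float32x4_t delta   = vdupq_n_f32(1e-24f); // assumed small constant, stands in for P.delta
    const uint32x4_t  is_zero = vceqq_f32(x, vdupq_n_f32(0.f));

    // Add delta only in the zero lanes (the mask_float_vector(delta, bitmask) step).
    const float32x4_t masked_delta =
        vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(delta), is_zero));
    const float32x4_t v = vaddq_f32(x, masked_delta);

    // rsqrt estimate plus one Newton-Raphson refinement step.
    float32x4_t rs = vrsqrteq_f32(v);
    rs = vmulq_f32(rs, vrsqrtsq_f32(vmulq_f32(v, rs), rs));

    // Invert: sqrt(v) == 1 / rsqrt(v), again with one refinement step.
    float32x4_t inv = vrecpeq_f32(rs);
    inv = vmulq_f32(inv, vrecpsq_f32(rs, inv));

    // Zero-input lanes currently hold sqrt(delta); clear them back to zero
    // (the mask_float_vector(tmp, vnot(bitmask)) step).
    return vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(inv), is_zero));
}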