aboutsummaryrefslogtreecommitdiff
path: root/src/cpu/kernels/activation/generic
diff options
context:
space:
mode:
Diffstat (limited to 'src/cpu/kernels/activation/generic')
-rw-r--r--src/cpu/kernels/activation/generic/neon/impl.h10
-rw-r--r--src/cpu/kernels/activation/generic/neon/qasymm8.cpp29
2 files changed, 37 insertions, 2 deletions
diff --git a/src/cpu/kernels/activation/generic/neon/impl.h b/src/cpu/kernels/activation/generic/neon/impl.h
index 2dd239e3a1..35abcb5408 100644
--- a/src/cpu/kernels/activation/generic/neon/impl.h
+++ b/src/cpu/kernels/activation/generic/neon/impl.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020-2021 Arm Limited.
+ * Copyright (c) 2020-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -77,7 +77,9 @@ void fp_neon_activation_impl(const ITensor *src, ITensor *dst, const ActivationL
const auto const_0 = wrapper::vdup_n(static_cast<T>(0.f), ExactTagType{});
const auto const_6 = wrapper::vdup_n(static_cast<T>(6.f), ExactTagType{});
const auto const_3 = wrapper::vdup_n(static_cast<T>(3.f), ExactTagType{});
+ const auto const_inv_2 = wrapper::vdup_n(static_cast<T>(0.5f), ExactTagType{});
const auto const_inv_6 = wrapper::vdup_n(static_cast<T>(0.166666667f), ExactTagType{});
+ const auto const_inv_sqrt_2 = wrapper::vdup_n(static_cast<T>(0.70710678118f), ExactTagType{});
constexpr float soft_relu_thresh = 12.f;
const auto vsoft_relu_thresh = wrapper::vdup_n(static_cast<T>(soft_relu_thresh), ExactTagType{});
const auto va = wrapper::vdup_n(static_cast<T>(act_info.a()), ExactTagType{});
@@ -146,6 +148,9 @@ void fp_neon_activation_impl(const ITensor *src, ITensor *dst, const ActivationL
case ActivationLayerInfo::ActivationFunction::HARD_SWISH:
tmp = wrapper::vmul(vin, wrapper::vmul(const_inv_6, wrapper::vmin(const_6, wrapper::vmax(const_0, wrapper::vadd(vin, const_3)))));
break;
+ case ActivationLayerInfo::ActivationFunction::GELU:
+ tmp = wrapper::vmul(vin, wrapper::vmul(const_inv_2, wrapper::vadd(const_1, wrapper::verf(wrapper::vmul(vin, const_inv_sqrt_2)))));
+ break;
default:
ARM_COMPUTE_ERROR("Unsupported activation function");
}
@@ -200,6 +205,9 @@ void fp_neon_activation_impl(const ITensor *src, ITensor *dst, const ActivationL
case ActivationLayerInfo::ActivationFunction::HARD_SWISH:
tmp = in * ((std::min(std::max((in + 3), 0.0f), 6.0f)) * 0.166666667f);
break;
+ case ActivationLayerInfo::ActivationFunction::GELU:
+ tmp = in * static_cast<T>(0.5f * (1.0f + erff(static_cast<float>(in) / 1.41421356237f)));
+ break;
default:
ARM_COMPUTE_ERROR("Unsupported activation function");
}
diff --git a/src/cpu/kernels/activation/generic/neon/qasymm8.cpp b/src/cpu/kernels/activation/generic/neon/qasymm8.cpp
index 67d9e0a8ca..05a0b505ca 100644
--- a/src/cpu/kernels/activation/generic/neon/qasymm8.cpp
+++ b/src/cpu/kernels/activation/generic/neon/qasymm8.cpp
@@ -58,9 +58,13 @@ void neon_qasymm8_activation(const ITensor *src, ITensor *dst, const ActivationL
const qasymm8_t b = quantize_qasymm8(act_info.b(), qi_in);
const qasymm8_t const_0 = quantize_qasymm8(0.f, qi_in);
const qasymm8x16_t vconst_0 = vdupq_n_u8(const_0);
+ const auto vconst_1 = vdupq_n_f32(1.f);
+
#ifndef __aarch64__
- const auto vconst_1 = vdupq_n_f32(1.f);
const auto vconst_0_f32 = vdupq_n_f32(0);
+#else // #ifndef __aarch64__
+ const auto const_inv_2 = vdupq_n_f32(0.5f);
+ const auto const_inv_sqrt_2 = vdupq_n_f32(0.70710678118f);
#endif // __aarch64__
const float32x4_t va_f32 = vdupq_n_f32(act_info.a());
const float32x4_t vb_f32 = vdupq_n_f32(act_info.b());
@@ -193,6 +197,23 @@ void neon_qasymm8_activation(const ITensor *src, ITensor *dst, const ActivationL
tmp = vquantize(tmp_dep, qi_out);
}
+#else // #ifndef __aarch64__
+ else if (act == ActivationLayerInfo::ActivationFunction::GELU)
+ {
+ const auto vin_deq = vdequantize(vin, qi_in);
+ // Perform activation
+ const float32x4x4_t tmp_dep =
+ {
+ {
+ wrapper::vmul(vin_deq.val[0], wrapper::vmul(const_inv_2, wrapper::vadd(vconst_1, wrapper::verf(wrapper::vmul(vin_deq.val[0], const_inv_sqrt_2))))),
+ wrapper::vmul(vin_deq.val[1], wrapper::vmul(const_inv_2, wrapper::vadd(vconst_1, wrapper::verf(wrapper::vmul(vin_deq.val[1], const_inv_sqrt_2))))),
+ wrapper::vmul(vin_deq.val[2], wrapper::vmul(const_inv_2, wrapper::vadd(vconst_1, wrapper::verf(wrapper::vmul(vin_deq.val[2], const_inv_sqrt_2))))),
+ wrapper::vmul(vin_deq.val[3], wrapper::vmul(const_inv_2, wrapper::vadd(vconst_1, wrapper::verf(wrapper::vmul(vin_deq.val[3], const_inv_sqrt_2))))),
+ }
+ };
+ // Re-quantize to new output space
+ tmp = vquantize(tmp_dep, qi_out);
+ }
#endif // __aarch64__
else
{
@@ -248,6 +269,12 @@ void neon_qasymm8_activation(const ITensor *src, ITensor *dst, const ActivationL
tmp_f = tmp_f > 0 ? tmp_f : tmp_f * a_f32;
tmp = quantize_qasymm8(tmp_f, qi_out);
}
+ else if(act == ActivationLayerInfo::ActivationFunction::GELU)
+ {
+ // GELU(x) = 0.5 * x * (1 + erf(x / sqrt(2))): evaluate on the dequantized input, then re-quantize the activated value.
+ const float tmp_f = dequantize_qasymm8(in, qi_in);
+ tmp = quantize_qasymm8(tmp_f * 0.5f * (1.0f + std::erff(tmp_f / 1.41421356237f)), qi_out);
+ }
#endif // __aarch64__
else
{