From 926f502ca731fa49bcdf949408ce25728616e5f2 Mon Sep 17 00:00:00 2001 From: Murray Kornelsen Date: Wed, 13 Jul 2022 21:22:39 -0400 Subject: Adding GELU activation OpenCL implementation uses built in erf. NEON implementation requires new vectorized erf. Uses the following approximation: erf(x) = 1 - 1 / (1 + a1x + a2x^2 + a3x^3 + a4x^4)^4 a1 = 0.278393, a2 = 0.230389, a3 = 0.000972, a4 = 0.078108 From https://en.wikipedia.org/wiki/Error_function#Numerical_approximations Signed-off-by: Murray Kornelsen Change-Id: I2d3964b2c26a4334166b17135f9104bc6324fad2 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/7921 Reviewed-by: Viet-Hoa Do Reviewed-by: Pablo Marquez Tello Comments-Addressed: Arm Jenkins Comments-Addressed: Pablo Marquez Tello Tested-by: Arm Jenkins Benchmark: Arm Jenkins --- src/cpu/kernels/activation/generic/neon/impl.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'src/cpu/kernels/activation/generic/neon/impl.h') diff --git a/src/cpu/kernels/activation/generic/neon/impl.h b/src/cpu/kernels/activation/generic/neon/impl.h index 2dd239e3a1..35abcb5408 100644 --- a/src/cpu/kernels/activation/generic/neon/impl.h +++ b/src/cpu/kernels/activation/generic/neon/impl.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021 Arm Limited. + * Copyright (c) 2020-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -77,7 +77,9 @@ void fp_neon_activation_impl(const ITensor *src, ITensor *dst, const ActivationL const auto const_0 = wrapper::vdup_n(static_cast(0.f), ExactTagType{}); const auto const_6 = wrapper::vdup_n(static_cast(6.f), ExactTagType{}); const auto const_3 = wrapper::vdup_n(static_cast(3.f), ExactTagType{}); + const auto const_inv_2 = wrapper::vdup_n(static_cast(0.5f), ExactTagType{}); const auto const_inv_6 = wrapper::vdup_n(static_cast(0.166666667f), ExactTagType{}); + const auto const_inv_sqrt_2 = wrapper::vdup_n(static_cast(0.70710678118f), ExactTagType{}); constexpr float soft_relu_thresh = 12.f; const auto vsoft_relu_thresh = wrapper::vdup_n(static_cast(soft_relu_thresh), ExactTagType{}); const auto va = wrapper::vdup_n(static_cast(act_info.a()), ExactTagType{}); @@ -146,6 +148,9 @@ void fp_neon_activation_impl(const ITensor *src, ITensor *dst, const ActivationL case ActivationLayerInfo::ActivationFunction::HARD_SWISH: tmp = wrapper::vmul(vin, wrapper::vmul(const_inv_6, wrapper::vmin(const_6, wrapper::vmax(const_0, wrapper::vadd(vin, const_3))))); break; + case ActivationLayerInfo::ActivationFunction::GELU: + tmp = wrapper::vmul(vin, wrapper::vmul(const_inv_2, wrapper::vadd(const_1, wrapper::verf(wrapper::vmul(vin, const_inv_sqrt_2))))); + break; default: ARM_COMPUTE_ERROR("Unsupported activation function"); } @@ -200,6 +205,9 @@ void fp_neon_activation_impl(const ITensor *src, ITensor *dst, const ActivationL case ActivationLayerInfo::ActivationFunction::HARD_SWISH: tmp = in * ((std::min(std::max((in + 3), 0.0f), 6.0f)) * 0.166666667f); break; + case ActivationLayerInfo::ActivationFunction::GELU: + tmp = in * static_cast(0.5f * (1.0f + erff(static_cast(in) / 1.41421356237f))); + break; default: ARM_COMPUTE_ERROR("Unsupported activation function"); } -- cgit v1.2.1