From d6b8a71714361881a249a6f6ed67125f290f4a83 Mon Sep 17 00:00:00 2001 From: Jonathan Deakin Date: Tue, 23 Aug 2022 11:44:18 +0100 Subject: =?UTF-8?q?Add=20FP32=20Neon=E2=84=A2=20swish=20activation?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change-Id: Id37b59adbc8c4cbe218d1652aeb02a0b4ce42c66 Signed-off-by: Jonathan Deakin Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8256 Tested-by: Arm Jenkins Reviewed-by: Pablo Marquez Tello Comments-Addressed: Arm Jenkins Benchmark: Arm Jenkins --- src/cpu/kernels/activation/generic/neon/impl.h | 6 ++++++ src/cpu/kernels/activation/generic/sve/fp16.cpp | 5 ++++- src/cpu/kernels/activation/generic/sve/fp32.cpp | 3 +++ 3 files changed, 13 insertions(+), 1 deletion(-) (limited to 'src/cpu') diff --git a/src/cpu/kernels/activation/generic/neon/impl.h b/src/cpu/kernels/activation/generic/neon/impl.h index 4d4aa8d212..4f392866b5 100644 --- a/src/cpu/kernels/activation/generic/neon/impl.h +++ b/src/cpu/kernels/activation/generic/neon/impl.h @@ -149,6 +149,9 @@ void fp_neon_activation_impl(const ITensor *src, ITensor *dst, const ActivationL case ActivationLayerInfo::ActivationFunction::HARD_SWISH: tmp = wrapper::vmul(vin, wrapper::vmul(const_inv_6, wrapper::vmin(const_6, wrapper::vmax(const_0, wrapper::vadd(vin, const_3))))); break; + case ActivationLayerInfo::ActivationFunction::SWISH: + tmp = wrapper::vmul(vin, wrapper::vinv(wrapper::vadd(const_1, wrapper::vexpq(wrapper::vneg(wrapper::vmul(va, vin)))))); + break; #ifdef __aarch64__ case ActivationLayerInfo::ActivationFunction::GELU: tmp = wrapper::vmul(vin, wrapper::vmul(const_inv_2, wrapper::vadd(const_1, wrapper::verf(wrapper::vmul(vin, const_inv_sqrt_2))))); @@ -208,6 +211,9 @@ void fp_neon_activation_impl(const ITensor *src, ITensor *dst, const ActivationL case ActivationLayerInfo::ActivationFunction::HARD_SWISH: tmp = in * ((std::min(std::max((in + 3), 0.0f), 6.0f)) * 0.166666667f); break; + case 
ActivationLayerInfo::ActivationFunction::SWISH: + tmp = in / (static_cast<T>(1) + std::exp(-a*in)); + break; case ActivationLayerInfo::ActivationFunction::GELU: tmp = in * static_cast<T>(0.5f * (1.0f + erff(static_cast<float>(in) / 1.41421356237f))); break; diff --git a/src/cpu/kernels/activation/generic/sve/fp16.cpp b/src/cpu/kernels/activation/generic/sve/fp16.cpp index 5730a361d9..6e9648b5bf 100644 --- a/src/cpu/kernels/activation/generic/sve/fp16.cpp +++ b/src/cpu/kernels/activation/generic/sve/fp16.cpp @@ -114,6 +114,9 @@ void sve_fp16_activation(const ITensor *src, ITensor *dst, const ActivationLayer case ActivationLayerInfo::ActivationFunction::HARD_SWISH: tmp = svmul_f16_z(pg, vin, svmul_f16_z(pg, const_inv_6, svmin_f16_z(pg, const_6, svmax_f16_z(pg, const_0, svadd_f16_z(pg, vin, const_3))))); break; + case ActivationLayerInfo::ActivationFunction::SWISH: + tmp = svmul_f16_z(pg, vin, svinv_f16_z(pg, svadd_f16_z(pg, const_1, svexp_f16_z(pg, svneg_f16_z(pg, svmul_f16_z(pg, va, vin)))))); + break; default: ARM_COMPUTE_ERROR("Unsupported activation function"); } @@ -129,4 +132,4 @@ void sve_fp16_activation(const ITensor *src, ITensor *dst, const ActivationLayer } } // namespace cpu } // namespace arm_compute -#endif /* defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS) */ \ No newline at end of file +#endif /* defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS) */ diff --git a/src/cpu/kernels/activation/generic/sve/fp32.cpp b/src/cpu/kernels/activation/generic/sve/fp32.cpp index 7ce2046730..b5490063cf 100644 --- a/src/cpu/kernels/activation/generic/sve/fp32.cpp +++ b/src/cpu/kernels/activation/generic/sve/fp32.cpp @@ -113,6 +113,9 @@ void sve_fp32_activation(const ITensor *src, ITensor *dst, const ActivationLayer case ActivationLayerInfo::ActivationFunction::HARD_SWISH: tmp = svmul_f32_z(pg, vin, svmul_f32_z(pg, const_inv_6, svmin_f32_z(pg, const_6, svmax_f32_z(pg, const_0, svadd_f32_z(pg, vin, const_3))))); break; + case 
ActivationLayerInfo::ActivationFunction::SWISH: + tmp = svmul_f32_z(pg, vin, svinv_f32_z(pg, svadd_f32_z(pg, const_1, svexp_f32_z(pg, svneg_f32_z(pg, svmul_f32_z(pg, va, vin)))))); + break; default: ARM_COMPUTE_ERROR("Unsupported activation function"); } -- cgit v1.2.1