From 91654c45cf1de5f41127536a0fdd310c17fdfc8e Mon Sep 17 00:00:00 2001 From: Pablo Tello Date: Wed, 5 Jul 2017 11:32:17 +0100 Subject: COMPMID-421: Added FP16 support in ActivationLayer. Change-Id: I7ba573b19d56e3c87996edb5218a00e5bfca451e Reviewed-on: http://mpd-gerrit.cambridge.arm.com/79755 Reviewed-by: Anthony Barbier Tested-by: Kaizen --- arm_compute/core/NEON/NEMath.h | 18 +++++++++++++++++ arm_compute/core/NEON/NEMath.inl | 23 ++++++++++++++++++++++ .../core/NEON/kernels/NEActivationLayerKernel.h | 12 +++++++++++ 3 files changed, 53 insertions(+) (limited to 'arm_compute/core') diff --git a/arm_compute/core/NEON/NEMath.h b/arm_compute/core/NEON/NEMath.h index 8dd9d609e7..b467a600d6 100644 --- a/arm_compute/core/NEON/NEMath.h +++ b/arm_compute/core/NEON/NEMath.h @@ -93,6 +93,24 @@ float32x4_t vtanhq_f32(float32x4_t val); float32x4_t vpowq_f32(float32x4_t val, float32x4_t n); #ifdef ARM_COMPUTE_ENABLE_FP16 +/** Calculate hyperbolic tangent. + * + * tanh(x) = (e^2x - 1)/(e^2x + 1) + * + * @note We clamp x to [-5,5] to avoid overflowing issues. + * + * @param[in] val Input vector value in F32 format. + * + * @return The calculated Hyperbolic Tangent. + */ +float16x8_t vtanhq_f16(float16x8_t val); +/** Calculate inverse square root. + * + * @param[in] x Input value. + * + * @return The calculated inverse square root. + */ +float16x8_t vinvsqrtq_f16(float16x8_t x); /** Calculate exponential * * @param[in] x Input vector value in F16 format. 
diff --git a/arm_compute/core/NEON/NEMath.inl b/arm_compute/core/NEON/NEMath.inl index c73c54501f..1d90029147 100644 --- a/arm_compute/core/NEON/NEMath.inl +++ b/arm_compute/core/NEON/NEMath.inl @@ -172,6 +172,14 @@ const std::array<float16x8_t, 8> log_tab_f16 = vdupq_n_f16(0.0141278216615f), } }; +inline float16x8_t vinvsqrtq_f16(float16x8_t x) +{ + float16x8_t sqrt_reciprocal = vrsqrteq_f16(x); + sqrt_reciprocal = vmulq_f16(vrsqrtsq_f16(vmulq_f16(x, sqrt_reciprocal), sqrt_reciprocal), sqrt_reciprocal); + sqrt_reciprocal = vmulq_f16(vrsqrtsq_f16(vmulq_f16(x, sqrt_reciprocal), sqrt_reciprocal), sqrt_reciprocal); + + return sqrt_reciprocal; +} inline float16x8_t vinvq_f16(float16x8_t x) { @@ -181,6 +189,21 @@ inline float16x8_t vinvq_f16(float16x8_t x) return recip; } +inline float16x8_t vtanhq_f16(float16x8_t val) +{ + const float16x8_t CONST_1 = vdupq_n_f16(1.f); + const float16x8_t CONST_2 = vdupq_n_f16(2.f); + const float16x8_t CONST_MIN_TANH = vdupq_n_f16(-10.f); + const float16x8_t CONST_MAX_TANH = vdupq_n_f16(10.f); + + const float16x8_t x = vminq_f16(vmaxq_f16(val, CONST_MIN_TANH), CONST_MAX_TANH); + const float16x8_t exp2x = vexpq_f16(vmulq_f16(CONST_2, x)); + const float16x8_t num = vsubq_f16(exp2x, CONST_1); + const float16x8_t den = vaddq_f16(exp2x, CONST_1); + const float16x8_t tanh = vmulq_f16(num, vinvq_f16(den)); + return tanh; +} + inline float16x8_t vtaylor_polyq_f16(float16x8_t x, const std::array<float16x8_t, 8> &coeffs) { const float16x8_t A = vaddq_f16(coeffs[0], vmulq_f16(coeffs[4], x)); diff --git a/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h b/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h index e995f1e5e0..2c88debfb4 100644 --- a/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h @@ -27,6 +27,10 @@ #include "arm_compute/core/FixedPoint.h" #include "arm_compute/core/NEON/INEKernel.h" +#ifdef ARM_COMPUTE_ENABLE_FP16 +#include <arm_fp16.h> +#endif /* ARM_COMPUTE_ENABLE_FP16 */ + namespace arm_compute
{ class ITensor; @@ -72,6 +76,14 @@ private: */ template <ActivationLayerInfo::ActivationFunction F, typename T> typename std::enable_if<std::is_same<T, float>::value, void>::type activation(const Window &window); +#ifdef ARM_COMPUTE_ENABLE_FP16 + /** Function to apply an activation function on a tensor. + * + * @param[in] window Region on which to execute the kernel + */ + template <ActivationLayerInfo::ActivationFunction F, typename T> + typename std::enable_if<std::is_same<T, float16_t>::value, void>::type activation(const Window &window); +#endif /* ARM_COMPUTE_ENABLE_FP16 */ /** Function to apply an activation function on a tensor. * * @param[in] window Region on which to execute the kernel -- cgit v1.2.1