diff options
author | Pablo Tello <pablo.tello@arm.com> | 2017-07-05 11:32:17 +0100 |
---|---|---|
committer | Anthony Barbier <anthony.barbier@arm.com> | 2018-09-17 14:16:42 +0100 |
commit | 91654c45cf1de5f41127536a0fdd310c17fdfc8e (patch) | |
tree | 1cf914061c456282f0ba899ebbdc591cabc7f0fc /arm_compute/core/NEON/NEMath.inl | |
parent | ec69f93dc63408933d322ec27d0b7049b9a6e07c (diff) | |
download | ComputeLibrary-91654c45cf1de5f41127536a0fdd310c17fdfc8e.tar.gz |
COMPMID-421: Added FP16 support in ActivationLayer.
Change-Id: I7ba573b19d56e3c87996edb5218a00e5bfca451e
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/79755
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
Diffstat (limited to 'arm_compute/core/NEON/NEMath.inl')
-rw-r--r-- | arm_compute/core/NEON/NEMath.inl | 23 |
1 files changed, 23 insertions, 0 deletions
diff --git a/arm_compute/core/NEON/NEMath.inl b/arm_compute/core/NEON/NEMath.inl
index c73c54501f..1d90029147 100644
--- a/arm_compute/core/NEON/NEMath.inl
+++ b/arm_compute/core/NEON/NEMath.inl
@@ -172,6 +172,14 @@ const std::array<float16x8_t, 8> log_tab_f16 =
         vdupq_n_f16(0.0141278216615f),
     }
 };
 
+inline float16x8_t vinvsqrtq_f16(float16x8_t x)
+{
+    float16x8_t sqrt_reciprocal = vrsqrteq_f16(x);
+    sqrt_reciprocal = vmulq_f16(vrsqrtsq_f16(vmulq_f16(x, sqrt_reciprocal), sqrt_reciprocal), sqrt_reciprocal);
+    sqrt_reciprocal = vmulq_f16(vrsqrtsq_f16(vmulq_f16(x, sqrt_reciprocal), sqrt_reciprocal), sqrt_reciprocal);
+
+    return sqrt_reciprocal;
+}
 inline float16x8_t vinvq_f16(float16x8_t x)
 {
@@ -181,6 +189,21 @@ inline float16x8_t vinvq_f16(float16x8_t x)
     return recip;
 }
 
+inline float16x8_t vtanhq_f16(float16x8_t val)
+{
+    const float16x8_t CONST_1 = vdupq_n_f16(1.f);
+    const float16x8_t CONST_2 = vdupq_n_f16(2.f);
+    const float16x8_t CONST_MIN_TANH = vdupq_n_f16(-10.f);
+    const float16x8_t CONST_MAX_TANH = vdupq_n_f16(10.f);
+
+    const float16x8_t x = vminq_f16(vmaxq_f16(val, CONST_MIN_TANH), CONST_MAX_TANH);
+    const float16x8_t exp2x = vexpq_f16(vmulq_f16(CONST_2, x));
+    const float16x8_t num = vsubq_f16(exp2x, CONST_1);
+    const float16x8_t den = vaddq_f16(exp2x, CONST_1);
+    const float16x8_t tanh = vmulq_f16(num, vinvq_f16(den));
+    return tanh;
+}
+
 inline float16x8_t vtaylor_polyq_f16(float16x8_t x, const std::array<float16x8_t, 8> &coeffs)
 {
     const float16x8_t A = vaddq_f16(coeffs[0], vmulq_f16(coeffs[4], x));