From ff81de5a9a0f6b9331c3b112cc2aed552f0482a9 Mon Sep 17 00:00:00 2001
From: Pablo Marquez Tello
Date: Mon, 26 Sep 2022 17:22:42 +0100
Subject: Fix overflow in NEActivationLayer for FP16 type

* Resolves MLCE-924

Change-Id: I3cc3d30893c2ee0865eacafdc1d9ba3d5b876d32
Signed-off-by: Pablo Marquez Tello
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8326
Comments-Addressed: Arm Jenkins
Tested-by: Arm Jenkins
Reviewed-by: Viet-Hoa Do
Benchmark: Arm Jenkins
---
 src/core/NEON/NEMath.inl | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

(limited to 'src/core')

diff --git a/src/core/NEON/NEMath.inl b/src/core/NEON/NEMath.inl
index 1b0b894153..1755974d2d 100644
--- a/src/core/NEON/NEMath.inl
+++ b/src/core/NEON/NEMath.inl
@@ -520,16 +520,14 @@ inline float16x8_t vinvq_f16(float16x8_t x)
 
 inline float16x8_t vtanhq_f16(float16x8_t val)
 {
-    const float16x8_t CONST_1 = vdupq_n_f16(1.f);
-    const float16x8_t CONST_2 = vdupq_n_f16(2.f);
     const float16x8_t CONST_MIN_TANH = vdupq_n_f16(-10.f);
     const float16x8_t CONST_MAX_TANH = vdupq_n_f16(10.f);
-
-    const float16x8_t x = vminq_f16(vmaxq_f16(val, CONST_MIN_TANH), CONST_MAX_TANH);
-    const float16x8_t exp2x = vexpq_f16(vmulq_f16(CONST_2, x));
-    const float16x8_t num = vsubq_f16(exp2x, CONST_1);
-    const float16x8_t den = vaddq_f16(exp2x, CONST_1);
-    const float16x8_t tanh = vmulq_f16(num, vinvq_f16(den));
+    const float16x8_t x = vminq_f16(vmaxq_f16(val, CONST_MIN_TANH), CONST_MAX_TANH);
+    const auto expx = vexpq_f16(x);
+    const auto expmx = vinvq_f16(expx);
+    const auto ab = vsubq_f16(expx, expmx);
+    const auto cd = vaddq_f16(expx, expmx);
+    const float16x8_t tanh = vdivq_f16(ab, cd);
     return tanh;
 }
 
-- 
cgit v1.2.1