From f2cde9b29deee6423ea6fe9a1a9afc9ef61d2663 Mon Sep 17 00:00:00 2001 From: Gian Marco Iodice Date: Thu, 23 Aug 2018 15:29:16 +0100 Subject: COMPMID-1534 - Fix NENormalizationLayer for FP16 Implemented vpowq_f16 using the fp32 data type in order to avoid accuracy issues. Change-Id: Ibfffd12e4a941c1388a982fc7bbe3e1832351feb Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/145416 Reviewed-by: Georgios Pinitas Tested-by: Jenkins --- arm_compute/core/NEON/NEMath.inl | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'arm_compute') diff --git a/arm_compute/core/NEON/NEMath.inl b/arm_compute/core/NEON/NEMath.inl index 61d25d115c..1ebc9c10af 100644 --- a/arm_compute/core/NEON/NEMath.inl +++ b/arm_compute/core/NEON/NEMath.inl @@ -303,7 +303,16 @@ inline float16x8_t vlogq_f16(float16x8_t x) inline float16x8_t vpowq_f16(float16x8_t val, float16x8_t n) { - return vexpq_f16(vmulq_f16(n, vlogq_f16(val))); + // TODO (giaiod01) - COMPMID-1535 + float32x4_t n0_f32 = vcvt_f32_f16(vget_low_f16(n)); + float32x4_t n1_f32 = vcvt_f32_f16(vget_high_f16(n)); + float32x4_t val0_f32 = vcvt_f32_f16(vget_low_f16(val)); + float32x4_t val1_f32 = vcvt_f32_f16(vget_high_f16(val)); + + float32x4_t res0_f32 = vexpq_f32(vmulq_f32(n0_f32, vlogq_f32(val0_f32))); + float32x4_t res1_f32 = vexpq_f32(vmulq_f32(n1_f32, vlogq_f32(val1_f32))); + + return vcombine_f16(vcvt_f16_f32(res0_f32), vcvt_f16_f32(res1_f32)); } #endif /* DOXYGEN_SKIP_THIS */ #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ -- cgit v1.2.1