From 1c948d47f55ff8a39aa527f63ea7df93a13dd38e Mon Sep 17 00:00:00 2001 From: Michele Di Giorgio Date: Tue, 20 Nov 2018 16:03:01 +0000 Subject: COMPMID-1800: (Nightly) Mismatches in SC9863 board for NEON FP16 Fixes for: - ReduceMean, reduction on the X axis for FP16 with 8 elements was performed only up to a certain point. The fix now takes into account the number of elements of the vector and does as many reductions as necessary. - YOLOLayer, activation for FP16 has to be performed on 32 bits until the FP16 approximations are fixed. Change-Id: I75373f4edd37de476e6fe1a56de3ef386b65c619 --- src/core/NEON/kernels/NEActivationLayerKernel.cpp | 19 +++---------------- 1 file changed, 3 insertions(+), 16 deletions(-) (limited to 'src/core/NEON/kernels/NEActivationLayerKernel.cpp') diff --git a/src/core/NEON/kernels/NEActivationLayerKernel.cpp b/src/core/NEON/kernels/NEActivationLayerKernel.cpp index 2163f7bb63..5ce79f1007 100644 --- a/src/core/NEON/kernels/NEActivationLayerKernel.cpp +++ b/src/core/NEON/kernels/NEActivationLayerKernel.cpp @@ -184,7 +184,7 @@ typename std::enable_if::value, void>::type NEActivat Iterator output(_output, window); static const float16x8_t CONST_0 = vdupq_n_f16(0.f); - static const float16x4_t CONST_1_H = vdup_n_f16(1.f); + static const float16x8_t CONST_1_H = vdupq_n_f16(1.f); static const float32x4_t CONST_1_F32 = vdupq_n_f32(1.f); @@ -240,24 +240,11 @@ typename std::enable_if::value, void>::type NEActivat break; case ActivationFunction::LOGISTIC: { - // TODO (COMPMID-1535) : Revisit FP16 approximations - const float16x4x2_t in0 = - { - vinv_f16(vadd_f16(CONST_1_H, vcvt_f16_f32(vexpq_f32(vcvt_f32_f16(vneg_f16(vget_low_f16(in.val[0]))))))), - vinv_f16(vadd_f16(CONST_1_H, vcvt_f16_f32(vexpq_f32(vcvt_f32_f16(vneg_f16(vget_high_f16(in.val[0]))))))), - }; - - const float16x4x2_t in1 = - { - vinv_f16(vadd_f16(CONST_1_H, vcvt_f16_f32(vexpq_f32(vcvt_f32_f16(vneg_f16(vget_low_f16(in.val[1]))))))), - vinv_f16(vadd_f16(CONST_1_H, 
vcvt_f16_f32(vexpq_f32(vcvt_f32_f16(vneg_f16(vget_high_f16(in.val[1]))))))), - }; - tmp = { { - vcombine_f16(in0.val[0], in0.val[1]), - vcombine_f16(in1.val[0], in1.val[1]), + vinvq_f16(vaddq_f16(CONST_1_H, vexpq_f16(vnegq_f16(in.val[0])))), + vinvq_f16(vaddq_f16(CONST_1_H, vexpq_f16(vnegq_f16(in.val[1])))) } }; } -- cgit v1.2.1