aboutsummaryrefslogtreecommitdiff
path: root/arm_compute
diff options
context:
space:
mode:
authorMichele Di Giorgio <michele.digiorgio@arm.com>2018-11-20 16:03:01 +0000
committerMichalis Spyrou <michalis.spyrou@arm.com>2018-11-21 14:08:19 +0000
commit1c948d47f55ff8a39aa527f63ea7df93a13dd38e (patch)
tree4579798cb95ecaf62b5d7fe61b5d3753301e2804 /arm_compute
parent8b2814ab7b9dc00278132d74d2f738b843b6c0c7 (diff)
downloadComputeLibrary-1c948d47f55ff8a39aa527f63ea7df93a13dd38e.tar.gz
COMPMID-1800: (Nightly) Mismatches in SC9863 board for NEON FP16
Fixes for: - ReduceMean: the reduction on the X axis for FP16 with 8 elements was performed only up to a certain point. The fix now takes into account the number of elements in the vector and performs as many reductions as necessary. - YOLOLayer: the activation for FP16 has to be performed in 32 bits until the FP16 approximations are fixed. Change-Id: I75373f4edd37de476e6fe1a56de3ef386b65c619
Diffstat (limited to 'arm_compute')
-rw-r--r--arm_compute/core/NEON/NEMath.inl35
1 files changed, 5 insertions, 30 deletions
diff --git a/arm_compute/core/NEON/NEMath.inl b/arm_compute/core/NEON/NEMath.inl
index 2bc1ab7964..4de80509f0 100644
--- a/arm_compute/core/NEON/NEMath.inl
+++ b/arm_compute/core/NEON/NEMath.inl
@@ -245,37 +245,12 @@ inline float16x8_t vtaylor_polyq_f16(float16x8_t x, const std::array<float16x8_t
inline float16x8_t vexpq_f16(float16x8_t x)
{
- static const std::array<float16x8_t, 8> exp_tab_f16 =
- {
- {
- vdupq_n_f16(1.f),
- vdupq_n_f16(0.0416598916054f),
- vdupq_n_f16(0.500000596046f),
- vdupq_n_f16(0.0014122662833f),
- vdupq_n_f16(1.00000011921f),
- vdupq_n_f16(0.00833693705499f),
- vdupq_n_f16(0.166665703058f),
- vdupq_n_f16(0.000195780929062f),
- }
- };
-
- static const float16x8_t CONST_LN2 = vdupq_n_f16(0.6931471805f); // ln(2)
- static const float16x8_t CONST_INV_LN2 = vdupq_n_f16(1.4426950408f); // 1/ln(2)
- static const float16x8_t CONST_0 = vdupq_n_f16(0.f);
- static const int16x8_t CONST_NEGATIVE_126 = vdupq_n_s16(-126);
-
- // Perform range reduction [-log(2),log(2)]
- const int16x8_t m = vcvtq_s16_f16(vmulq_f16(x, CONST_INV_LN2));
- const float16x8_t val = vsubq_f16(x, vmulq_f16(vcvtq_f16_s16(m), CONST_LN2));
+ // TODO (COMPMID-1535) : Revisit FP16 approximations
+ const float32x4_t x_high = vcvt_f32_f16(vget_high_f16(x));
+ const float32x4_t x_low = vcvt_f32_f16(vget_low_f16(x));
- // Polynomial Approximation
- float16x8_t poly = vtaylor_polyq_f16(val, exp_tab_f16);
-
- // Reconstruct
- poly = vreinterpretq_f16_s16(vqaddq_s16(vreinterpretq_s16_f16(poly), vqshlq_n_s16(m, 9)));
- poly = vbslq_f16(vcltq_s16(m, CONST_NEGATIVE_126), CONST_0, poly);
-
- return poly;
+ const float16x8_t res = vcvt_high_f16_f32(vcvt_f16_f32(vexpq_f32(x_low)), vexpq_f32(x_high));
+ return res;
}
inline float16x8_t vlogq_f16(float16x8_t x)