about | summary | refs | log | tree | commit | diff
path: root/arm_compute/core/NEON/NEMath.inl
diff options
context:
space:
mode:
author: Georgios Pinitas <georgios.pinitas@arm.com> 2018-12-03 14:30:05 +0000
committer: Georgios Pinitas <georgios.pinitas@arm.com> 2019-01-14 17:53:22 +0000
commit: 5a5945387e70f62e6e1e95a177fae261d7570443 (patch)
tree: ff8bd61c2e071b5a0b923f4a0d1bef72486435e9 /arm_compute/core/NEON/NEMath.inl
parent: dea2d2d58fe3a742e6f66fe50befbe0044e15ad1 (diff)
download: ComputeLibrary-5a5945387e70f62e6e1e95a177fae261d7570443.tar.gz
COMPMID-1809: Remove padding in NEGEMMConvolutionLayer 64-bit path.
Change-Id: I1806591a2c73a1f057f13d8c6107d7b9796a82c8
Reviewed-on: https://review.mlplatform.org/370
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michalis Spyrou <michalis.spyrou@arm.com>
Diffstat (limited to 'arm_compute/core/NEON/NEMath.inl')
-rw-r--r-- arm_compute/core/NEON/NEMath.inl 34
1 files changed, 6 insertions, 28 deletions
diff --git a/arm_compute/core/NEON/NEMath.inl b/arm_compute/core/NEON/NEMath.inl
index 4de80509f0..27b4fc2c1b 100644
--- a/arm_compute/core/NEON/NEMath.inl
+++ b/arm_compute/core/NEON/NEMath.inl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2018 ARM Limited.
+ * Copyright (c) 2016-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -255,34 +255,12 @@ inline float16x8_t vexpq_f16(float16x8_t x)
inline float16x8_t vlogq_f16(float16x8_t x)
{
- static const std::array<float16x8_t, 8> log_tab_f16 =
- {
- {
- vdupq_n_f16(-2.29561495781f),
- vdupq_n_f16(-2.47071170807f),
- vdupq_n_f16(-5.68692588806f),
- vdupq_n_f16(-0.165253549814f),
- vdupq_n_f16(5.17591238022f),
- vdupq_n_f16(0.844007015228f),
- vdupq_n_f16(4.58445882797f),
- vdupq_n_f16(0.0141278216615f),
- }
- };
-
- static const int16x8_t CONST_127 = vdupq_n_s16(127); // 127
- static const float16x8_t CONST_LN2 = vdupq_n_f16(0.6931471805f); // ln(2)
-
- // Extract exponent
- const int16x8_t m = vsubq_s16(vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_f16(x), 9)), CONST_127);
- const float16x8_t val = vreinterpretq_f16_s16(vsubq_s16(vreinterpretq_s16_f16(x), vshlq_n_s16(m, 9)));
-
- // Polynomial Approximation
- float16x8_t poly = vtaylor_polyq_f16(val, log_tab_f16);
-
- // Reconstruct
- poly = vaddq_f16(poly, vmulq_f16(vcvtq_f16_s16(m), CONST_LN2));
+ // TODO (COMPMID-1535) : Revisit FP16 approximations
+ const float32x4_t x_high = vcvt_f32_f16(vget_high_f16(x));
+ const float32x4_t x_low = vcvt_f32_f16(vget_low_f16(x));
- return poly;
+ const float16x8_t res = vcvt_high_f16_f32(vcvt_f16_f32(vlogq_f32(x_low)), vlogq_f32(x_high));
+ return res;
}
inline float16x8_t vpowq_f16(float16x8_t val, float16x8_t n)