diff options
author | Pablo Tello <pablo.tello@arm.com> | 2017-07-05 15:20:38 +0100 |
---|---|---|
committer | Anthony Barbier <anthony.barbier@arm.com> | 2018-09-17 14:16:42 +0100 |
commit | 8fda1cb6f4142133fff045a6f9c18778757c316c (patch) | |
tree | 3f0ad562b24cc3c76e8a745cb59cd584b664ec57 /arm_compute | |
parent | 8df3fafde3dcf131def3471db8e8b1a1c34b354b (diff) | |
download | ComputeLibrary-8fda1cb6f4142133fff045a6f9c18778757c316c.tar.gz |
COMPMID-421: Added FP16 support in BatchNormalizationLayer.
Change-Id: I7142e0e8466ef79e016ae56d285e8e9291573e52
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/79814
Reviewed-by: Moritz Pflanzer <moritz.pflanzer@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
Diffstat (limited to 'arm_compute')
-rw-r--r-- | arm_compute/core/NEON/NEMath.h | 10 | ||||
-rw-r--r-- | arm_compute/core/NEON/NEMath.inl | 3 |
2 files changed, 11 insertions, 2 deletions
```diff
diff --git a/arm_compute/core/NEON/NEMath.h b/arm_compute/core/NEON/NEMath.h
index b467a600d6..39f0c3bf77 100644
--- a/arm_compute/core/NEON/NEMath.h
+++ b/arm_compute/core/NEON/NEMath.h
@@ -36,6 +36,16 @@ namespace arm_compute
  */
 float32x4_t vinvsqrtq_f32(float32x4_t x);

+#ifdef ARM_COMPUTE_ENABLE_FP16
+/** Calculate inverse square root.
+ *
+ * @param[in] x Input value.
+ *
+ * @return The calculated inverse square root.
+ */
+float16x8_t vinvsqrtq_f16(float16x8_t x);
+#endif /* ARM_COMPUTE_ENABLE_FP16 */
+
 /** Calculate reciprocal.
  *
  * @param[in] x Input value.
diff --git a/arm_compute/core/NEON/NEMath.inl b/arm_compute/core/NEON/NEMath.inl
index 1d90029147..08f6749ac9 100644
--- a/arm_compute/core/NEON/NEMath.inl
+++ b/arm_compute/core/NEON/NEMath.inl
@@ -141,7 +141,6 @@ inline float32x4_t vpowq_f32(float32x4_t val, float32x4_t n)
 {
     return vexpq_f32(vmulq_f32(n, vlogq_f32(val)));
 }
-
 #ifdef ARM_COMPUTE_ENABLE_FP16
 /* Exponent polynomial coefficients */
 const std::array<float16x8_t, 8> exp_tab_f16 =
@@ -172,12 +171,12 @@ const std::array<float16x8_t, 8> log_tab_f16 =
         vdupq_n_f16(0.0141278216615f),
     }
 };
+
 inline float16x8_t vinvsqrtq_f16(float16x8_t x)
 {
     float16x8_t sqrt_reciprocal = vrsqrteq_f16(x);
     sqrt_reciprocal             = vmulq_f16(vrsqrtsq_f16(vmulq_f16(x, sqrt_reciprocal), sqrt_reciprocal), sqrt_reciprocal);
     sqrt_reciprocal             = vmulq_f16(vrsqrtsq_f16(vmulq_f16(x, sqrt_reciprocal), sqrt_reciprocal), sqrt_reciprocal);
-
     return sqrt_reciprocal;
 }

```