diff options
author | Georgios Pinitas <georgios.pinitas@arm.com> | 2017-08-31 14:21:36 +0100 |
---|---|---|
committer | Anthony Barbier <anthony.barbier@arm.com> | 2018-11-02 16:35:24 +0000 |
commit | cdf51455df8835e9e3bfd3e31ed389146af9a573 (patch) | |
tree | 31b0bf9302decbf8b1063f46373e3d26a9ca1409 /arm_compute/core/NEON/NEMath.inl | |
parent | 29088d517a2a9f249fe5cc851e0c97de3d4cc917 (diff) | |
download | ComputeLibrary-cdf51455df8835e9e3bfd3e31ed389146af9a573.tar.gz |
COMPMID-515: L2 Pooling for FP32/FP16 in CL.
Change-Id: I43641fa672f5905ca62edd1f63fc93e0cf7ea382
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/85963
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Diffstat (limited to 'arm_compute/core/NEON/NEMath.inl')
-rw-r--r-- | arm_compute/core/NEON/NEMath.inl | 33 |
1 files changed, 33 insertions, 0 deletions
diff --git a/arm_compute/core/NEON/NEMath.inl b/arm_compute/core/NEON/NEMath.inl
index bdd747c4e9..50d85396d4 100644
--- a/arm_compute/core/NEON/NEMath.inl
+++ b/arm_compute/core/NEON/NEMath.inl
@@ -64,6 +64,15 @@ inline float32x4_t vfloorq_f32(float32x4_t val)
     return vbslq_f32(vcgtq_f32(r, val), vsubq_f32(r, CONST_1), r);
 }
 
+inline float32x2_t vinvsqrt_f32(float32x2_t x)
+{
+    float32x2_t sqrt_reciprocal = vrsqrte_f32(x);
+    sqrt_reciprocal = vmul_f32(vrsqrts_f32(vmul_f32(x, sqrt_reciprocal), sqrt_reciprocal), sqrt_reciprocal);
+    sqrt_reciprocal = vmul_f32(vrsqrts_f32(vmul_f32(x, sqrt_reciprocal), sqrt_reciprocal), sqrt_reciprocal);
+
+    return sqrt_reciprocal;
+}
+
 inline float32x4_t vinvsqrtq_f32(float32x4_t x)
 {
     float32x4_t sqrt_reciprocal = vrsqrteq_f32(x);
@@ -73,6 +82,14 @@ inline float32x4_t vinvsqrtq_f32(float32x4_t x)
     return sqrt_reciprocal;
 }
 
+inline float32x2_t vinv_f32(float32x2_t x)
+{
+    float32x2_t recip = vrecpe_f32(x);
+    recip = vmul_f32(vrecps_f32(x, recip), recip);
+    recip = vmul_f32(vrecps_f32(x, recip), recip);
+    return recip;
+}
+
 inline float32x4_t vinvq_f32(float32x4_t x)
 {
     float32x4_t recip = vrecpeq_f32(x);
@@ -182,6 +199,14 @@ const std::array<float16x8_t, 8> log_tab_f16 =
     }
 };
 
+inline float16x4_t vinvsqrt_f16(float16x4_t x)
+{
+    float16x4_t sqrt_reciprocal = vrsqrte_f16(x);
+    sqrt_reciprocal = vmul_f16(vrsqrts_f16(vmul_f16(x, sqrt_reciprocal), sqrt_reciprocal), sqrt_reciprocal);
+    sqrt_reciprocal = vmul_f16(vrsqrts_f16(vmul_f16(x, sqrt_reciprocal), sqrt_reciprocal), sqrt_reciprocal);
+    return sqrt_reciprocal;
+}
+
 inline float16x8_t vinvsqrtq_f16(float16x8_t x)
 {
     float16x8_t sqrt_reciprocal = vrsqrteq_f16(x);
@@ -190,6 +215,14 @@ inline float16x8_t vinvsqrtq_f16(float16x8_t x)
     return sqrt_reciprocal;
 }
 
+inline float16x4_t vinv_f16(float16x4_t x)
+{
+    float16x4_t recip = vrecpe_f16(x);
+    recip = vmul_f16(vrecps_f16(x, recip), recip);
+    recip = vmul_f16(vrecps_f16(x, recip), recip);
+    return recip;
+}
+
 inline float16x8_t vinvq_f16(float16x8_t x)
 {
     float16x8_t recip = vrecpeq_f16(x);