From cdf51455df8835e9e3bfd3e31ed389146af9a573 Mon Sep 17 00:00:00 2001
From: Georgios Pinitas
Date: Thu, 31 Aug 2017 14:21:36 +0100
Subject: COMPMID-515: L2 Pooling for FP32/FP16 in CL.

Change-Id: I43641fa672f5905ca62edd1f63fc93e0cf7ea382
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/85963
Tested-by: Kaizen
Reviewed-by: Gian Marco Iodice
---
 arm_compute/core/NEON/NEMath.h   | 41 ++++++++++++++++++++++++++++++++++++----
 arm_compute/core/NEON/NEMath.inl | 33 ++++++++++++++++++++++++++++++++
 arm_compute/core/Types.h         | 3 ++-
 arm_compute/core/Utils.h         | 7 +++++++
 4 files changed, 79 insertions(+), 5 deletions(-)

(limited to 'arm_compute/core')

diff --git a/arm_compute/core/NEON/NEMath.h b/arm_compute/core/NEON/NEMath.h
index 523649c65d..ba65926802 100644
--- a/arm_compute/core/NEON/NEMath.h
+++ b/arm_compute/core/NEON/NEMath.h
@@ -42,17 +42,23 @@ float32x4_t vfloorq_f32(float32x4_t val);
  *
  * @return The calculated inverse square root.
  */
-float32x4_t vinvsqrtq_f32(float32x4_t x);
+float32x2_t vinvsqrt_f32(float32x2_t x);
 
-#ifdef ARM_COMPUTE_ENABLE_FP16
 /** Calculate inverse square root.
  *
  * @param[in] x Input value.
  *
  * @return The calculated inverse square root.
  */
-float16x8_t vinvsqrtq_f16(float16x8_t x);
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+float32x4_t vinvsqrtq_f32(float32x4_t x);
+
+/** Calculate reciprocal.
+ *
+ * @param[in] x Input value.
+ *
+ * @return The calculated reciprocal.
+ */
+float32x2_t vinv_f32(float32x2_t x);
 /** Calculate reciprocal.
  *
  *
@@ -122,6 +128,31 @@ float32x4_t vpowq_f32(float32x4_t val, float32x4_t n);
  * @return The calculated Hyperbolic Tangent.
  */
 float16x8_t vtanhq_f16(float16x8_t val);
+
+/** Calculate reciprocal.
+ *
+ * @param[in] x Input value.
+ *
+ * @return The calculated reciprocal.
+ */
+float16x4_t vinv_f16(float16x4_t x);
+
+/** Calculate reciprocal.
+ *
+ * @param[in] x Input value.
+ *
+ * @return The calculated reciprocal.
+ */
+float16x8_t vinvq_f16(float16x8_t x);
+
+/** Calculate inverse square root.
+ *
+ * @param[in] x Input value.
+ *
+ * @return The calculated inverse square root.
+ */
+float16x4_t vinvsqrt_f16(float16x4_t x);
+
 /** Calculate inverse square root.
  *
  * @param[in] x Input value.
@@ -129,6 +160,7 @@ float16x8_t vtanhq_f16(float16x8_t val);
  * @return The calculated inverse square root.
  */
 float16x8_t vinvsqrtq_f16(float16x8_t x);
+
 /** Calculate exponential
  *
  * @param[in] x Input vector value in F16 format.
@@ -136,6 +168,7 @@ float16x8_t vinvsqrtq_f16(float16x8_t x);
  * @return The calculated exponent.
  */
 float16x8_t vexpq_f16(float16x8_t x);
+
 /** Calculate n power of a number.
  *
  * pow(x,n) = e^(n*log(x))
diff --git a/arm_compute/core/NEON/NEMath.inl b/arm_compute/core/NEON/NEMath.inl
index bdd747c4e9..50d85396d4 100644
--- a/arm_compute/core/NEON/NEMath.inl
+++ b/arm_compute/core/NEON/NEMath.inl
@@ -64,6 +64,15 @@ inline float32x4_t vfloorq_f32(float32x4_t val)
     return vbslq_f32(vcgtq_f32(r, val), vsubq_f32(r, CONST_1), r);
 }
 
+inline float32x2_t vinvsqrt_f32(float32x2_t x)
+{
+    float32x2_t sqrt_reciprocal = vrsqrte_f32(x);
+    sqrt_reciprocal = vmul_f32(vrsqrts_f32(vmul_f32(x, sqrt_reciprocal), sqrt_reciprocal), sqrt_reciprocal);
+    sqrt_reciprocal = vmul_f32(vrsqrts_f32(vmul_f32(x, sqrt_reciprocal), sqrt_reciprocal), sqrt_reciprocal);
+
+    return sqrt_reciprocal;
+}
+
 inline float32x4_t vinvsqrtq_f32(float32x4_t x)
 {
     float32x4_t sqrt_reciprocal = vrsqrteq_f32(x);
@@ -73,6 +82,14 @@ inline float32x4_t vinvsqrtq_f32(float32x4_t x)
     return sqrt_reciprocal;
 }
 
+inline float32x2_t vinv_f32(float32x2_t x)
+{
+    float32x2_t recip = vrecpe_f32(x);
+    recip = vmul_f32(vrecps_f32(x, recip), recip);
+    recip = vmul_f32(vrecps_f32(x, recip), recip);
+    return recip;
+}
+
 inline float32x4_t vinvq_f32(float32x4_t x)
 {
     float32x4_t recip = vrecpeq_f32(x);
@@ -182,6 +199,14 @@ const std::array log_tab_f16 =
     }
 };
 
+inline float16x4_t vinvsqrt_f16(float16x4_t x)
+{
+    float16x4_t sqrt_reciprocal = vrsqrte_f16(x);
+    sqrt_reciprocal = vmul_f16(vrsqrts_f16(vmul_f16(x, sqrt_reciprocal), sqrt_reciprocal), sqrt_reciprocal);
+    sqrt_reciprocal = vmul_f16(vrsqrts_f16(vmul_f16(x, sqrt_reciprocal), sqrt_reciprocal), sqrt_reciprocal);
+    return sqrt_reciprocal;
+}
+
 inline float16x8_t vinvsqrtq_f16(float16x8_t x)
 {
     float16x8_t sqrt_reciprocal = vrsqrteq_f16(x);
@@ -190,6 +215,14 @@ inline float16x8_t vinvsqrtq_f16(float16x8_t x)
     return sqrt_reciprocal;
 }
 
+inline float16x4_t vinv_f16(float16x4_t x)
+{
+    float16x4_t recip = vrecpe_f16(x);
+    recip = vmul_f16(vrecps_f16(x, recip), recip);
+    recip = vmul_f16(vrecps_f16(x, recip), recip);
+    return recip;
+}
+
 inline float16x8_t vinvq_f16(float16x8_t x)
 {
     float16x8_t recip = vrecpeq_f16(x);
diff --git a/arm_compute/core/Types.h b/arm_compute/core/Types.h
index 7d9cd4e0cc..8750a9cf1f 100644
--- a/arm_compute/core/Types.h
+++ b/arm_compute/core/Types.h
@@ -398,7 +398,8 @@ enum class DimensionRoundingType
 enum class PoolingType
 {
     MAX, /**< Max Pooling */
-    AVG /**< Average Pooling */
+    AVG, /**< Average Pooling */
+    L2 /**< L2 Pooling */
 };
 
 /** Padding and stride information class */
diff --git a/arm_compute/core/Utils.h b/arm_compute/core/Utils.h
index 39ec6587de..ab5d110f91 100644
--- a/arm_compute/core/Utils.h
+++ b/arm_compute/core/Utils.h
@@ -609,6 +609,13 @@ const std::string &string_from_border_mode(BorderMode border_mode);
  * @return The string describing the normalization type.
  */
 const std::string &string_from_norm_type(NormType type);
+/** Translates a given pooling type to a string.
+ *
+ * @param[in] type @ref PoolingType to be translated to string.
+ *
+ * @return The string describing the pooling type.
+ */
+const std::string &string_from_pooling_type(PoolingType type);
 /** Lower a given string.
  *
  * @param[in] val Given string to lower.
--
cgit v1.2.1