aboutsummaryrefslogtreecommitdiff
path: root/arm_compute
diff options
context:
space:
mode:
author: Georgios Pinitas <georgios.pinitas@arm.com> 2017-08-31 14:21:36 +0100
committer: Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:35:24 +0000
commit: cdf51455df8835e9e3bfd3e31ed389146af9a573 (patch)
tree: 31b0bf9302decbf8b1063f46373e3d26a9ca1409 /arm_compute
parent: 29088d517a2a9f249fe5cc851e0c97de3d4cc917 (diff)
download: ComputeLibrary-cdf51455df8835e9e3bfd3e31ed389146af9a573.tar.gz
COMPMID-515: L2 Pooling for FP32/FP16 in CL.
Change-Id: I43641fa672f5905ca62edd1f63fc93e0cf7ea382
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/85963
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Diffstat (limited to 'arm_compute')
-rw-r--r-- arm_compute/core/NEON/NEMath.h | 41
-rw-r--r-- arm_compute/core/NEON/NEMath.inl | 33
-rw-r--r-- arm_compute/core/Types.h | 3
-rw-r--r-- arm_compute/core/Utils.h | 7
-rw-r--r-- arm_compute/runtime/NEON/functions/NEPoolingLayer.h | 2
5 files changed, 80 insertions, 6 deletions
diff --git a/arm_compute/core/NEON/NEMath.h b/arm_compute/core/NEON/NEMath.h
index 523649c65d..ba65926802 100644
--- a/arm_compute/core/NEON/NEMath.h
+++ b/arm_compute/core/NEON/NEMath.h
@@ -42,17 +42,23 @@ float32x4_t vfloorq_f32(float32x4_t val);
*
* @return The calculated inverse square root.
*/
-float32x4_t vinvsqrtq_f32(float32x4_t x);
+float32x2_t vinvsqrt_f32(float32x2_t x);
-#ifdef ARM_COMPUTE_ENABLE_FP16
/** Calculate inverse square root.
*
* @param[in] x Input value.
*
* @return The calculated inverse square root.
*/
-float16x8_t vinvsqrtq_f16(float16x8_t x);
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+float32x4_t vinvsqrtq_f32(float32x4_t x);
+
+/** Calculate reciprocal.
+ *
+ * @param[in] x Input value.
+ *
+ * @return The calculated reciprocal.
+ */
+float32x2_t vinv_f32(float32x2_t x);
/** Calculate reciprocal.
*
@@ -122,6 +128,31 @@ float32x4_t vpowq_f32(float32x4_t val, float32x4_t n);
* @return The calculated Hyperbolic Tangent.
*/
float16x8_t vtanhq_f16(float16x8_t val);
+
+/** Calculate reciprocal.
+ *
+ * @param[in] x Input value.
+ *
+ * @return The calculated reciprocal.
+ */
+float16x4_t vinv_f16(float16x4_t x);
+
+/** Calculate reciprocal.
+ *
+ * @param[in] x Input value.
+ *
+ * @return The calculated reciprocal.
+ */
+float16x8_t vinvq_f16(float16x8_t x);
+
+/** Calculate inverse square root.
+ *
+ * @param[in] x Input value.
+ *
+ * @return The calculated inverse square root.
+ */
+float16x4_t vinvsqrt_f16(float16x4_t x);
+
/** Calculate inverse square root.
*
* @param[in] x Input value.
@@ -129,6 +160,7 @@ float16x8_t vtanhq_f16(float16x8_t val);
* @return The calculated inverse square root.
*/
float16x8_t vinvsqrtq_f16(float16x8_t x);
+
/** Calculate exponential
*
* @param[in] x Input vector value in F16 format.
@@ -136,6 +168,7 @@ float16x8_t vinvsqrtq_f16(float16x8_t x);
* @return The calculated exponent.
*/
float16x8_t vexpq_f16(float16x8_t x);
+
/** Calculate n power of a number.
*
* pow(x,n) = e^(n*log(x))
diff --git a/arm_compute/core/NEON/NEMath.inl b/arm_compute/core/NEON/NEMath.inl
index bdd747c4e9..50d85396d4 100644
--- a/arm_compute/core/NEON/NEMath.inl
+++ b/arm_compute/core/NEON/NEMath.inl
@@ -64,6 +64,15 @@ inline float32x4_t vfloorq_f32(float32x4_t val)
return vbslq_f32(vcgtq_f32(r, val), vsubq_f32(r, CONST_1), r);
}
+inline float32x2_t vinvsqrt_f32(float32x2_t x) // Per-lane approximation of 1 / sqrt(x): hardware estimate followed by two refinement steps.
+{
+ float32x2_t sqrt_reciprocal = vrsqrte_f32(x); // Initial reciprocal-square-root estimate r for each lane.
+ sqrt_reciprocal = vmul_f32(vrsqrts_f32(vmul_f32(x, sqrt_reciprocal), sqrt_reciprocal), sqrt_reciprocal); // Refine: vrsqrts_f32(a, b) yields (3 - a * b) / 2, so r becomes r * (3 - x * r * r) / 2 (one Newton-Raphson step).
+ sqrt_reciprocal = vmul_f32(vrsqrts_f32(vmul_f32(x, sqrt_reciprocal), sqrt_reciprocal), sqrt_reciprocal); // Same refinement applied a second time to the improved estimate.
+
+ return sqrt_reciprocal; // Refined estimate after two steps.
+}
+
inline float32x4_t vinvsqrtq_f32(float32x4_t x)
{
float32x4_t sqrt_reciprocal = vrsqrteq_f32(x);
@@ -73,6 +82,14 @@ inline float32x4_t vinvsqrtq_f32(float32x4_t x)
return sqrt_reciprocal;
}
+inline float32x2_t vinv_f32(float32x2_t x) // Per-lane approximation of 1 / x: hardware estimate followed by two refinement steps.
+{
+ float32x2_t recip = vrecpe_f32(x); // Initial reciprocal estimate r for each lane.
+ recip = vmul_f32(vrecps_f32(x, recip), recip); // Refine: vrecps_f32(a, b) yields 2 - a * b, so r becomes r * (2 - x * r) (one Newton-Raphson step).
+ recip = vmul_f32(vrecps_f32(x, recip), recip); // Same refinement applied a second time to the improved estimate.
+ return recip; // Refined estimate after two steps.
+}
+
inline float32x4_t vinvq_f32(float32x4_t x)
{
float32x4_t recip = vrecpeq_f32(x);
@@ -182,6 +199,14 @@ const std::array<float16x8_t, 8> log_tab_f16 =
}
};
+inline float16x4_t vinvsqrt_f16(float16x4_t x) // Per-lane approximation of 1 / sqrt(x) for half-precision lanes: estimate followed by two refinement steps.
+{
+ float16x4_t sqrt_reciprocal = vrsqrte_f16(x); // Initial reciprocal-square-root estimate r for each lane.
+ sqrt_reciprocal = vmul_f16(vrsqrts_f16(vmul_f16(x, sqrt_reciprocal), sqrt_reciprocal), sqrt_reciprocal); // Refine: vrsqrts_f16(a, b) yields (3 - a * b) / 2, so r becomes r * (3 - x * r * r) / 2 (one Newton-Raphson step).
+ sqrt_reciprocal = vmul_f16(vrsqrts_f16(vmul_f16(x, sqrt_reciprocal), sqrt_reciprocal), sqrt_reciprocal); // Same refinement applied a second time to the improved estimate.
+ return sqrt_reciprocal; // Refined estimate after two steps.
+}
+
inline float16x8_t vinvsqrtq_f16(float16x8_t x)
{
float16x8_t sqrt_reciprocal = vrsqrteq_f16(x);
@@ -190,6 +215,14 @@ inline float16x8_t vinvsqrtq_f16(float16x8_t x)
return sqrt_reciprocal;
}
+inline float16x4_t vinv_f16(float16x4_t x) // Per-lane approximation of 1 / x for half-precision lanes: estimate followed by two refinement steps.
+{
+ float16x4_t recip = vrecpe_f16(x); // Initial reciprocal estimate r for each lane.
+ recip = vmul_f16(vrecps_f16(x, recip), recip); // Refine: vrecps_f16(a, b) yields 2 - a * b, so r becomes r * (2 - x * r) (one Newton-Raphson step).
+ recip = vmul_f16(vrecps_f16(x, recip), recip); // Same refinement applied a second time to the improved estimate.
+ return recip; // Refined estimate after two steps.
+}
+
inline float16x8_t vinvq_f16(float16x8_t x)
{
float16x8_t recip = vrecpeq_f16(x);
diff --git a/arm_compute/core/Types.h b/arm_compute/core/Types.h
index 7d9cd4e0cc..8750a9cf1f 100644
--- a/arm_compute/core/Types.h
+++ b/arm_compute/core/Types.h
@@ -398,7 +398,8 @@ enum class DimensionRoundingType
enum class PoolingType
{
MAX, /**< Max Pooling */
- AVG /**< Average Pooling */
+ AVG, /**< Average Pooling */
+ L2 /**< L2 Pooling */
};
/** Padding and stride information class */
diff --git a/arm_compute/core/Utils.h b/arm_compute/core/Utils.h
index 39ec6587de..ab5d110f91 100644
--- a/arm_compute/core/Utils.h
+++ b/arm_compute/core/Utils.h
@@ -609,6 +609,13 @@ const std::string &string_from_border_mode(BorderMode border_mode);
* @return The string describing the normalization type.
*/
const std::string &string_from_norm_type(NormType type);
+/** Translates a given pooling type to a string.
+ *
+ * @param[in] type @ref PoolingType to be translated to string.
+ *
+ * @return The string describing the pooling type.
+ */
+const std::string &string_from_pooling_type(PoolingType type);
/** Lower a given string.
*
* @param[in] val Given string to lower.
diff --git a/arm_compute/runtime/NEON/functions/NEPoolingLayer.h b/arm_compute/runtime/NEON/functions/NEPoolingLayer.h
index 5a9cffa5ae..5c36e80f37 100644
--- a/arm_compute/runtime/NEON/functions/NEPoolingLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEPoolingLayer.h
@@ -42,7 +42,7 @@ class NEPoolingLayer : public INESimpleFunction
public:
/** Set the input and output tensors.
*
- * @param[in, out] input Source tensor. (Written to only when padding != 0) Data types supported: QS8/F32.
+ * @param[in, out] input Source tensor. (Written to only when padding != 0) Data types supported: QS8/QS16/F16/F32.
* @param[out] output Destination tensor. Data types supported: Same as @p input.
* @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo.
*/