COMPMID-403: Add 7x7 NEON Pooling support.

Change-Id: I2f1e808884f215b9cf79e1f2015ef901e66b3e5f
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/78146
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
author: Michele Di Giorgio <michele.digiorgio@arm.com> 2017-06-19 15:19:29 +0100
committer: Anthony Barbier <anthony.barbier@arm.com> 2018-09-17 14:14:20 +0100
commit: 8af2dd6eb230f2205070dce50c2a22bdf2d55e46 (patch)
tree: b0d523617ae866495bc19c5ef3a41b5545eada76 /arm_compute/core/NEON
parent: c6cb35a1935cde168f4b72d8782c21a344e78623 (diff)
download: ComputeLibrary-8af2dd6eb230f2205070dce50c2a22bdf2d55e46.tar.gz
3 files changed, 28 insertions, 0 deletions
diff --git a/arm_compute/core/NEON/NEFixedPoint.h b/arm_compute/core/NEON/NEFixedPoint.h
index fb712611cb..201c5b5e7e 100644
--- a/arm_compute/core/NEON/NEFixedPoint.h
+++ b/arm_compute/core/NEON/NEFixedPoint.h
@@ -681,6 +681,15 @@ qint8x16_t vtanhq_qs8(qint8x16_t a, int fixed_point_position);
  * @return The result of the 8bit power.
  */
 qint8x8_t vqpowq_qs8(qint8x8_t a, qint8x16_t b, int fixed_point_position);
+
+/** Compute the lane-by-lane maximum of two float32x4x2_t vectors
+ *
+ * @param[in] a First float32x4x2_t input vector
+ * @param[in] b Second float32x4x2_t input vector
+ *
+ * @return A float32x4x2_t holding the lane-by-lane maximum of a and b
+ */
+float32x4x2_t vmax2q_f32(float32x4x2_t a, float32x4x2_t b);
 }
 #include "arm_compute/core/NEON/NEFixedPoint.inl"
 #endif /* __ARM_COMPUTE_NEFIXEDPOINT_H__ */
diff --git a/arm_compute/core/NEON/NEFixedPoint.inl b/arm_compute/core/NEON/NEFixedPoint.inl
index 6db344dc11..b57fd3edd2 100644
--- a/arm_compute/core/NEON/NEFixedPoint.inl
+++ b/arm_compute/core/NEON/NEFixedPoint.inl
@@ -1015,4 +1015,16 @@ inline qint8x16_t vqpowq_qs8(qint8x16_t a, qint8x16_t b, int fixed_point_positio
 {
     return vqexpq_qs8(vqmulq_qs8(b, vlogq_qs8(a, fixed_point_position), fixed_point_position), fixed_point_position);
 }
+
+inline float32x4x2_t vmax2q_f32(float32x4x2_t a, float32x4x2_t b)
+{
+    float32x4x2_t res =
+    {
+        {
+            vmaxq_f32(a.val[0], b.val[0]),
+            vmaxq_f32(a.val[1], b.val[1])
+        }
+    };
+    return res;
+}
 }
diff --git a/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h b/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h
index 62a087841a..bf06fdd639 100644
--- a/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h
+++ b/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h
@@ -87,6 +87,13 @@ private:
      */
     template <PoolingType pooling_type>
     void pooling3_q8(const Window &window_input, const Window &window);
+    /** Function to perform 7x7 pooling.
+     *
+     * @param[in] window_input Input region on which to execute the kernel.
+     * @param[in] window       Output region on which to execute the kernel.
+     */
+    template <PoolingType pooling_type>
+    void pooling7_f32(const Window &window_input, const Window &window);
     /** Common signature for all the specialised Pooling functions
      *
      * @param[in] window_input Input region on which to execute the kernel.
author	Michele Di Giorgio <michele.digiorgio@arm.com>	2017-06-19 15:19:29 +0100
committer	Anthony Barbier <anthony.barbier@arm.com>	2018-09-17 14:14:20 +0100
commit	8af2dd6eb230f2205070dce50c2a22bdf2d55e46 (patch)
tree	b0d523617ae866495bc19c5ef3a41b5545eada76 /arm_compute/core/NEON
parent	c6cb35a1935cde168f4b72d8782c21a344e78623 (diff)
download	ComputeLibrary-8af2dd6eb230f2205070dce50c2a22bdf2d55e46.tar.gz