aboutsummaryrefslogtreecommitdiff
path: root/arm_compute/core/NEON
diff options
context:
space:
mode:
author: Michele Di Giorgio <michele.digiorgio@arm.com> 2017-06-19 15:19:29 +0100
committer: Anthony Barbier <anthony.barbier@arm.com> 2018-09-17 14:14:20 +0100
commit: 8af2dd6eb230f2205070dce50c2a22bdf2d55e46 (patch)
tree: b0d523617ae866495bc19c5ef3a41b5545eada76 /arm_compute/core/NEON
parent: c6cb35a1935cde168f4b72d8782c21a344e78623 (diff)
download: ComputeLibrary-8af2dd6eb230f2205070dce50c2a22bdf2d55e46.tar.gz
COMPMID-403: Add 7x7 NEON Pooling support.
Change-Id: I2f1e808884f215b9cf79e1f2015ef901e66b3e5f
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/78146
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
Diffstat (limited to 'arm_compute/core/NEON')
-rw-r--r-- arm_compute/core/NEON/NEFixedPoint.h 9
-rw-r--r-- arm_compute/core/NEON/NEFixedPoint.inl 12
-rw-r--r-- arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h 7
3 files changed, 28 insertions, 0 deletions
diff --git a/arm_compute/core/NEON/NEFixedPoint.h b/arm_compute/core/NEON/NEFixedPoint.h
index fb712611cb..201c5b5e7e 100644
--- a/arm_compute/core/NEON/NEFixedPoint.h
+++ b/arm_compute/core/NEON/NEFixedPoint.h
@@ -681,6 +681,15 @@ qint8x16_t vtanhq_qs8(qint8x16_t a, int fixed_point_position);
* @return The result of the 8bit power.
*/
qint8x8_t vqpowq_qs8(qint8x8_t a, qint8x16_t b, int fixed_point_position);
+
+/** Compute the lane-by-lane maximum of two float vectors, each holding 4x2 elements
+ *
+ * @param[in] a First float input vector (float32x4x2_t)
+ * @param[in] b Second float input vector (float32x4x2_t)
+ *
+ * @return A float32x4x2_t holding, for every lane, the maximum of the corresponding lanes of a and b
+ */
+float32x4x2_t vmax2q_f32(float32x4x2_t a, float32x4x2_t b);
}
#include "arm_compute/core/NEON/NEFixedPoint.inl"
#endif /* __ARM_COMPUTE_NEFIXEDPOINT_H__ */
diff --git a/arm_compute/core/NEON/NEFixedPoint.inl b/arm_compute/core/NEON/NEFixedPoint.inl
index 6db344dc11..b57fd3edd2 100644
--- a/arm_compute/core/NEON/NEFixedPoint.inl
+++ b/arm_compute/core/NEON/NEFixedPoint.inl
@@ -1015,4 +1015,16 @@ inline qint8x16_t vqpowq_qs8(qint8x16_t a, qint8x16_t b, int fixed_point_positio
{
return vqexpq_qs8(vqmulq_qs8(b, vlogq_qs8(a, fixed_point_position), fixed_point_position), fixed_point_position);
}
+
+inline float32x4x2_t vmax2q_f32(float32x4x2_t a, float32x4x2_t b) // Lane-by-lane maximum of a and b, computed separately for val[0] and val[1]
+{
+ float32x4x2_t res = // Built with nested brace initialisation; the two entries below become res.val[0] and res.val[1]
+ {
+ {
+ vmaxq_f32(a.val[0], b.val[0]), // maximum of the first pair of vectors
+ vmaxq_f32(a.val[1], b.val[1]) // maximum of the second pair of vectors
+ }
+ };
+ return res;
+}
}
diff --git a/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h b/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h
index 62a087841a..bf06fdd639 100644
--- a/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h
+++ b/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h
@@ -87,6 +87,13 @@ private:
*/
template <PoolingType pooling_type>
void pooling3_q8(const Window &window_input, const Window &window);
+ /** Function to perform 7x7 pooling, templated on the PoolingType value pooling_type. NOTE(review): the _f32 suffix suggests 32-bit float data - confirm against the implementation.
+ *
+ * @param[in] window_input Input region on which to execute the kernel.
+ * @param[in] window Output region on which to execute the kernel.
+ */
+ template <PoolingType pooling_type>
+ void pooling7_f32(const Window &window_input, const Window &window);
/** Common signature for all the specialised Pooling functions
*
* @param[in] window_input Input region on which to execute the kernel.