From 8af2dd6eb230f2205070dce50c2a22bdf2d55e46 Mon Sep 17 00:00:00 2001 From: Michele Di Giorgio Date: Mon, 19 Jun 2017 15:19:29 +0100 Subject: COMPMID-403: Add 7x7 NEON Pooling support. Change-Id: I2f1e808884f215b9cf79e1f2015ef901e66b3e5f Reviewed-on: http://mpd-gerrit.cambridge.arm.com/78146 Reviewed-by: Georgios Pinitas Tested-by: Kaizen --- arm_compute/core/NEON/NEFixedPoint.h | 9 +++++++++ arm_compute/core/NEON/NEFixedPoint.inl | 12 ++++++++++++ arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h | 7 +++++++ 3 files changed, 28 insertions(+) (limited to 'arm_compute/core') diff --git a/arm_compute/core/NEON/NEFixedPoint.h b/arm_compute/core/NEON/NEFixedPoint.h index fb712611cb..201c5b5e7e 100644 --- a/arm_compute/core/NEON/NEFixedPoint.h +++ b/arm_compute/core/NEON/NEFixedPoint.h @@ -681,6 +681,15 @@ qint8x16_t vtanhq_qs8(qint8x16_t a, int fixed_point_position); * @return The result of the 8bit power. */ qint8x8_t vqpowq_qs8(qint8x8_t a, qint8x16_t b, int fixed_point_position); + +/** Compute lane-by-lane maximum between elements of a float vector with 4x2 elements + * + * @param[in] a Float input vector + * @param[in] b Float input vector + * + * @return The lane-by-lane maximum -> float32x4x2 + */ +float32x4x2_t vmax2q_f32(float32x4x2_t a, float32x4x2_t b); } #include "arm_compute/core/NEON/NEFixedPoint.inl" #endif /* __ARM_COMPUTE_NEFIXEDPOINT_H__ */ diff --git a/arm_compute/core/NEON/NEFixedPoint.inl b/arm_compute/core/NEON/NEFixedPoint.inl index 6db344dc11..b57fd3edd2 100644 --- a/arm_compute/core/NEON/NEFixedPoint.inl +++ b/arm_compute/core/NEON/NEFixedPoint.inl @@ -1015,4 +1015,16 @@ inline qint8x16_t vqpowq_qs8(qint8x16_t a, qint8x16_t b, int fixed_point_positio { return vqexpq_qs8(vqmulq_qs8(b, vlogq_qs8(a, fixed_point_position), fixed_point_position), fixed_point_position); } + +inline float32x4x2_t vmax2q_f32(float32x4x2_t a, float32x4x2_t b) +{ + float32x4x2_t res = + { + { + vmaxq_f32(a.val[0], b.val[0]), + vmaxq_f32(a.val[1], b.val[1]) + } + }; + 
return res; +} } diff --git a/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h b/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h index 62a087841a..bf06fdd639 100644 --- a/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h @@ -87,6 +87,13 @@ private: */ template <PoolingType pooling_type> void pooling3_q8(const Window &window_input, const Window &window); + /** Function to perform 7x7 pooling. + * + * @param[in] window_input Input region on which to execute the kernel. + * @param[in] window Output region on which to execute the kernel. + */ + template <PoolingType pooling_type> + void pooling7_f32(const Window &window_input, const Window &window); /** Common signature for all the specialised Pooling functions * * @param[in] window_input Input region on which to execute the kernel. -- cgit v1.2.1