diff options
author | Michele Di Giorgio <michele.digiorgio@arm.com> | 2017-06-19 15:19:29 +0100 |
---|---|---|
committer | Anthony Barbier <anthony.barbier@arm.com> | 2018-09-17 14:14:20 +0100 |
commit | 8af2dd6eb230f2205070dce50c2a22bdf2d55e46 (patch) | |
tree | b0d523617ae866495bc19c5ef3a41b5545eada76 /arm_compute/core/NEON | |
parent | c6cb35a1935cde168f4b72d8782c21a344e78623 (diff) | |
download | ComputeLibrary-8af2dd6eb230f2205070dce50c2a22bdf2d55e46.tar.gz |
COMPMID-403: Add 7x7 NEON Pooling support.
Change-Id: I2f1e808884f215b9cf79e1f2015ef901e66b3e5f
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/78146
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
Diffstat (limited to 'arm_compute/core/NEON')
-rw-r--r-- | arm_compute/core/NEON/NEFixedPoint.h | 9 | ||||
-rw-r--r-- | arm_compute/core/NEON/NEFixedPoint.inl | 12 | ||||
-rw-r--r-- | arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h | 7 |
3 files changed, 28 insertions, 0 deletions
diff --git a/arm_compute/core/NEON/NEFixedPoint.h b/arm_compute/core/NEON/NEFixedPoint.h
index fb712611cb..201c5b5e7e 100644
--- a/arm_compute/core/NEON/NEFixedPoint.h
+++ b/arm_compute/core/NEON/NEFixedPoint.h
@@ -681,6 +681,15 @@ qint8x16_t vtanhq_qs8(qint8x16_t a, int fixed_point_position);
  * @return The result of the 8bit power.
  */
 qint8x8_t vqpowq_qs8(qint8x8_t a, qint8x16_t b, int fixed_point_position);
+
+/** Compute lane-by-lane maximum between elements of a float vector with 4x2 elements
+ *
+ * @param[in] a Float input vector
+ * @param[in] b Float input vector
+ *
+ * @return The lane-by-lane maximum -> float32x4x2
+ */
+float32x4x2_t vmax2q_f32(float32x4x2_t a, float32x4x2_t b);
 }
 #include "arm_compute/core/NEON/NEFixedPoint.inl"
 #endif /* __ARM_COMPUTE_NEFIXEDPOINT_H__ */
diff --git a/arm_compute/core/NEON/NEFixedPoint.inl b/arm_compute/core/NEON/NEFixedPoint.inl
index 6db344dc11..b57fd3edd2 100644
--- a/arm_compute/core/NEON/NEFixedPoint.inl
+++ b/arm_compute/core/NEON/NEFixedPoint.inl
@@ -1015,4 +1015,16 @@ inline qint8x16_t vqpowq_qs8(qint8x16_t a, qint8x16_t b, int fixed_point_positio
 {
     return vqexpq_qs8(vqmulq_qs8(b, vlogq_qs8(a, fixed_point_position), fixed_point_position), fixed_point_position);
 }
+
+inline float32x4x2_t vmax2q_f32(float32x4x2_t a, float32x4x2_t b)
+{
+    float32x4x2_t res =
+    {
+        {
+            vmaxq_f32(a.val[0], b.val[0]),
+            vmaxq_f32(a.val[1], b.val[1])
+        }
+    };
+    return res;
+}
 }
diff --git a/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h b/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h
index 62a087841a..bf06fdd639 100644
--- a/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h
+++ b/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h
@@ -87,6 +87,13 @@ private:
      */
     template <PoolingType pooling_type>
     void pooling3_q8(const Window &window_input, const Window &window);
+    /** Function to perform 7x7 pooling.
+     *
+     * @param[in] window_input Input region on which to execute the kernel.
+     * @param[in] window Output region on which to execute the kernel.
+     */
+    template <PoolingType pooling_type>
+    void pooling7_f32(const Window &window_input, const Window &window);
     /** Common signature for all the specialised Pooling functions
      *
      * @param[in] window_input Input region on which to execute the kernel.