diff options
Diffstat (limited to 'arm_compute/core/NEON')
-rw-r--r-- | arm_compute/core/NEON/NEFixedPoint.inl | 40 | ||||
-rw-r--r-- | arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h | 16 |
2 files changed, 39 insertions, 17 deletions
diff --git a/arm_compute/core/NEON/NEFixedPoint.inl b/arm_compute/core/NEON/NEFixedPoint.inl index dd1066d6bc..a5d9e7685d 100644 --- a/arm_compute/core/NEON/NEFixedPoint.inl +++ b/arm_compute/core/NEON/NEFixedPoint.inl @@ -25,8 +25,9 @@ namespace arm_compute { -/**< Exponent polynomial coefficients for 8 bit fixed point (8 elements) - * Format is in Q0.7 for all elements */ +/** Exponent polynomial coefficients for 8 bit fixed point (8 elements) + * Format is in Q0.7 for all elements + */ static const std::array<qint8x8_t, 4> exp_tab_qs8 = { { @@ -37,8 +38,9 @@ static const std::array<qint8x8_t, 4> exp_tab_qs8 = } }; -/**< Exponent polynomial coefficients for 16 bit fixed point (4 elements) - * Format is in Q0.15 for all elements */ +/** Exponent polynomial coefficients for 16 bit fixed point (4 elements) + * Format is in Q0.15 for all elements + */ static const std::array<qint16x4_t, 4> exp_tab_qs16 = { { @@ -49,8 +51,9 @@ static const std::array<qint16x4_t, 4> exp_tab_qs16 = } }; -/**< Exponent polynomial coefficients for 8 bit fixed point (16 elements) - * Format is in Q0.7 for all elements */ +/** Exponent polynomial coefficients for 8 bit fixed point (16 elements) + * Format is in Q0.7 for all elements + */ static const std::array<qint8x16_t, 4> exp_tabq_qs8 = { { @@ -61,8 +64,9 @@ static const std::array<qint8x16_t, 4> exp_tabq_qs8 = } }; -/**< Exponent polynomial coefficients for 16 bit fixed point (8 elements) - * Format is in Q0.15 for all elements */ +/** Exponent polynomial coefficients for 16 bit fixed point (8 elements) + * Format is in Q0.15 for all elements + */ static const std::array<qint16x8_t, 4> exp_tabq_qs16 = { { @@ -73,8 +77,9 @@ static const std::array<qint16x8_t, 4> exp_tabq_qs16 = } }; -/**< Logarithm polynomial coefficients for 8 bit fixed point (8 elements) - * Format is in Q0.7 for all elements except the first one which is in Q1.6 */ +/** Logarithm polynomial coefficients for 8 bit fixed point (8 elements) + * Format is in Q0.7 for all elements except the first one which is in Q1.6 + */ static const std::array<qint8x8_t, 4> log_tab_qs8 = { { @@ -85,8 +90,9 @@ static const std::array<qint8x8_t, 4> log_tab_qs8 = } }; -/**< Logarithm polynomial coefficients for 16 bit fixed point (8 elements) - * Format is in Q0.15 for all elements except the first one which is in Q1.14 */ +/** Logarithm polynomial coefficients for 16 bit fixed point (8 elements) + * Format is in Q0.15 for all elements except the first one which is in Q1.14 + */ static const std::array<qint16x4_t, 4> log_tab_qs16 = { { @@ -97,8 +103,9 @@ static const std::array<qint16x4_t, 4> log_tab_qs16 = } }; -/**< Logarithm polynomial coefficients for 8 bit fixed point (16 elements) - * Format is in Q0.7 for all elements except the first one which is in Q1.6 */ +/** Logarithm polynomial coefficients for 8 bit fixed point (16 elements) + * Format is in Q0.7 for all elements except the first one which is in Q1.6 + */ static const std::array<qint8x16_t, 4> log_tabq_qs8 = { { @@ -109,8 +116,9 @@ static const std::array<qint8x16_t, 4> log_tabq_qs8 = } }; -/**< Logarithm polynomial coefficients for 16 bit fixed point (8 elements) - * Format is in Q0.15 for all elements except the first one which is in Q1.14 */ +/** Logarithm polynomial coefficients for 16 bit fixed point (8 elements) + * Format is in Q0.15 for all elements except the first one which is in Q1.14 + */ static const std::array<qint16x8_t, 4> log_tabq_qs16 = { { diff --git a/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h b/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h index a5de81137b..8a938a7f34 100644 --- a/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h @@ -48,7 +48,7 @@ public: ~NEPoolingLayerKernel() = default; /** Set the input and output tensors. * - * @param[in] input Source tensor. Data types supported: QS8/F16/F32. + * @param[in] input Source tensor. Data types supported: QS8/QS16/F16/F32. * @param[out] output Destination tensor. Data types supported: Same as @p input. * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo. */ @@ -81,6 +81,13 @@ private: */ template <PoolingType pooling_type> void pooling2_q8(const Window &window_input, const Window &window); + /** Function to perform 2x2 pooling for 16bit fixed point. + * + * @param[in] window_input Input region on which to execute the kernel. + * @param[in] window Output region on which to execute the kernel. + */ + template <PoolingType pooling_type> + void pooling2_q16(const Window &window_input, const Window &window); /** Function to perform 3x3 pooling. * * @param[in] window_input Input region on which to execute the kernel. @@ -102,6 +109,13 @@ private: */ template <PoolingType pooling_type> void pooling3_q8(const Window &window_input, const Window &window); + /** Function to perform 3x3 pooling for 16bit fixed point. + * + * @param[in] window_input Input region on which to execute the kernel. + * @param[in] window Output region on which to execute the kernel. + */ + template <PoolingType pooling_type> + void pooling3_q16(const Window &window_input, const Window &window); /** Function to perform 7x7 pooling. * * @param[in] window_input Input region on which to execute the kernel. |