From bbd9fb95daa08d6da67c567b40ca2cd032f7a2d3 Mon Sep 17 00:00:00 2001 From: Michalis Spyrou Date: Thu, 22 Jun 2017 12:57:51 +0100 Subject: COMPMID-412: Port PoolingLayer to use fixed point 16. Change-Id: I2005de4c7c14526996309826d33a0ec8e732d2d5 Reviewed-on: http://mpd-gerrit.cambridge.arm.com/78720 Tested-by: Kaizen Reviewed-by: Steven Niu --- arm_compute/core/FixedPoint.h | 18 ++++++++++ arm_compute/core/FixedPoint.inl | 16 +++++++++ arm_compute/core/NEON/NEFixedPoint.inl | 40 +++++++++++++--------- .../core/NEON/kernels/NEPoolingLayerKernel.h | 16 ++++++++- 4 files changed, 73 insertions(+), 17 deletions(-) (limited to 'arm_compute') diff --git a/arm_compute/core/FixedPoint.h b/arm_compute/core/FixedPoint.h index f166d93c3e..82c2d3347e 100644 --- a/arm_compute/core/FixedPoint.h +++ b/arm_compute/core/FixedPoint.h @@ -40,6 +40,24 @@ using qint64_t = int64_t; /**< 64 bit fixed point scalar value */ */ qint8_t sqshl_qs8(qint8_t a, int shift); +/** 8 bit fixed point scalar shift right + * + * @param[in] a First 8 bit fixed point input + * @param[in] shift Shift amount (positive only values) + * + * @return The result of the 8 bit fixed point shift + */ +qint8_t sshr_qs8(qint8_t a, int shift); + +/** 16 bit fixed point scalar shift right + * + * @param[in] a First 16 bit fixed point input + * @param[in] shift Shift amount (positive only values) + * + * @return The result of the 16 bit fixed point shift + */ +qint16_t sshr_qs16(qint16_t a, int shift); + /** 16 bit fixed point scalar saturating shift left * * @param[in] a First 16 bit fixed point input diff --git a/arm_compute/core/FixedPoint.inl b/arm_compute/core/FixedPoint.inl index b921b32ed9..5ea0f6c825 100644 --- a/arm_compute/core/FixedPoint.inl +++ b/arm_compute/core/FixedPoint.inl @@ -21,6 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ +#include "arm_compute/core/Error.h" + #include #include @@ -59,6 +61,20 @@ inline qint16_t sqshl_qs16(qint16_t a, int shift) return saturate_convert(tmp); } +inline qint8_t sshr_qs8(qint8_t a, int shift) +{ + ARM_COMPUTE_ERROR_ON_MSG(shift == 0, "Shift should not be zero"); + const qint8_t round_val = 1 << (shift - 1); + return sqadd_qs8(a, round_val) >> shift; +} + +inline qint16_t sshr_qs16(qint16_t a, int shift) +{ + ARM_COMPUTE_ERROR_ON_MSG(shift == 0, "Shift should not be zero"); + const qint16_t round_val = 1 << (shift - 1); + return sqadd_qs16(a, round_val) >> shift; +} + inline qint8_t sabs_qs8(qint8_t a) { return (a < 0) ? (a == std::numeric_limits::min()) ? std::numeric_limits::max() : -a : a; diff --git a/arm_compute/core/NEON/NEFixedPoint.inl b/arm_compute/core/NEON/NEFixedPoint.inl index dd1066d6bc..a5d9e7685d 100644 --- a/arm_compute/core/NEON/NEFixedPoint.inl +++ b/arm_compute/core/NEON/NEFixedPoint.inl @@ -25,8 +25,9 @@ namespace arm_compute { -/**< Exponent polynomial coefficients for 8 bit fixed point (8 elements) - * Format is in Q0.7 for all elements */ +/** Exponent polynomial coefficients for 8 bit fixed point (8 elements) + * Format is in Q0.7 for all elements + */ static const std::array exp_tab_qs8 = { { @@ -37,8 +38,9 @@ static const std::array exp_tab_qs8 = } }; -/**< Exponent polynomial coefficients for 16 bit fixed point (4 elements) - * Format is in Q0.15 for all elements */ +/** Exponent polynomial coefficients for 16 bit fixed point (4 elements) + * Format is in Q0.15 for all elements + */ static const std::array exp_tab_qs16 = { { @@ -49,8 +51,9 @@ static const std::array exp_tab_qs16 = } }; -/**< Exponent polynomial coefficients for 8 bit fixed point (16 elements) - * Format is in Q0.7 for all elements */ +/** Exponent polynomial coefficients for 8 bit fixed point (16 elements) + * Format is in Q0.7 for all elements + */ static const std::array exp_tabq_qs8 = { { @@ -61,8 +64,9 @@ static const std::array exp_tabq_qs8 = } }; -/**< Exponent polynomial coefficients for 16 bit fixed point (8 elements) - * Format is in Q0.15 for all elements */ +/** Exponent polynomial coefficients for 16 bit fixed point (8 elements) + * Format is in Q0.15 for all elements + */ static const std::array exp_tabq_qs16 = { { @@ -73,8 +77,9 @@ static const std::array exp_tabq_qs16 = } }; -/**< Logarithm polynomial coefficients for 8 bit fixed point (8 elements) - * Format is in Q0.7 for all elements except the first one which is in Q1.6 */ +/** Logarithm polynomial coefficients for 8 bit fixed point (8 elements) + * Format is in Q0.7 for all elements except the first one which is in Q1.6 + */ static const std::array log_tab_qs8 = { { @@ -85,8 +90,9 @@ static const std::array log_tab_qs8 = } }; -/**< Logarithm polynomial coefficients for 16 bit fixed point (8 elements) - * Format is in Q0.15 for all elements except the first one which is in Q1.14 */ +/** Logarithm polynomial coefficients for 16 bit fixed point (8 elements) + * Format is in Q0.15 for all elements except the first one which is in Q1.14 + */ static const std::array log_tab_qs16 = { { @@ -97,8 +103,9 @@ static const std::array log_tab_qs16 = } }; -/**< Logarithm polynomial coefficients for 8 bit fixed point (16 elements) - * Format is in Q0.7 for all elements except the first one which is in Q1.6 */ +/** Logarithm polynomial coefficients for 8 bit fixed point (16 elements) + * Format is in Q0.7 for all elements except the first one which is in Q1.6 + */ static const std::array log_tabq_qs8 = { { @@ -109,8 +116,9 @@ static const std::array log_tabq_qs8 = } }; -/**< Logarithm polynomial coefficients for 16 bit fixed point (8 elements) - * Format is in Q0.15 for all elements except the first one which is in Q1.14 */ +/** Logarithm polynomial coefficients for 16 bit fixed point (8 elements) + * Format is in Q0.15 for all elements except the first one which is in Q1.14 + */ static const std::array log_tabq_qs16 = { { diff --git a/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h b/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h index a5de81137b..8a938a7f34 100644 --- a/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h @@ -48,7 +48,7 @@ public: ~NEPoolingLayerKernel() = default; /** Set the input and output tensors. * - * @param[in] input Source tensor. Data types supported: QS8/F16/F32. + * @param[in] input Source tensor. Data types supported: QS8/QS16/F16/F32. * @param[out] output Destination tensor. Data types supported: Same as @p input. * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo. */ @@ -81,6 +81,13 @@ private: */ template void pooling2_q8(const Window &window_input, const Window &window); + /** Function to perform 2x2 pooling for 16bit fixed point. + * + * @param[in] window_input Input region on which to execute the kernel. + * @param[in] window Output region on which to execute the kernel. + */ + template + void pooling2_q16(const Window &window_input, const Window &window); /** Function to perform 3x3 pooling. * * @param[in] window_input Input region on which to execute the kernel. @@ -102,6 +109,13 @@ private: */ template void pooling3_q8(const Window &window_input, const Window &window); + /** Function to perform 3x3 pooling for 16bit fixed point. + * + * @param[in] window_input Input region on which to execute the kernel. + * @param[in] window Output region on which to execute the kernel. + */ + template + void pooling3_q16(const Window &window_input, const Window &window); /** Function to perform 7x7 pooling. * * @param[in] window_input Input region on which to execute the kernel. -- cgit v1.2.1