diff options
author | Michalis Spyrou <michalis.spyrou@arm.com> | 2017-06-22 12:57:51 +0100 |
---|---|---|
committer | Anthony Barbier <anthony.barbier@arm.com> | 2018-11-02 16:35:24 +0000 |
commit | bbd9fb95daa08d6da67c567b40ca2cd032f7a2d3 (patch) | |
tree | c1401585f64396d6f22bb790442d8183f3a17a9e /arm_compute/core | |
parent | 2eac5bd444d16e4e81c427d5a99e1534b387e211 (diff) | |
download | ComputeLibrary-bbd9fb95daa08d6da67c567b40ca2cd032f7a2d3.tar.gz |
COMPMID-412: Port PoolingLayer to use fixed point 16.
Change-Id: I2005de4c7c14526996309826d33a0ec8e732d2d5
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/78720
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
Reviewed-by: Steven Niu <steven.niu@arm.com>
Diffstat (limited to 'arm_compute/core')
-rw-r--r-- | arm_compute/core/FixedPoint.h | 18 | ||||
-rw-r--r-- | arm_compute/core/FixedPoint.inl | 16 | ||||
-rw-r--r-- | arm_compute/core/NEON/NEFixedPoint.inl | 40 | ||||
-rw-r--r-- | arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h | 16 |
4 files changed, 73 insertions, 17 deletions
diff --git a/arm_compute/core/FixedPoint.h b/arm_compute/core/FixedPoint.h index f166d93c3e..82c2d3347e 100644 --- a/arm_compute/core/FixedPoint.h +++ b/arm_compute/core/FixedPoint.h @@ -40,6 +40,24 @@ using qint64_t = int64_t; /**< 64 bit fixed point scalar value */ */ qint8_t sqshl_qs8(qint8_t a, int shift); +/** 8 bit fixed point scalar shift right + * + * @param[in] a First 8 bit fixed point input + * @param[in] shift Shift amount (positive only values) + * + * @return The result of the 8 bit fixed point shift + */ +qint8_t sshr_qs8(qint8_t a, int shift); + +/** 16 bit fixed point scalar shift right + * + * @param[in] a First 16 bit fixed point input + * @param[in] shift Shift amount (positive only values) + * + * @return The result of the 16 bit fixed point shift + */ +qint16_t sshr_qs16(qint16_t a, int shift); + /** 16 bit fixed point scalar saturating shift left * * @param[in] a First 16 bit fixed point input diff --git a/arm_compute/core/FixedPoint.inl b/arm_compute/core/FixedPoint.inl index b921b32ed9..5ea0f6c825 100644 --- a/arm_compute/core/FixedPoint.inl +++ b/arm_compute/core/FixedPoint.inl @@ -21,6 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ +#include "arm_compute/core/Error.h" + #include <cmath> #include <limits> @@ -59,6 +61,20 @@ inline qint16_t sqshl_qs16(qint16_t a, int shift) return saturate_convert<qint32_t, qint16_t>(tmp); } +inline qint8_t sshr_qs8(qint8_t a, int shift) +{ + ARM_COMPUTE_ERROR_ON_MSG(shift == 0, "Shift should not be zero"); + const qint8_t round_val = 1 << (shift - 1); + return sqadd_qs8(a, round_val) >> shift; +} + +inline qint16_t sshr_qs16(qint16_t a, int shift) +{ + ARM_COMPUTE_ERROR_ON_MSG(shift == 0, "Shift should not be zero"); + const qint16_t round_val = 1 << (shift - 1); + return sqadd_qs16(a, round_val) >> shift; +} + inline qint8_t sabs_qs8(qint8_t a) { return (a < 0) ? (a == std::numeric_limits<int8_t>::min()) ? std::numeric_limits<int8_t>::max() : -a : a; diff --git a/arm_compute/core/NEON/NEFixedPoint.inl b/arm_compute/core/NEON/NEFixedPoint.inl index dd1066d6bc..a5d9e7685d 100644 --- a/arm_compute/core/NEON/NEFixedPoint.inl +++ b/arm_compute/core/NEON/NEFixedPoint.inl @@ -25,8 +25,9 @@ namespace arm_compute { -/**< Exponent polynomial coefficients for 8 bit fixed point (8 elements) - * Format is in Q0.7 for all elements */ +/** Exponent polynomial coefficients for 8 bit fixed point (8 elements) + * Format is in Q0.7 for all elements + */ static const std::array<qint8x8_t, 4> exp_tab_qs8 = { { @@ -37,8 +38,9 @@ static const std::array<qint8x8_t, 4> exp_tab_qs8 = } }; -/**< Exponent polynomial coefficients for 16 bit fixed point (4 elements) - * Format is in Q0.15 for all elements */ +/** Exponent polynomial coefficients for 16 bit fixed point (4 elements) + * Format is in Q0.15 for all elements + */ static const std::array<qint16x4_t, 4> exp_tab_qs16 = { { @@ -49,8 +51,9 @@ static const std::array<qint16x4_t, 4> exp_tab_qs16 = } }; -/**< Exponent polynomial coefficients for 8 bit fixed point (16 elements) - * Format is in Q0.7 for all elements */ +/** Exponent polynomial coefficients for 8 bit fixed point (16 elements) + * Format is in Q0.7 for all elements + */ static const std::array<qint8x16_t, 4> exp_tabq_qs8 = { { @@ -61,8 +64,9 @@ static const std::array<qint8x16_t, 4> exp_tabq_qs8 = } }; -/**< Exponent polynomial coefficients for 16 bit fixed point (8 elements) - * Format is in Q0.15 for all elements */ +/** Exponent polynomial coefficients for 16 bit fixed point (8 elements) + * Format is in Q0.15 for all elements + */ static const std::array<qint16x8_t, 4> exp_tabq_qs16 = { { @@ -73,8 +77,9 @@ static const std::array<qint16x8_t, 4> exp_tabq_qs16 = } }; -/**< Logarithm polynomial coefficients for 8 bit fixed point (8 elements) - * Format is in Q0.7 for all elements except the first one which is in Q1.6 */ +/** Logarithm polynomial coefficients for 8 bit fixed point (8 elements) + * Format is in Q0.7 for all elements except the first one which is in Q1.6 + */ static const std::array<qint8x8_t, 4> log_tab_qs8 = { { @@ -85,8 +90,9 @@ static const std::array<qint8x8_t, 4> log_tab_qs8 = } }; -/**< Logarithm polynomial coefficients for 16 bit fixed point (8 elements) - * Format is in Q0.15 for all elements except the first one which is in Q1.14 */ +/** Logarithm polynomial coefficients for 16 bit fixed point (8 elements) + * Format is in Q0.15 for all elements except the first one which is in Q1.14 + */ static const std::array<qint16x4_t, 4> log_tab_qs16 = { { @@ -97,8 +103,9 @@ static const std::array<qint16x4_t, 4> log_tab_qs16 = } }; -/**< Logarithm polynomial coefficients for 8 bit fixed point (16 elements) - * Format is in Q0.7 for all elements except the first one which is in Q1.6 */ +/** Logarithm polynomial coefficients for 8 bit fixed point (16 elements) + * Format is in Q0.7 for all elements except the first one which is in Q1.6 + */ static const std::array<qint8x16_t, 4> log_tabq_qs8 = { { @@ -109,8 +116,9 @@ static const std::array<qint8x16_t, 4> log_tabq_qs8 = } }; -/**< Logarithm polynomial coefficients for 16 bit fixed point (8 elements) - * Format is in Q0.15 for all elements except the first one which is in Q1.14 */ +/** Logarithm polynomial coefficients for 16 bit fixed point (8 elements) + * Format is in Q0.15 for all elements except the first one which is in Q1.14 + */ static const std::array<qint16x8_t, 4> log_tabq_qs16 = { { diff --git a/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h b/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h index a5de81137b..8a938a7f34 100644 --- a/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h @@ -48,7 +48,7 @@ public: ~NEPoolingLayerKernel() = default; /** Set the input and output tensors. * - * @param[in] input Source tensor. Data types supported: QS8/F16/F32. + * @param[in] input Source tensor. Data types supported: QS8/QS16/F16/F32. * @param[out] output Destination tensor. Data types supported: Same as @p input. * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo. */ @@ -81,6 +81,13 @@ private: */ template <PoolingType pooling_type> void pooling2_q8(const Window &window_input, const Window &window); + /** Function to perform 2x2 pooling for 16bit fixed point. + * + * @param[in] window_input Input region on which to execute the kernel. + * @param[in] window Output region on which to execute the kernel. + */ + template <PoolingType pooling_type> + void pooling2_q16(const Window &window_input, const Window &window); /** Function to perform 3x3 pooling. * * @param[in] window_input Input region on which to execute the kernel. @@ -102,6 +109,13 @@ private: */ template <PoolingType pooling_type> void pooling3_q8(const Window &window_input, const Window &window); + /** Function to perform 3x3 pooling for 16bit fixed point. + * + * @param[in] window_input Input region on which to execute the kernel. + * @param[in] window Output region on which to execute the kernel. + */ + template <PoolingType pooling_type> + void pooling3_q16(const Window &window_input, const Window &window); /** Function to perform 7x7 pooling. * * @param[in] window_input Input region on which to execute the kernel. |