From 0a8334cb78dae66fdc31257a96ba15f7c41bde50 Mon Sep 17 00:00:00 2001 From: Michalis Spyrou Date: Wed, 14 Jun 2017 18:00:05 +0100 Subject: COMPMID-400 Add support for 16 bit fixed point arithmetic. Change-Id: Iebfaef1b219d80d6362b7fd4b1357612b31e43cb Reviewed-on: http://mpd-gerrit.cambridge.arm.com/77749 Reviewed-by: Moritz Pflanzer Tested-by: Kaizen --- arm_compute/core/FixedPoint.h | 115 ++- arm_compute/core/FixedPoint.inl | 197 ++++- arm_compute/core/NEON/NEFixedPoint.h | 531 +++++++++++- arm_compute/core/NEON/NEFixedPoint.inl | 931 ++++++++++++++++++++- tests/Utils.h | 1 + tests/validation/FixedPoint.h | 5 +- tests/validation/NEON/Fixedpoint/Exp_QS16.cpp | 124 +++ tests/validation/NEON/Fixedpoint/Exp_QS8.cpp | 4 +- tests/validation/NEON/Fixedpoint/Invsqrt_QS16.cpp | 122 +++ tests/validation/NEON/Fixedpoint/Invsqrt_QS8.cpp | 9 +- tests/validation/NEON/Fixedpoint/Log_QS16.cpp | 123 +++ tests/validation/NEON/Fixedpoint/Log_QS8.cpp | 4 +- .../validation/NEON/Fixedpoint/Reciprocal_QS16.cpp | 123 +++ .../validation/NEON/Fixedpoint/Reciprocal_QS8.cpp | 4 +- tests/validation/Reference.cpp | 10 +- tests/validation/TensorFactory.h | 1 + tests/validation/Validation.cpp | 2 + 17 files changed, 2189 insertions(+), 117 deletions(-) create mode 100644 tests/validation/NEON/Fixedpoint/Exp_QS16.cpp create mode 100644 tests/validation/NEON/Fixedpoint/Invsqrt_QS16.cpp create mode 100644 tests/validation/NEON/Fixedpoint/Log_QS16.cpp create mode 100644 tests/validation/NEON/Fixedpoint/Reciprocal_QS16.cpp diff --git a/arm_compute/core/FixedPoint.h b/arm_compute/core/FixedPoint.h index 925b4949a3..da304c6329 100644 --- a/arm_compute/core/FixedPoint.h +++ b/arm_compute/core/FixedPoint.h @@ -33,12 +33,21 @@ using qint32_t = int32_t; /**< 32 bit fixed point scalar value */ /** 8 bit fixed point scalar saturating shift left * * @param[in] a First 8 bit fixed point input - * @param[in] shift Shift amount + * @param[in] shift Shift amount (positive only values) * * @return The result of the 8 bit fixed point shift. The result is saturated in case of overflow */ qint8_t sqshl_qs8(qint8_t a, int shift); +/** 16 bit fixed point scalar saturating shift left + * + * @param[in] a First 16 bit fixed point input + * @param[in] shift Shift amount (positive only values) + * + * @return The result of the 16 bit fixed point shift. 
The result is saturated in case of overflow + */ +qint16_t sqshl_qs16(qint16_t a, int shift); + /** 8 bit fixed point scalar absolute value * * @param[in] a 8 bit fixed point input @@ -47,6 +56,14 @@ qint8_t sqshl_qs8(qint8_t a, int shift); */ qint8_t sabs_qs8(qint8_t a); +/** 16 bit fixed point scalar absolute value + * + * @param[in] a 16 bit fixed point input + * + * @return The result of the 16 bit fixed point absolute value + */ +qint16_t sabs_qs16(qint16_t a); + /** 8 bit fixed point scalar add * * @param[in] a First 8 bit fixed point input @@ -56,6 +73,15 @@ qint8_t sabs_qs8(qint8_t a); */ qint8_t sadd_qs8(qint8_t a, qint8_t b); +/** 16 bit fixed point scalar add + * + * @param[in] a First 16 bit fixed point input + * @param[in] b Second 16 bit fixed point input + * + * @return The result of the 16 bit fixed point addition + */ +qint16_t sadd_qs16(qint16_t a, qint16_t b); + /** 8 bit fixed point scalar saturating add * * @param[in] a First 8 bit fixed point input @@ -83,6 +109,15 @@ qint16_t sqadd_qs16(qint16_t a, qint16_t b); */ qint8_t ssub_qs8(qint8_t a, qint8_t b); +/** 16 bit fixed point scalar subtraction + * + * @param[in] a First 16 bit fixed point input + * @param[in] b Second 16 bit fixed point input + * + * @return The result of the 16 bit fixed point subtraction + */ +qint16_t ssub_qs16(qint16_t a, qint16_t b); + /** 8 bit fixed point scalar saturating subtraction * * @param[in] a First 8 bit fixed point input @@ -92,6 +127,15 @@ qint8_t ssub_qs8(qint8_t a, qint8_t b); */ qint8_t sqsub_qs8(qint8_t a, qint8_t b); +/** 16 bit fixed point scalar saturating subtraction + * + * @param[in] a First 16 bit fixed point input + * @param[in] b Second 16 bit fixed point input + * + * @return The result of the 16 bit fixed point subtraction. The result is saturated in case of overflow + */ +qint16_t sqsub_qs16(qint16_t a, qint16_t b); + /** 8 bit fixed point scalar multiply * * @param[in] a First 8 bit fixed point input @@ -102,6 +146,16 @@ qint8_t sqsub_qs8(qint8_t a, qint8_t b); */ qint8_t smul_qs8(qint8_t a, qint8_t b, int fixed_point_position); +/** 16 bit fixed point scalar multiply + * + * @param[in] a First 16 bit fixed point input + * @param[in] b Second 16 bit fixed point input + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the 16 bit fixed point multiplication. + */ +qint16_t smul_qs16(qint16_t a, qint16_t b, int fixed_point_position); + /** 8 bit fixed point scalar saturating multiply * * @param[in] a First 8 bit fixed point input @@ -112,6 +166,16 @@ qint8_t smul_qs8(qint8_t a, qint8_t b, int fixed_point_position); */ qint8_t sqmul_qs8(qint8_t a, qint8_t b, int fixed_point_position); +/** 16 bit fixed point scalar saturating multiply + * + * @param[in] a First 16 bit fixed point input + * @param[in] b Second 16 bit fixed point input + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the 16 bit fixed point multiplication. 
The result is saturated in case of overflow + */ +qint16_t sqmul_qs16(qint16_t a, qint16_t b, int fixed_point_position); + /** 8 bit fixed point scalar multiply long * * @param[in] a First 8 bit fixed point input @@ -122,6 +186,16 @@ qint8_t sqmul_qs8(qint8_t a, qint8_t b, int fixed_point_position); */ qint16_t sqmull_qs8(qint8_t a, qint8_t b, int fixed_point_position); +/** 16 bit fixed point scalar multiply long + * + * @param[in] a First 16 bit fixed point input + * @param[in] b Second 16 bit fixed point input + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the 16 bit fixed point multiplication long. The result is saturated in case of overflow + */ +qint32_t sqmull_qs16(qint16_t a, qint16_t b, int fixed_point_position); + /** 16 bit fixed point scalar saturating multiply * * @param[in] a First 16 bit fixed point input @@ -141,6 +215,15 @@ qint16_t sqmul_qs16(qint16_t a, qint16_t b, int fixed_point_position); */ qint8_t sinvsqrt_qs8(qint8_t a, int fixed_point_position); +/** 16 bit fixed point scalar inverse square root +* +* @param[in] a 16 bit fixed point input +* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number +* +* @return The result of the 16 bit fixed point inverse square root. +*/ +qint16_t sinvsqrt_qs16(qint16_t a, int fixed_point_position); + /** 8 bit fixed point scalar division * * @param[in] a First 8 bit fixed point input @@ -151,6 +234,16 @@ qint8_t sinvsqrt_qs8(qint8_t a, int fixed_point_position); */ qint8_t sdiv_qs8(qint8_t a, qint8_t b, int fixed_point_position); +/** 16 bit fixed point scalar division +* +* @param[in] a First 16 bit fixed point input +* @param[in] b Second 16 bit fixed point input +* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number +* +* @return The result of the 16 bit fixed point division. +*/ +qint16_t sdiv_qs16(qint16_t a, qint16_t b, int fixed_point_position); + /** 8 bit fixed point scalar exponential * * @param[in] a 8 bit fixed point input @@ -160,6 +253,15 @@ qint8_t sdiv_qs8(qint8_t a, qint8_t b, int fixed_point_position); */ qint8_t sexp_qs8(qint8_t a, int fixed_point_position); +/** 16 bit fixed point scalar exponential +* +* @param[in] a 16 bit fixed point input +* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number +* +* @return The result of the 16 bit fixed point exponential. +*/ +qint16_t sexp_qs16(qint16_t a, int fixed_point_position); + /** 8 bit fixed point scalar logarithm * * @param[in] a 8 bit fixed point input @@ -169,6 +271,15 @@ qint8_t sexp_qs8(qint8_t a, int fixed_point_position); */ qint8_t slog_qs8(qint8_t a, int fixed_point_position); +/** 16 bit fixed point scalar logarithm +* +* @param[in] a 16 bit fixed point input +* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number +* +* @return The result of the 16 bit fixed point logarithm. 
+*/ +qint16_t slog_qs16(qint16_t a, int fixed_point_position); + /** Convert an 8 bit fixed point to float * * @param[in] a Input to convert @@ -203,7 +314,7 @@ float scvt_f32_qs16(qint16_t a, int fixed_point_position); * * @return The result of the conversion float -> 16 bit fixed point */ -qint8_t scvt_qs16_f32(float a, int fixed_point_position); +qint16_t scvt_qs16_f32(float a, int fixed_point_position); /** Scalar saturating move and narrow. * diff --git a/arm_compute/core/FixedPoint.inl b/arm_compute/core/FixedPoint.inl index 4263a6f00d..fab91d6699 100644 --- a/arm_compute/core/FixedPoint.inl +++ b/arm_compute/core/FixedPoint.inl @@ -46,13 +46,27 @@ namespace arm_compute inline qint8_t sqshl_qs8(qint8_t a, int shift) { qint16_t tmp = static_cast(a) << shift; + // Saturate the result in case of overflow and cast to qint8_t return saturate_convert(tmp); } +inline qint16_t sqshl_qs16(qint16_t a, int shift) +{ + qint32_t tmp = static_cast(a) << shift; + + // Saturate the result in case of overflow and cast to qint16_t + return saturate_convert(tmp); +} + inline qint8_t sabs_qs8(qint8_t a) { - return a & 0x7F; + return (a < 0) ? (a == std::numeric_limits::min()) ? std::numeric_limits::max() : -a : a; +} + +inline qint16_t sabs_qs16(qint16_t a) +{ + return (a < 0) ? (a == std::numeric_limits::min()) ? std::numeric_limits::max() : -a : a; } inline qint8_t sadd_qs8(qint8_t a, qint8_t b) @@ -60,6 +74,11 @@ inline qint8_t sadd_qs8(qint8_t a, qint8_t b) return a + b; } +inline qint16_t sadd_qs16(qint16_t a, qint16_t b) +{ + return a + b; +} + inline qint8_t sqadd_qs8(qint8_t a, qint8_t b) { // We need to store the temporary result in qint16_t otherwise we cannot evaluate the overflow @@ -83,6 +102,11 @@ inline qint8_t ssub_qs8(qint8_t a, qint8_t b) return a - b; } +inline qint16_t ssub_qs16(qint16_t a, qint16_t b) +{ + return a - b; +} + inline qint8_t sqsub_qs8(qint8_t a, qint8_t b) { // We need to store the temporary result in uint16_t otherwise we cannot evaluate the overflow @@ -92,6 +116,15 @@ inline qint8_t sqsub_qs8(qint8_t a, qint8_t b) return saturate_convert(tmp); } +inline qint16_t sqsub_qs16(qint16_t a, qint16_t b) +{ + // We need to store the temporary result in qint32_t otherwise we cannot evaluate the overflow + qint32_t tmp = static_cast(a) - static_cast(b); + + // Saturate the result in case of overflow and cast to qint16_t + return saturate_convert(tmp); +} + inline qint8_t smul_qs8(qint8_t a, qint8_t b, int fixed_point_position) { const qint16_t round_up_const = (1 << (fixed_point_position - 1)); @@ -104,6 +137,18 @@ inline qint8_t smul_qs8(qint8_t a, qint8_t b, int fixed_point_position) return static_cast(tmp >> fixed_point_position); } +inline qint16_t smul_qs16(qint16_t a, qint16_t b, int fixed_point_position) +{ + const qint32_t round_up_const = (1 << (fixed_point_position - 1)); + + qint32_t tmp = static_cast(a) * static_cast(b); + + // Rounding up + tmp += round_up_const; + + return static_cast(tmp >> fixed_point_position); +} + inline qint8_t sqmul_qs8(qint8_t a, qint8_t b, int fixed_point_position) { const qint16_t round_up_const = (1 << (fixed_point_position - 1)); @@ -140,16 +185,28 @@ inline qint16_t sqmull_qs8(qint8_t a, qint8_t b, int fixed_point_position) return tmp >> fixed_point_position; } +inline qint32_t sqmull_qs16(qint16_t a, qint16_t b, int fixed_point_position) +{ + const qint32_t round_up_const = (1 << (fixed_point_position - 1)); + + qint32_t tmp = static_cast(a) * static_cast(b); + + // Rounding up + tmp += round_up_const; + + return tmp >> 
fixed_point_position; +} + inline qint8_t sinvsqrt_qs8(qint8_t a, int fixed_point_position) { - qint8_t shift = 8 - (fixed_point_position + (__builtin_clz(a) - 24)); + const qint8_t shift = 8 - (fixed_point_position + (__builtin_clz(a) - 24)); - qint8_t const_three = (3 << fixed_point_position); - qint8_t temp = shift < 0 ? (a << -shift) : (a >> shift); - qint8_t x2 = temp; + const qint8_t const_three = (3 << fixed_point_position); + qint8_t temp = shift < 0 ? (a << -shift) : (a >> shift); + qint8_t x2 = temp; // We need three iterations to find the result - for(int i = 0; i < 3; i++) + for(int i = 0; i < 3; ++i) { qint8_t three_minus_dx = ssub_qs8(const_three, smul_qs8(temp, smul_qs8(x2, x2, fixed_point_position), fixed_point_position)); x2 = (smul_qs8(x2, three_minus_dx, fixed_point_position) >> 1); @@ -160,35 +217,84 @@ inline qint8_t sinvsqrt_qs8(qint8_t a, int fixed_point_position) return temp; } +inline qint16_t sinvsqrt_qs16(qint16_t a, int fixed_point_position) +{ + const qint16_t shift = 16 - (fixed_point_position + (__builtin_clz(a) - 16)); + + const qint16_t const_three = (3 << fixed_point_position); + qint16_t temp = shift < 0 ? (a << -shift) : (a >> shift); + qint16_t x2 = temp; + + // We need three iterations to find the result + for(int i = 0; i < 3; ++i) + { + qint16_t three_minus_dx = ssub_qs16(const_three, smul_qs16(temp, smul_qs16(x2, x2, fixed_point_position), fixed_point_position)); + x2 = smul_qs16(x2, three_minus_dx, fixed_point_position) >> 1; + } + + temp = shift < 0 ? (x2 << ((-shift) >> 1)) : (x2 >> (shift >> 1)); + + return temp; +} + inline qint8_t sdiv_qs8(qint8_t a, qint8_t b, int fixed_point_position) { - qint16_t temp = a << fixed_point_position; - return (qint8_t)(temp / b); + const qint16_t temp = a << fixed_point_position; + return static_cast(temp / b); +} + +inline qint16_t sdiv_qs16(qint16_t a, qint16_t b, int fixed_point_position) +{ + const qint32_t temp = a << fixed_point_position; + return static_cast(temp / b); } inline qint8_t sqexp_qs8(qint8_t a, int fixed_point_position) { // Constants - qint8_t const_one = (1 << fixed_point_position); - qint8_t ln2 = ((0x58 >> (6 - fixed_point_position)) + 1) >> 1; - qint8_t inv_ln2 = (((0x38 >> (6 - fixed_point_position)) + 1) >> 1) | const_one; - qint8_t A = ((0x7F >> (6 - fixed_point_position)) + 1) >> 1; - qint8_t B = ((0x3F >> (6 - fixed_point_position)) + 1) >> 1; - qint8_t C = ((0x16 >> (6 - fixed_point_position)) + 1) >> 1; - qint8_t D = ((0x05 >> (6 - fixed_point_position)) + 1) >> 1; + const qint8_t const_one = (1 << fixed_point_position); + const qint8_t ln2 = ((0x58 >> (6 - fixed_point_position)) + 1) >> 1; + const qint8_t inv_ln2 = (((0x38 >> (6 - fixed_point_position)) + 1) >> 1) | const_one; + const qint8_t A = ((0x7F >> (6 - fixed_point_position)) + 1) >> 1; + const qint8_t B = ((0x3F >> (6 - fixed_point_position)) + 1) >> 1; + const qint8_t C = ((0x16 >> (6 - fixed_point_position)) + 1) >> 1; + const qint8_t D = ((0x05 >> (6 - fixed_point_position)) + 1) >> 1; // Polynomial expansion - int dec_a = (sqmul_qs8(a, inv_ln2, fixed_point_position) >> fixed_point_position); - qint8_t alpha = sabs_qs8(sqsub_qs8(a, sqmul_qs8(ln2, sqshl_qs8(dec_a, fixed_point_position), fixed_point_position))); - qint8_t sum = sqadd_qs8(sqmul_qs8(alpha, D, fixed_point_position), C); - sum = sqadd_qs8(sqmul_qs8(alpha, sum, fixed_point_position), B); - sum = sqadd_qs8(sqmul_qs8(alpha, sum, fixed_point_position), A); - sum = sqmul_qs8(alpha, sum, fixed_point_position); - sum = sqadd_qs8(sum, const_one); + const int dec_a 
= (sqmul_qs8(a, inv_ln2, fixed_point_position) >> fixed_point_position); + const qint8_t alpha = sabs_qs8(sqsub_qs8(a, sqmul_qs8(ln2, sqshl_qs8(dec_a, fixed_point_position), fixed_point_position))); + qint8_t sum = sqadd_qs8(sqmul_qs8(alpha, D, fixed_point_position), C); + sum = sqadd_qs8(sqmul_qs8(alpha, sum, fixed_point_position), B); + sum = sqadd_qs8(sqmul_qs8(alpha, sum, fixed_point_position), A); + sum = sqmul_qs8(alpha, sum, fixed_point_position); + sum = sqadd_qs8(sum, const_one); return (dec_a < 0) ? (sum >> -dec_a) : sqshl_qs8(sum, dec_a); } +inline qint16_t sqexp_qs16(qint16_t a, int fixed_point_position) +{ + // Constants + const qint16_t const_one = (1 << fixed_point_position); + const qint16_t ln2 = ((0x58B9 >> (14 - fixed_point_position)) + 1) >> 1; + const qint16_t inv_ln2 = (((0x38AA >> (14 - fixed_point_position)) + 1) >> 1) | const_one; + const qint16_t A = ((0x7FBA >> (14 - fixed_point_position)) + 1) >> 1; + const qint16_t B = ((0x3FE9 >> (14 - fixed_point_position)) + 1) >> 1; + const qint16_t C = ((0x1693 >> (14 - fixed_point_position)) + 1) >> 1; + const qint16_t D = ((0x0592 >> (14 - fixed_point_position)) + 1) >> 1; + + // Polynomial expansion + const int dec_a = (sqmul_qs16(a, inv_ln2, fixed_point_position) >> fixed_point_position); + const qint16_t alpha = sabs_qs16(sqsub_qs16(a, sqmul_qs16(ln2, sqshl_qs16(dec_a, fixed_point_position), fixed_point_position))); + qint16_t sum = sqadd_qs16(sqmul_qs16(alpha, D, fixed_point_position), C); + sum = sqadd_qs16(sqmul_qs16(alpha, sum, fixed_point_position), B); + sum = sqadd_qs16(sqmul_qs16(alpha, sum, fixed_point_position), A); + sum = sqmul_qs16(alpha, sum, fixed_point_position); + sum = sqadd_qs16(sum, const_one); + + return (dec_a < 0) ? (sum >> -dec_a) : sqshl_qs16(sum, dec_a); +} + inline qint8_t slog_qs8(qint8_t a, int fixed_point_position) { // Constants @@ -214,14 +320,47 @@ inline qint8_t slog_qs8(qint8_t a, int fixed_point_position) a = ssub_qs8(a, const_one); // Polynomial expansion - auto sum = sqadd_qs8(sqmul_qs8(a, D, fixed_point_position), C); - sum = sqadd_qs8(sqmul_qs8(a, sum, fixed_point_position), B); - sum = sqadd_qs8(sqmul_qs8(a, sum, fixed_point_position), A); - sum = sqmul_qs8(a, sum, fixed_point_position); + qint8_t sum = sqadd_qs8(sqmul_qs8(a, D, fixed_point_position), C); + sum = sqadd_qs8(sqmul_qs8(a, sum, fixed_point_position), B); + sum = sqadd_qs8(sqmul_qs8(a, sum, fixed_point_position), A); + sum = sqmul_qs8(a, sum, fixed_point_position); return smul_qs8(sadd_qs8(sum, shift_val << fixed_point_position), ln2, fixed_point_position); } +inline qint16_t slog_qs16(qint16_t a, int fixed_point_position) +{ + // Constants + qint16_t const_one = (1 << fixed_point_position); + qint16_t ln2 = (0x58B9 >> (7 - fixed_point_position)); + qint16_t A = (0x5C0F >> (7 - fixed_point_position - 1)); + qint16_t B = -(0x56AE >> (7 - fixed_point_position)); + qint16_t C = (0x2933 >> (7 - fixed_point_position)); + qint16_t D = -(0x0AA7 >> (7 - fixed_point_position)); + + if((const_one == a) || (a < 0)) + { + return 0; + } + else if(a < const_one) + { + return -slog_qs16(sdiv_qs16(const_one, a, fixed_point_position), fixed_point_position); + } + + // Remove even powers of 2 + qint16_t shift_val = 31 - __builtin_clz(a >> fixed_point_position); + a >>= shift_val; + a = ssub_qs16(a, const_one); + + // Polynomial expansion + qint16_t sum = sqadd_qs16(sqmul_qs16(a, D, fixed_point_position), C); + sum = sqadd_qs16(sqmul_qs16(a, sum, fixed_point_position), B); + sum = sqadd_qs16(sqmul_qs16(a, sum, fixed_point_position), 
A); + sum = sqmul_qs16(a, sum, fixed_point_position); + + return smul_qs16(sadd_qs16(sum, shift_val << fixed_point_position), ln2, fixed_point_position); +} + inline float scvt_f32_qs8(qint8_t a, int fixed_point_position) { return static_cast(a) / (1 << fixed_point_position); @@ -230,7 +369,7 @@ inline float scvt_f32_qs8(qint8_t a, int fixed_point_position) inline qint8_t scvt_qs8_f32(float a, int fixed_point_position) { // round_nearest_integer(a * 2^(fixed_point_position)) - return static_cast(static_cast(a) * (1 << fixed_point_position) + 0.5f); + return static_cast(a * (1 << fixed_point_position) + 0.5f); } inline float scvt_f32_qs16(qint16_t a, int fixed_point_position) @@ -238,10 +377,10 @@ inline float scvt_f32_qs16(qint16_t a, int fixed_point_position) return static_cast(a) / (1 << fixed_point_position); } -inline qint8_t scvt_qs16_f32(float a, int fixed_point_position) +inline qint16_t scvt_qs16_f32(float a, int fixed_point_position) { // round_nearest_integer(a * 2^(fixed_point_position)) - return static_cast(static_cast(a) * (1 << fixed_point_position) + 0.5f); + return static_cast(a * (1 << fixed_point_position) + 0.5f); } inline qint8_t sqmovn_qs16(qint16_t a) diff --git a/arm_compute/core/NEON/NEFixedPoint.h b/arm_compute/core/NEON/NEFixedPoint.h index 201c5b5e7e..660464eb62 100644 --- a/arm_compute/core/NEON/NEFixedPoint.h +++ b/arm_compute/core/NEON/NEFixedPoint.h @@ -46,6 +46,7 @@ using qint16x8_t = int16x8_t; /**< 16 bit fixed point vector with 8 elements using qint16x8x2_t = int16x8x2_t; /**< 16 bit fixed point vector with 16 elements */ using qint16x8x3_t = int16x8x3_t; /**< 16 bit fixed point vector with 24 elements */ using qint16x8x4_t = int16x8x4_t; /**< 16 bit fixed point vector with 32 elements */ +using qint32x4_t = int32x4_t; /**< 32 bit fixed point vector with 4 elements */ /** Get the lower half of a 16 elements vector * @@ -55,6 +56,14 @@ using qint16x8x4_t = int16x8x4_t; /**< 16 bit fixed point vector with 32 element */ qint8x8_t vget_low_qs8(qint8x16_t a); +/** Get the lower half of a 16 elements vector + * + * @param[in] a vector of 8 elements + * + * @return 16 bit fixed point vector (4 elements) + */ +qint16x4_t vget_low_qs16(qint16x8_t a); + /** Get the higher half of a 16 elements vector * * @param[in] a vector of 16 elements @@ -63,21 +72,21 @@ qint8x8_t vget_low_qs8(qint8x16_t a); */ qint8x8_t vget_high_qs8(qint8x16_t a); -/** Load a single 8 bit fixed point vector from memory (8 elements) +/** Get the higher half of a 16 elements vector * - * @param[in] addr Memory address of the 8 bit fixed point vector to load + * @param[in] a vector of 8 elements * - * @return 8 bit fixed point vector (8 elements) + * @return 16 bit fixed point vector (4 elements) */ -qint8x8_t vld1_qs8(const qint8_t *addr); +qint16x4_t vget_high_qs16(qint16x8_t a); -/** Load a single 8 bit fixed point vector from memory (16 elements) +/** Load a single 8 bit fixed point vector from memory (8 elements) * * @param[in] addr Memory address of the 8 bit fixed point vector to load * - * @return 8 bit fixed point vector (16 elements) + * @return 8 bit fixed point vector (8 elements) */ -qint8x16_t vld1q_qs8(const qint8_t *addr); +qint8x8_t vld1_qs8(const qint8_t *addr); /** Load a single 16 bit fixed point vector from memory (4 elements) * @@ -87,6 +96,14 @@ qint8x16_t vld1q_qs8(const qint8_t *addr); */ qint16x4_t vld1_qs16(const qint16_t *addr); +/** Load a single 8 bit fixed point vector from memory (16 elements) + * + * @param[in] addr Memory address of the 8 bit fixed point vector 
to load + * + * @return 8 bit fixed point vector (16 elements) + */ +qint8x16_t vld1q_qs8(const qint8_t *addr); + /** Load a single 16 bit fixed point vector from memory (8 elements) * * @param[in] addr Memory address of the 16 bit fixed point vector to load @@ -103,6 +120,14 @@ qint16x8_t vld1q_qs16(const qint16_t *addr); */ qint8x8_t vld1_dup_qs8(const qint8_t *addr); +/** Load all lanes of 16 bit fixed point vector with same value from memory (4 elements) + * + * @param[in] addr Memory address of the 16 bit fixed point scalar value to load + * + * @return 16 bit fixed point vector (4 elements) + */ +qint16x4_t vld1_dup_qs16(const qint16_t *addr); + /** Load all lanes of 8 bit fixed point vector with same value from memory (16 elements) * * @param[in] addr Memory address of the 8 bit fixed point scalar value to load @@ -111,21 +136,21 @@ qint8x8_t vld1_dup_qs8(const qint8_t *addr); */ qint8x16_t vld1q_dup_qs8(const qint8_t *addr); -/** Store a single 8 bit fixed point vector to memory (8 elements) +/** Load all lanes of 16 bit fixed point vector with same value from memory (8 elements) * - * @param[in] addr Memory address where the 8 bit fixed point vector should be stored - * @param[in] b 8 bit fixed point vector to store + * @param[in] addr Memory address of the 16 bit fixed point scalar value to load * + * @return 16 bit fixed point vector (8 elements) */ -void vst1_qs8(qint8_t *addr, qint8x8_t b); +qint16x8_t vld1q_dup_qs16(const qint16_t *addr); -/** Store a single 8 bit fixed point vector to memory (16 elements) +/** Store a single 8 bit fixed point vector to memory (8 elements) * * @param[in] addr Memory address where the 8 bit fixed point vector should be stored * @param[in] b 8 bit fixed point vector to store * */ -void vst1q_qs8(qint8_t *addr, qint8x16_t b); +void vst1_qs8(qint8_t *addr, qint8x8_t b); /** Store a single 16 bit fixed point vector to memory (4 elements) * @@ -137,10 +162,18 @@ void vst1_qs16(qint16_t *addr, qint16x4_t b); /** Store a single 8 bit fixed point vector to memory (16 elements) * - * @param[in] addr Memory address where the 16 bit fixed point vector should be stored - * @param[in] b 16 bit fixed point vector to store + * @param[in] addr Memory address where the 8 bit fixed point vector should be stored + * @param[in] b 8 bit fixed point vector to store * */ +void vst1q_qs8(qint8_t *addr, qint8x16_t b); + +/** Store a single 16 bit fixed point vector to memory (8 elements) +* +* @param[in] addr Memory address where the 16 bit fixed point vector should be stored +* @param[in] b 16 bit fixed point vector to store +* +*/ void vst1q_qs16(qint16_t *addr, qint16x8_t b); /** 16 bit fixed point vector saturating narrow (8 elements) @@ -151,6 +184,14 @@ void vst1q_qs16(qint16_t *addr, qint16x8_t b); */ qint8x8_t vqmovn_q16(qint16x8_t a); +/** 32 bit fixed point vector saturating narrow (4 elements) + * + * @param[in] a 32 bit fixed point vector to convert + * + * @return 16 bit fixed point vector + */ +qint16x4_t vqmovn_q32(qint32x4_t a); + /** 8 bit fixed point vector duplicate (8 elements) * * @param[in] a 8 bit fixed point to duplicate @@ -159,6 +200,14 @@ qint8x8_t vqmovn_q16(qint16x8_t a); */ qint8x8_t vdup_n_qs8(qint8_t a); +/** 16 bit fixed point vector duplicate (4 elements) + * + * @param[in] a 16 bit fixed point to duplicate + * + * @return The result of the vector duplication + */ +qint16x4_t vdup_n_qs16(qint16_t a); + /** 8 bit fixed point vector duplicate (16 elements) * * @param[in] a 8 bit fixed point to duplicate @@ -192,6 +241,14 @@ qint16x8_t 
vdupq_n_qs16(qint16x8_t a); */ qint8x8_t vabs_qs8(qint8x8_t a); +/** Absolute value of 16 bit fixed point vector (4 elements) + * + * @param[in] a 16 bit fixed point input vector + * + * @return The result of the 16 bit fixed point vector absolute value + */ +qint16x4_t vabs_qs16(qint16x4_t a); + /** Absolute value of 8 bit fixed point vector (16 elements) * * @param[in] a 8 bit fixed point input vector @@ -200,6 +257,14 @@ qint8x8_t vabs_qs8(qint8x8_t a); */ qint8x16_t vabsq_qs8(qint8x16_t a); +/** Absolute value of 16 bit fixed point vector (8 elements) + * + * @param[in] a 16 bit fixed point input vector + * + * @return The result of the 16 bit fixed point vector absolute value + */ +qint16x8_t vabsq_qs16(qint16x8_t a); + /** Saturating absolute value of 8 bit fixed point vector (8 elements) * * @param[in] a 8 bit fixed point input vector @@ -208,6 +273,14 @@ qint8x16_t vabsq_qs8(qint8x16_t a); */ qint8x8_t vqabs_qs8(qint8x8_t a); +/** Saturating absolute value of 16 bit fixed point vector (4 elements) + * + * @param[in] a 4 bit fixed point input vector + * + * @return The result of the 16 bit fixed point vector absolute value + */ +qint16x4_t vqabs_qs16(qint16x4_t a); + /** Saturating absolute value of 8 bit fixed point vector (16 elements) * * @param[in] a 8 bit fixed point input vector @@ -216,6 +289,14 @@ qint8x8_t vqabs_qs8(qint8x8_t a); */ qint8x16_t vqabsq_qs8(qint8x16_t a); +/** Saturating absolute value of 16 bit fixed point vector (8 elements) + * + * @param[in] a 16 bit fixed point input vector + * + * @return The result of the 16 bit fixed point vector absolute value + */ +qint16x8_t vqabsq_qs16(qint16x8_t a); + /** 8 bit fixed point vector max (8 elements) * * @param[in] a First 8 bit fixed point input vector @@ -225,6 +306,15 @@ qint8x16_t vqabsq_qs8(qint8x16_t a); */ qint8x8_t vmax_qs8(qint8x8_t a, qint8x8_t b); +/** 16 bit fixed point vector max (4 elements) + * + * @param[in] a First 16 bit fixed point input vector + * @param[in] b Second 16 bit fixed point input vector + * + * @return The result of the 16 bit fixed point vector max operation + */ +qint16x4_t vmax_qs16(qint16x4_t a, qint16x4_t b); + /** 8 bit fixed point vector max (16 elements) * * @param[in] a First 8 bit fixed point input vector @@ -234,6 +324,15 @@ qint8x8_t vmax_qs8(qint8x8_t a, qint8x8_t b); */ qint8x16_t vmaxq_qs8(qint8x16_t a, qint8x16_t b); +/** 16 bit fixed point vector max (8 elements) + * + * @param[in] a First 16 bit fixed point input vector + * @param[in] b Second 16 bit fixed point input vector + * + * @return The result of the 16 bit fixed point vector max operation + */ +qint16x8_t vmaxq_qs16(qint16x8_t a, qint16x8_t b); + /** 8 bit fixed point vector pairwise max (8 elements) * * @param[in] a First 8 bit fixed point input vector @@ -243,6 +342,15 @@ qint8x16_t vmaxq_qs8(qint8x16_t a, qint8x16_t b); */ qint8x8_t vpmax_qs8(qint8x8_t a, qint8x8_t b); +/** 16 bit fixed point vector pairwise max (4 elements) + * + * @param[in] a First 16 bit fixed point input vector + * @param[in] b Second 16 bit fixed point input vector + * + * @return The result of the 16 bit fixed point vector pairwise max operation + */ +qint16x4_t vpmax_qs16(qint16x4_t a, qint16x4_t b); + /** 8 bit fixed point vector min (8 elements) * * @param[in] a First 8 bit fixed point input vector @@ -252,6 +360,15 @@ qint8x8_t vpmax_qs8(qint8x8_t a, qint8x8_t b); */ qint8x8_t vmin_qs8(qint8x8_t a, qint8x8_t b); +/** 16 bit fixed point vector min (4 elements) + * + * @param[in] a First 16 bit fixed point input vector + * 
@param[in] b Second 16 bit fixed point input vector + * + * @return The result of the 16 bit fixed point vector max operation + */ +qint16x4_t vmin_qs16(qint16x4_t a, qint16x4_t b); + /** 8 bit fixed point vector min (16 elements) * * @param[in] a First 8 bit fixed point input vector @@ -261,6 +378,15 @@ qint8x8_t vmin_qs8(qint8x8_t a, qint8x8_t b); */ qint8x16_t vminq_qs8(qint8x16_t a, qint8x16_t b); +/** 16 bit fixed point vector min (8 elements) + * + * @param[in] a First 16 bit fixed point input vector + * @param[in] b Second 16 bit fixed point input vector + * + * @return The result of the 16 bit fixed point vector min operation + */ +qint16x8_t vminq_qs16(qint16x8_t a, qint16x8_t b); + /** 8 bit fixed point vector pairwise min (8 elements) * * @param[in] a First 8 bit fixed point input vector @@ -270,6 +396,15 @@ qint8x16_t vminq_qs8(qint8x16_t a, qint8x16_t b); */ qint8x8_t vpmin_qs8(qint8x8_t a, qint8x8_t b); +/** 16 bit fixed point vector pairwise min (4 elements) + * + * @param[in] a First 16 bit fixed point input vector + * @param[in] b Second 16 bit fixed point input vector + * + * @return The result of the 16 bit fixed point vector pairwise min operation + */ +qint16x4_t vpmin_qs16(qint16x4_t a, qint16x4_t b); + /** 8 bit fixed point vector add (8 elements) * * @param[in] a First 8 bit fixed point input vector @@ -279,6 +414,15 @@ qint8x8_t vpmin_qs8(qint8x8_t a, qint8x8_t b); */ qint8x8_t vadd_qs8(qint8x8_t a, qint8x8_t b); +/** 16 bit fixed point vector add (4 elements) + * + * @param[in] a First 16 bit fixed point input vector + * @param[in] b Second 16 bit fixed point input vector + * + * @return The result of the 16 bit fixed point vector addition + */ +qint16x4_t vadd_qs16(qint16x4_t a, qint16x4_t b); + /** 8 bit fixed point vector add (16 elements) * * @param[in] a First 8 bit fixed point input vector @@ -288,23 +432,23 @@ qint8x8_t vadd_qs8(qint8x8_t a, qint8x8_t b); */ qint8x16_t vaddq_qs8(qint8x16_t a, qint8x16_t b); -/** 8 bit fixed point vector saturating add (8 elements) +/** 16 bit fixed point vector add (8 elements) * - * @param[in] a First 8 bit fixed point input vector - * @param[in] b Second 8 bit fixed point input vector + * @param[in] a First 16 bit fixed point input vector + * @param[in] b Second 16 bit fixed point input vector * - * @return The result of the 8 bit fixed point vector addition. The result is saturated in case of overflow + * @return The result of the 16 bit fixed point vector addition */ -qint8x8_t vqadd_qs8(qint8x8_t a, qint8x8_t b); +qint16x8_t vaddq_qs16(qint16x8_t a, qint16x8_t b); -/** 8 bit fixed point vector saturating add (16 elements) +/** 8 bit fixed point vector saturating add (8 elements) * * @param[in] a First 8 bit fixed point input vector * @param[in] b Second 8 bit fixed point input vector * * @return The result of the 8 bit fixed point vector addition. The result is saturated in case of overflow */ -qint8x16_t vqaddq_qs8(qint8x16_t a, qint8x16_t b); +qint8x8_t vqadd_qs8(qint8x8_t a, qint8x8_t b); /** 16 bit fixed point vector saturating add (4 elements) * @@ -315,6 +459,15 @@ qint8x16_t vqaddq_qs8(qint8x16_t a, qint8x16_t b); */ qint16x4_t vqadd_qs16(qint16x4_t a, qint16x4_t b); +/** 8 bit fixed point vector saturating add (16 elements) + * + * @param[in] a First 8 bit fixed point input vector + * @param[in] b Second 8 bit fixed point input vector + * + * @return The result of the 8 bit fixed point vector addition. 
The result is saturated in case of overflow + */ +qint8x16_t vqaddq_qs8(qint8x16_t a, qint8x16_t b); + /** 16 bit fixed point vector saturating add (8 elements) * * @param[in] a First 16 bit fixed point input vector @@ -341,6 +494,15 @@ int16x4_t vpaddl_qs8(qint8x8_t a); */ qint8x8_t vsub_qs8(qint8x8_t a, qint8x8_t b); +/** 16 bit fixed point vector subtraction (4 elements) + * + * @param[in] a First 16 bit fixed point input vector + * @param[in] b Second 16 bit fixed point input vector + * + * @return The result of the 16 bit fixed point vector subtraction + */ +qint16x4_t vsub_qs16(qint16x4_t a, qint16x4_t b); + /** 8 bit fixed point vector subtraction (16 elements) * * @param[in] a First 8 bit fixed point input vector @@ -350,6 +512,15 @@ qint8x8_t vsub_qs8(qint8x8_t a, qint8x8_t b); */ qint8x16_t vsubq_qs8(qint8x16_t a, qint8x16_t b); +/** 16 bit fixed point vector subtraction (8 elements) + * + * @param[in] a First 16 bit fixed point input vector + * @param[in] b Second 16 bit fixed point input vector + * + * @return The result of the 16 bit fixed point vector subtraction + */ +qint16x8_t vsubq_qs16(qint16x8_t a, qint16x8_t b); + /** 8 bit fixed point vector saturating subtraction (8 elements) * * @param[in] a First 8 bit fixed point input vector @@ -359,6 +530,15 @@ qint8x16_t vsubq_qs8(qint8x16_t a, qint8x16_t b); */ qint8x8_t vqsub_qs8(qint8x8_t a, qint8x8_t b); +/** 16 bit fixed point vector saturating subtraction (4 elements) + * + * @param[in] a First 16 bit fixed point input vector + * @param[in] b Second 16 bit fixed point input vector + * + * @return The result of the 16 bit fixed point vector subtraction. The result is saturated in case of overflow + */ +qint16x4_t vqsub_qs16(qint16x4_t a, qint16x4_t b); + /** 8 bit fixed point vector saturating subtraction (16 elements) * * @param[in] a First 8 bit fixed point input vector @@ -368,6 +548,15 @@ qint8x8_t vqsub_qs8(qint8x8_t a, qint8x8_t b); */ qint8x16_t vqsubq_qs8(qint8x16_t a, qint8x16_t b); +/** 16 bit fixed point vector saturating subtraction (8 elements) + * + * @param[in] a First 16 bit fixed point input vector + * @param[in] b Second 16 bit fixed point input vector + * + * @return The result of the 16 bit fixed point vector subtraction. The result is saturated in case of overflow + */ +qint16x8_t vqsubq_qs16(qint16x8_t a, qint16x8_t b); + /** 8 bit fixed point vector multiply (8 elements) * * @param[in] a First 8 bit fixed point input vector @@ -378,6 +567,16 @@ qint8x16_t vqsubq_qs8(qint8x16_t a, qint8x16_t b); */ qint8x8_t vmul_qs8(qint8x8_t a, qint8x8_t b, int fixed_point_position); +/** 16 bit fixed point vector multiply (4 elements) + * + * @param[in] a First 16 bit fixed point input vector + * @param[in] b Second 16 bit fixed point input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the 16 bit fixed point vector multiplication. 
+ */ +qint16x4_t vmul_qs16(qint16x4_t a, qint16x4_t b, int fixed_point_position); + /** 8 bit fixed point vector multiply (16 elements) * * @param[in] a First 8 bit fixed point input vector @@ -388,6 +587,16 @@ qint8x8_t vmul_qs8(qint8x8_t a, qint8x8_t b, int fixed_point_position); */ qint8x16_t vmulq_qs8(qint8x16_t a, qint8x16_t b, int fixed_point_position); +/** 16 bit fixed point vector multiply (8 elements) + * + * @param[in] a First 16 bit fixed point input vector + * @param[in] b Second 16 bit fixed point input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the 16 bit fixed point vector multiplication. + */ +qint16x8_t vmulq_qs16(qint16x8_t a, qint16x8_t b, int fixed_point_position); + /** 8 bit fixed point vector saturating multiply (8 elements) * * @param[in] a First 8 bit fixed point input vector @@ -398,6 +607,16 @@ qint8x16_t vmulq_qs8(qint8x16_t a, qint8x16_t b, int fixed_point_position); */ qint8x8_t vqmul_qs8(qint8x8_t a, qint8x8_t b, int fixed_point_position); +/** 16 bit fixed point vector saturating multiply (4 elements) + * + * @param[in] a First 16 bit fixed point input vector + * @param[in] b Second 16 bit fixed point input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the 16 bit fixed point vector multiplication. The result is saturated in case of overflow + */ +qint16x4_t vqmul_qs16(qint16x4_t a, qint16x4_t b, int fixed_point_position); + /** 8 bit fixed point vector saturating multiply (16 elements) * * @param[in] a First 8 bit fixed point input vector @@ -408,6 +627,16 @@ qint8x8_t vqmul_qs8(qint8x8_t a, qint8x8_t b, int fixed_point_position); */ qint8x16_t vqmulq_qs8(qint8x16_t a, qint8x16_t b, int fixed_point_position); +/** 16 bit fixed point vector saturating multiply (8 elements) + * + * @param[in] a First 16 bit fixed point input vector + * @param[in] b Second 16 bit fixed point input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the 16 bit fixed point vector multiplication. The result is saturated in case of overflow + */ +qint16x8_t vqmulq_qs16(qint16x8_t a, qint16x8_t b, int fixed_point_position); + /** 8 bit fixed point vector long multiply (8 elements) * * @param[in] a First 8 bit fixed point input vector @@ -429,6 +658,17 @@ qint16x8_t vmull_qs8(qint8x8_t a, qint8x8_t b, int fixed_point_position); */ qint8x8_t vmla_qs8(qint8x8_t a, qint8x8_t b, qint8x8_t c, int fixed_point_position); +/** 16 bit fixed point vector multiply-accumulate (4 elements). This operation performs the product between @p b and @p c and add the result to @p a (a + b * c). + * + * @param[in] a First 16 bit fixed point input vector where the result of multiplication must be added to + * @param[in] b Second 16 bit fixed point input vector + * @param[in] c Third 16 bit fixed point input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the 16 bit fixed point vector multiply-accumulate + */ +qint16x4_t vmla_qs16(qint16x4_t a, qint16x4_t b, qint16x4_t c, int fixed_point_position); + /** 8 bit fixed point vector multiply-accumulate (16 elements). 
This operation performs the product between @p b and @p c and add the result to @p a (a + b * c). * * @param[in] a First 8 bit fixed point input vector where the result of multiplication must be added to @@ -440,6 +680,17 @@ qint8x8_t vmla_qs8(qint8x8_t a, qint8x8_t b, qint8x8_t c, int fixed_point_positi */ qint8x16_t vmlaq_qs8(qint8x16_t a, qint8x16_t b, qint8x16_t c, int fixed_point_position); +/** 16 bit fixed point vector multiply-accumulate (16 elements). This operation performs the product between @p b and @p c and add the result to @p a (a + b * c). + * + * @param[in] a First 16 bit fixed point input vector where the result of multiplication must be added to + * @param[in] b Second 16 bit fixed point input vector + * @param[in] c Third 16 bit fixed point input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the 16 bit fixed point vector multiply-accumulate + */ +qint16x8_t vmlaq_qs16(qint16x8_t a, qint16x8_t b, qint16x8_t c, int fixed_point_position); + /** 8 bit fixed point vector saturating multiply-accumulate (8 elements). This operation performs the product between @p b and @p c and add the result to @p a (a + b * c). * * @param[in] a First 8 bit fixed point input vector where the result of multiplication must be added to @@ -451,6 +702,17 @@ qint8x16_t vmlaq_qs8(qint8x16_t a, qint8x16_t b, qint8x16_t c, int fixed_point_p */ qint8x8_t vqmla_qs8(qint8x8_t a, qint8x8_t b, qint8x8_t c, int fixed_point_position); +/** 16 bit fixed point vector saturating multiply-accumulate (4 elements). This operation performs the product between @p b and @p c and add the result to @p a (a + b * c). + * + * @param[in] a First 16 bit fixed point input vector where the result of multiplication must be added to + * @param[in] b Second 16 bit fixed point input vector + * @param[in] c Third 16 bit fixed point input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the 16 bit fixed point vector multiply-accumulate. The result is saturated in case of overflow + */ +qint16x4_t vqmla_qs16(qint16x4_t a, qint16x4_t b, qint16x4_t c, int fixed_point_position); + /** 8 bit fixed point vector saturating multiply-accumulate (16 elements). This operation performs the product between @p b and @p c and add the result to @p a (a + b * c). * * @param[in] a First 8 bit fixed point input vector where the result of multiplication must be added to @@ -462,6 +724,17 @@ qint8x8_t vqmla_qs8(qint8x8_t a, qint8x8_t b, qint8x8_t c, int fixed_point_posit */ qint8x16_t vqmlaq_qs8(qint8x16_t a, qint8x16_t b, qint8x16_t c, int fixed_point_position); +/** 16 bit fixed point vector saturating multiply-accumulate (8 elements). This operation performs the product between @p b and @p c and add the result to @p a (a + b * c). 
+ * + * @param[in] a First 16 bit fixed point input vector where the result of multiplication must be added to + * @param[in] b Second 16 bit fixed point input vector + * @param[in] c Third 16 bit fixed point input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the 16 bit fixed point vector multiply-accumulate.The result is saturated in case of overflow + */ +qint16x8_t vqmlaq_qs16(qint16x8_t a, qint16x8_t b, qint16x8_t c, int fixed_point_position); + /** 8 bit fixed point vector multiply-accumulate long (8 elements). * This operation performs the product between @p b and @p c and add the result to the 16 bit fixed point vector @p a (a + b * c). 8 elements * @@ -474,6 +747,18 @@ qint8x16_t vqmlaq_qs8(qint8x16_t a, qint8x16_t b, qint8x16_t c, int fixed_point_ */ qint16x8_t vmlal_qs8(qint16x8_t a, qint8x8_t b, qint8x8_t c, int fixed_point_position); +/** 16 bit fixed point vector multiply-accumulate long (4 elements). + * This operation performs the product between @p b and @p c and add the result to the 32 bit fixed point vector @p a (a + b * c). 4 elements + * + * @param[in] a First 32 bit fixed point input vector where the result of multiplication must be added to + * @param[in] b Second 16 bit fixed point input vector + * @param[in] c Third 16 bit fixed point input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the 16 bit fixed point vector multiply-accumulate long + */ +qint32x4_t vmlal_qs16(qint32x4_t a, qint16x4_t b, qint16x4_t c, int fixed_point_position); + /** 8 bit fixed point vector saturating multiply-accumulate long (8 elements). The saturation is performed on the 16 bit fixed point output vector. * This operation performs the product between @p b and @p c and add the result to the 16 bit fixed point vector @p a (a + b * c). 8 elements * @@ -486,6 +771,18 @@ qint16x8_t vmlal_qs8(qint16x8_t a, qint8x8_t b, qint8x8_t c, int fixed_point_pos */ qint16x8_t vqmlal_qs8(qint16x8_t a, qint8x8_t b, qint8x8_t c, int fixed_point_position); +/** 16 bit fixed point vector saturating multiply-accumulate long (4 elements). The saturation is performed on the 16 bit fixed point output vector. + * This operation performs the product between @p b and @p c and add the result to the 32 bit fixed point vector @p a (a + b * c). 
4 elements + * + * @param[in] a First 32 bit fixed point input vector where the result of multiplication must be added to + * @param[in] b Second 16 bit fixed point input vector + * @param[in] c Third 16 bit fixed point input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the 16 bit fixed point vector multiply-accumulate long + */ +qint32x4_t vqmlal_qs16(qint32x4_t a, qint16x4_t b, qint16x4_t c, int fixed_point_position); + /** Convert a float vector with 4x2 elements to 8 bit fixed point vector with 8 elements * * @param[in] a Float input vector @@ -493,7 +790,16 @@ qint16x8_t vqmlal_qs8(qint16x8_t a, qint8x8_t b, qint8x8_t c, int fixed_point_po * * @return The result of the conversion float -> 8 bit fixed point */ -qint8x8_t vcvt_qs8_f32(const float32x4x2_t &a, int fixed_point_position); +qint8x8_t vcvt_qs8_f32(const float32x4x2_t a, int fixed_point_position); + +/** Convert a float vector with 4 elements to 16 bit fixed point vector with 4 elements + * + * @param[in] a Float input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the conversion float -> 16 bit fixed point + */ +qint16x4_t vcvt_qs16_f32(const float32x4_t a, int fixed_point_position); /** Convert a float vector with 4x4 elements to 8 bit fixed point vector with 16 elements * @@ -504,6 +810,15 @@ qint8x8_t vcvt_qs8_f32(const float32x4x2_t &a, int fixed_point_position); */ qint8x16_t vcvtq_qs8_f32(const float32x4x4_t &a, int fixed_point_position); +/** Convert a float vector with 4x2 elements to 16 bit fixed point vector with 8 elements + * + * @param[in] a Float input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the conversion float -> 16 bit fixed point + */ +qint16x8_t vcvtq_qs16_f32(const float32x4x2_t &a, int fixed_point_position); + /** Convert a 8 bit fixed point vector with 8 elements to a float vector with 4x2 elements * * @param[in] a 8 bit fixed point input vector @@ -513,6 +828,15 @@ qint8x16_t vcvtq_qs8_f32(const float32x4x4_t &a, int fixed_point_position); */ float32x4x2_t vcvt_f32_qs8(qint8x8_t a, int fixed_point_position); +/** Convert a 16 bit fixed point vector with 4 elements to a float vector with 4 elements + * + * @param[in] a 16 bit fixed point input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the conversion 16 bit fixed point -> float32x2 + */ +float32x4_t vcvt_f32_qs16(qint16x4_t a, int fixed_point_position); + /** Convert a 8 bit fixed point vector with 16 elements to a float vector with 4x4 elements * * @param[in] a 8 bit fixed point input vector @@ -522,6 +846,15 @@ float32x4x2_t vcvt_f32_qs8(qint8x8_t a, int fixed_point_position); */ float32x4x4_t vcvtq_qs8_f32(qint8x16_t a, int fixed_point_position); +/** Convert a 16 bit fixed point vector with 8 elements to a float vector with 4x2 elements + * + * @param[in] a 16 bit fixed point input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the conversion 16 bit fixed point -> float32x4x2 + */ +float32x4x2_t vcvtq_qs16_f32(qint16x8_t a, int fixed_point_position); + 
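For reference, the sketch below shows how the QS16 helpers introduced by this patch are intended to compose: a float is quantised with scvt_qs16_f32, multiplied with the saturating scalar and vector routines (sqmul_qs16, vqmulq_qs16) and converted back with scvt_f32_qs16. It is a minimal illustration only, assuming a NEON-enabled build of the library; the main() driver and the choice of fixed_point_position = 12 (Q3.12) are the example's own assumptions and are not part of the patch.

// Illustrative usage sketch (not part of the patch): exercises the QS16
// helpers declared in arm_compute/core/FixedPoint.h and NEFixedPoint.h.
// Assumes a NEON-enabled build of the library; main() and
// fixed_point_position = 12 (Q3.12) are the example's own choices.
#include "arm_compute/core/FixedPoint.h"
#include "arm_compute/core/NEON/NEFixedPoint.h"

#include <cstdio>

int main()
{
    using namespace arm_compute;

    const int fixed_point_position = 12; // 12 fractional bits -> resolution of 1/4096

    // Scalar path: quantise two floats, saturating multiply, convert back.
    const qint16_t a = scvt_qs16_f32(1.5f, fixed_point_position);
    const qint16_t b = scvt_qs16_f32(-2.25f, fixed_point_position);
    const qint16_t p = sqmul_qs16(a, b, fixed_point_position);
    std::printf("scalar product: %f\n", scvt_f32_qs16(p, fixed_point_position)); // approx. -3.375

    // Vector path: square 8 QS16 lanes at once with the saturating NEON wrapper.
    qint16_t in[8];
    qint16_t out[8];
    for(int i = 0; i < 8; ++i)
    {
        in[i] = scvt_qs16_f32(0.25f * i, fixed_point_position);
    }
    const qint16x8_t v = vld1q_qs16(in);
    vst1q_qs16(out, vqmulq_qs16(v, v, fixed_point_position));
    for(int i = 0; i < 8; ++i)
    {
        std::printf("lane %d: %f\n", i, scvt_f32_qs16(out[i], fixed_point_position));
    }
    return 0;
}
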
/** Calculate reciprocal of a fixed point 8bit number using the Newton-Raphson method. (8 elements) * * @param[in] a 8bit fixed point input vector @@ -531,6 +864,15 @@ float32x4x4_t vcvtq_qs8_f32(qint8x16_t a, int fixed_point_position); */ qint8x8_t vrecip_qs8(qint8x8_t a, int fixed_point_position); +/** Calculate reciprocal of a fixed point 8bit number using the Newton-Raphson method. (4 elements) + * + * @param[in] a 16 bit fixed point input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the 16 bit reciprocal (1/a). + */ +qint16x4_t vrecip_qs16(qint16x4_t a, int fixed_point_position); + /** Calculate reciprocal of a fixed point 8bit number using the Newton-Raphson method. (16 elements) * * @param[in] a 8bit fixed point input vector @@ -540,6 +882,15 @@ qint8x8_t vrecip_qs8(qint8x8_t a, int fixed_point_position); */ qint8x16_t vrecipq_qs8(qint8x16_t a, int fixed_point_position); +/** Calculate reciprocal of a fixed point 8bit number using the Newton-Raphson method. (8 elements) + * + * @param[in] a 16 bit fixed point input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the 16 bit reciprocal (1/a). + */ +qint16x8_t vrecipq_qs16(qint16x8_t a, int fixed_point_position); + /** Division fixed point 8bit (8 elements) * * @param[in] a First 8bit fixed point input vector @@ -550,6 +901,16 @@ qint8x16_t vrecipq_qs8(qint8x16_t a, int fixed_point_position); */ qint8x8_t vdiv_qs8(qint8x8_t a, int8x8_t b, int fixed_point_position); +/** Division fixed point 16 bit (4 elements) + * + * @param[in] a First 16 bit fixed point input vector + * @param[in] b Second 16 bit fixed point input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The quotient and remainder number in fixed point format. + */ +qint16x4_t vdiv_qs16(qint16x4_t a, qint16x4_t b, int fixed_point_position); + /** Division fixed point 8bit (16 elements) * * @param[in] a First 8bit fixed point input vector @@ -558,7 +919,17 @@ qint8x8_t vdiv_qs8(qint8x8_t a, int8x8_t b, int fixed_point_position); * * @return The quotient and remainder number in 8bit fixed point format. */ -qint8x16_t vdivq_qs8(qint8x16_t a, int8x16_t b, int fixed_point_position); +qint8x16_t vdivq_qs8(qint8x16_t a, qint8x16_t b, int fixed_point_position); + +/** Division fixed point 16 bit (8 elements) + * + * @param[in] a First 16 bit fixed point input vector + * @param[in] b Second 16 bit fixed point input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The quotient and remainder number in 16 bit fixed point format. + */ +qint16x8_t vdivq_qs16(qint16x8_t a, qint16x8_t b, int fixed_point_position); /** Perform a 4th degree polynomial approximation. (8 elements) * @@ -570,6 +941,16 @@ qint8x16_t vdivq_qs8(qint8x16_t a, int8x16_t b, int fixed_point_position); template qint8x8_t vtaylor_poly_qs8(qint8x8_t a, int fixed_point_position); +/** Perform a 4th degree polynomial approximation. (4 elements) + * + * @param[in] a 16 bit fixed point input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the 16 bit taylor approximation. 
+ */ +template +qint16x4_t vtaylor_poly_qs16(qint16x4_t a, int fixed_point_position); + /** Perform a 4th degree polynomial approximation. (16 elements) * * @param[in] a 8bit fixed point input vector @@ -580,6 +961,16 @@ qint8x8_t vtaylor_poly_qs8(qint8x8_t a, int fixed_point_position); template qint8x16_t vtaylor_polyq_qs8(qint8x16_t a, int fixed_point_position); +/** Perform a 4th degree polynomial approximation. (8 elements) + * + * @param[in] a 16 bit fixed point input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the 8bit taylor approximation. + */ +template +qint16x8_t vtaylor_polyq_qs16(qint16x8_t a, int fixed_point_position); + /** Calculate saturating exponential fixed point 8bit (8 elements) * * @param[in] a 8bit fixed point input vector @@ -589,6 +980,15 @@ qint8x16_t vtaylor_polyq_qs8(qint8x16_t a, int fixed_point_position); */ qint8x8_t vqexp_qs8(qint8x8_t a, int fixed_point_position); +/** Calculate saturating exponential fixed point 16 bit (4 elements) + * + * @param[in] a 8bit fixed point input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the 16 bit saturating exponential + */ +qint16x4_t vqexp_qs16(qint16x4_t a, int fixed_point_position); + /** Calculate saturating exponential fixed point 8bit (16 elements) * * @param[in] a 8bit fixed point input vector @@ -598,7 +998,16 @@ qint8x8_t vqexp_qs8(qint8x8_t a, int fixed_point_position); */ qint8x16_t vqexpq_qs8(qint8x16_t a, int fixed_point_position); -/** Calculate logarithm fixed point 16bit (8 elements) +/** Calculate saturating exponential fixed point 16 bit (8 elements) + * + * @param[in] a 16 bit fixed point input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the 16 bit saturating exponential + */ +qint16x8_t vqexpq_qs16(qint16x8_t a, int fixed_point_position); + +/** Calculate logarithm fixed point 8 bit (8 elements) * * @param[in] a 8bit fixed point input vector * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number @@ -607,6 +1016,15 @@ qint8x16_t vqexpq_qs8(qint8x16_t a, int fixed_point_position); */ qint8x8_t vlog_qs8(qint8x8_t a, int fixed_point_position); +/** Calculate logarithm fixed point 16 bit (4 elements) + * + * @param[in] a 16 bit fixed point input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the 16 bit logarithm. + */ +qint16x4_t vlog_qs16(qint16x4_t a, int fixed_point_position); + /** Calculate logarithm fixed point 16bit (16 elements) * * @param[in] a 8bit fixed point input vector @@ -616,6 +1034,15 @@ qint8x8_t vlog_qs8(qint8x8_t a, int fixed_point_position); */ qint8x16_t vlogq_qs8(qint8x16_t a, int fixed_point_position); +/** Calculate logarithm fixed point 16 bit (8 elements) + * + * @param[in] a 16 bit fixed point input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the 16 bit logarithm. 
+ */ +qint16x8_t vlogq_qs16(qint16x8_t a, int fixed_point_position); + /** Calculate inverse square root for fixed point 8bit using Newton-Raphosn method (8 elements) * * @param[in] a 8bit fixed point input vector @@ -625,6 +1052,15 @@ qint8x16_t vlogq_qs8(qint8x16_t a, int fixed_point_position); */ qint8x8_t vinvsqrt_qs8(qint8x8_t a, int fixed_point_position); +/** Calculate inverse square root for fixed point 16 bit using Newton-Raphosn method (4 elements) + * + * @param[in] a 16 bit fixed point input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the 16 bit inverse sqrt. + */ +qint16x4_t vinvsqrt_qs16(qint16x4_t a, int fixed_point_position); + /** Calculate saturating inverse square root for fixed point 8bit using Newton-Raphosn method (8 elements) * * @param[in] a 8bit fixed point input vector @@ -634,6 +1070,15 @@ qint8x8_t vinvsqrt_qs8(qint8x8_t a, int fixed_point_position); */ qint8x8_t vqinvsqrt_qs8(qint8x8_t a, int fixed_point_position); +/** Calculate saturating inverse square root for fixed point 16 bit using Newton-Raphosn method (4 elements) + * + * @param[in] a 16 bit fixed point input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the 16 bit inverse sqrt. + */ +qint16x4_t vqinvsqrt_qs16(qint16x4_t a, int fixed_point_position); + /** Calculate inverse square root for fixed point 8bit using Newton-Raphosn method (16 elements) * * @param[in] a 8bit fixed point input vector @@ -643,6 +1088,15 @@ qint8x8_t vqinvsqrt_qs8(qint8x8_t a, int fixed_point_position); */ qint8x16_t vinvsqrtq_qs8(qint8x16_t a, int fixed_point_position); +/** Calculate inverse square root for fixed point 8bit using Newton-Raphosn method (8 elements) + * + * @param[in] a 16 bit fixed point input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the 16 bit inverse sqrt. + */ +qint16x8_t vinvsqrtq_qs16(qint16x8_t a, int fixed_point_position); + /** Calculate saturating inverse square root for fixed point 8bit using Newton-Raphosn method (16 elements) * * @param[in] a 8bit fixed point input vector @@ -652,6 +1106,15 @@ qint8x16_t vinvsqrtq_qs8(qint8x16_t a, int fixed_point_position); */ qint8x16_t vqinvsqrtq_qs8(qint8x16_t a, int fixed_point_position); +/** Calculate saturating inverse square root for fixed point 16 bit using Newton-Raphosn method (8 elements) + * + * @param[in] a 16 bit fixed point input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the 16 bit inverse sqrt. + */ +qint16x8_t vqinvsqrtq_qs16(qint16x8_t a, int fixed_point_position); + /** Calculate hyperbolic tangent for fixed point 8bit (8 elements) * * @param[in] a 8bit fixed point input vector @@ -661,6 +1124,15 @@ qint8x16_t vqinvsqrtq_qs8(qint8x16_t a, int fixed_point_position); */ qint8x8_t vtanh_qs8(qint8x8_t a, int fixed_point_position); +/** Calculate hyperbolic tangent for fixed point 16 bit (4 elements) + * + * @param[in] a 16 bit fixed point input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The calculated Hyperbolic Tangent. 
+ */ +qint16x4_t vtanh_qs16(qint16x4_t a, int fixed_point_position); + /** Calculate hyperbolic tangent for fixed point 8bit (16 elements) * * @param[in] a 8bit fixed point input vector @@ -690,6 +1162,15 @@ qint8x8_t vqpowq_qs8(qint8x8_t a, qint8x16_t b, int fixed_point_position); * @return The lane-by-lane maximum -> float32x4x2 */ float32x4x2_t vmax2q_f32(float32x4x2_t a, float32x4x2_t b); + +/** Calculate hyperbolic tangent for fixed point 8bit (8 elements) + * + * @param[in] a 16 bit fixed point input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The calculated Hyperbolic Tangent. + */ +qint16x8_t vtanhq_qs16(qint16x8_t a, int fixed_point_position); } #include "arm_compute/core/NEON/NEFixedPoint.inl" #endif /* __ARM_COMPUTE_NEFIXEDPOINT_H__ */ diff --git a/arm_compute/core/NEON/NEFixedPoint.inl b/arm_compute/core/NEON/NEFixedPoint.inl index b57fd3edd2..4f7f44ab3b 100644 --- a/arm_compute/core/NEON/NEFixedPoint.inl +++ b/arm_compute/core/NEON/NEFixedPoint.inl @@ -26,7 +26,7 @@ namespace arm_compute { /**< Exponent polynomial coefficients for 8 bit fixed point (8 elements) * Format is in Q0.7 for all elements */ -const std::array exp_tab_qs8 = +static const std::array exp_tab_qs8 = { { vdup_n_s8(0x7F), // 0.9978546 @@ -36,9 +36,21 @@ const std::array exp_tab_qs8 = } }; +/**< Exponent polynomial coefficients for 16 bit fixed point (4 elements) + * Format is in Q0.15 for all elements */ +static const std::array exp_tab_qs16 = +{ + { + vdup_n_s16(0x7FBA), // 0.9978546 + vdup_n_s16(0x3FE9), // 0.4994721 + vdup_n_s16(0x1693), // 0.1763723 + vdup_n_s16(0x0592), // 0.0435108 + } +}; + /**< Exponent polynomial coefficients for 8 bit fixed point (16 elements) * Format is in Q0.7 for all elements */ -const std::array exp_tabq_qs8 = +static const std::array exp_tabq_qs8 = { { vdupq_n_s8(0x7F), // 0.9978546 @@ -48,9 +60,21 @@ const std::array exp_tabq_qs8 = } }; +/**< Exponent polynomial coefficients for 16 bit fixed point (8 elements) + * Format is in Q0.15 for all elements */ +static const std::array exp_tabq_qs16 = +{ + { + vdupq_n_s16(0x7FBA), // 0.9978546 + vdupq_n_s16(0x3FE9), // 0.4994721 + vdupq_n_s16(0x1693), // 0.1763723 + vdupq_n_s16(0x0592), // 0.0435108 + } +}; + /**< Logarithm polynomial coefficients for 8 bit fixed point (8 elements) * Format is in Q0.7 for all elements except the first one which is in Q1.6 */ -const std::array log_tab_qs8 = +static const std::array log_tab_qs8 = { { vdup_n_s8(0x5C), // 1.4384189 @@ -60,9 +84,21 @@ const std::array log_tab_qs8 = } }; +/**< Logarithm polynomial coefficients for 16 bit fixed point (8 elements) + * Format is in Q0.15 for all elements except the first one which is in Q1.14 */ +static const std::array log_tab_qs16 = +{ + { + vdup_n_s16(0x5C0F), // 1.4384189 + vdup_n_s16(-0x56AE), // -0.6771900 + vdup_n_s16(0x2933), // 0.3218538 + vdup_n_s16(-0x0AA7), // -0.0832229 + } +}; + /**< Logarithm polynomial coefficients for 8 bit fixed point (16 elements) * Format is in Q0.7 for all elements except the first one which is in Q1.6 */ -const std::array log_tabq_qs8 = +static const std::array log_tabq_qs8 = { { vdupq_n_s8(0x5C), // 1.4384189 @@ -72,24 +108,41 @@ const std::array log_tabq_qs8 = } }; +/**< Logarithm polynomial coefficients for 16 bit fixed point (8 elements) + * Format is in Q0.15 for all elements except the first one which is in Q1.14 */ +static const std::array log_tabq_qs16 = +{ + { + vdupq_n_s16(0x5C0F), // 1.4384189 + 
vdupq_n_s16(-0x56AE), // -0.6771900 + vdupq_n_s16(0x2933), // 0.3218538 + vdupq_n_s16(-0x0AA7), // -0.0832229 + } +}; + inline qint8x8_t vget_low_qs8(qint8x16_t a) { return vget_low_s8(a); } +inline qint16x4_t vget_low_qs16(qint16x8_t a) +{ + return vget_low_s16(a); +} + inline qint8x8_t vget_high_qs8(qint8x16_t a) { return vget_high_s8(a); } -inline qint8x8_t vld1_qs8(const qint8_t *addr) +inline qint16x4_t vget_high_qs16(qint16x8_t a) { - return vld1_s8(addr); + return vget_high_s16(a); } -inline qint8x16_t vld1q_qs8(const qint8_t *addr) +inline qint8x8_t vld1_qs8(const qint8_t *addr) { - return vld1q_s8(addr); + return vld1_s8(addr); } inline qint16x4_t vld1_qs16(const qint16_t *addr) @@ -97,6 +150,11 @@ inline qint16x4_t vld1_qs16(const qint16_t *addr) return vld1_s16(addr); } +inline qint8x16_t vld1q_qs8(const qint8_t *addr) +{ + return vld1q_s8(addr); +} + inline qint16x8_t vld1q_qs16(const qint16_t *addr) { return vld1q_s16(addr); @@ -107,19 +165,24 @@ inline qint8x8_t vld1_dup_qs8(const qint8_t *addr) return vld1_dup_s8(addr); } +inline qint16x4_t vld1_dup_qs16(const qint16_t *addr) +{ + return vld1_dup_s16(addr); +} + inline qint8x16_t vld1q_dup_qs8(const qint8_t *addr) { return vld1q_dup_s8(addr); } -inline void vst1_qs8(qint8_t *addr, qint8x8_t b) +inline qint16x8_t vld1q_dup_qs16(const qint16_t *addr) { - vst1_s8(addr, b); + return vld1q_dup_s16(addr); } -inline void vst1q_qs8(qint8_t *addr, qint8x16_t b) +inline void vst1_qs8(qint8_t *addr, qint8x8_t b) { - vst1q_s8(addr, b); + vst1_s8(addr, b); } inline void vst1_qs16(qint16_t *addr, qint16x4_t b) @@ -127,6 +190,11 @@ inline void vst1_qs16(qint16_t *addr, qint16x4_t b) vst1_s16(addr, b); } +inline void vst1q_qs8(qint8_t *addr, qint8x16_t b) +{ + vst1q_s8(addr, b); +} + inline void vst1q_qs16(qint16_t *addr, qint16x8_t b) { vst1q_s16(addr, b); @@ -137,11 +205,21 @@ inline qint8x8_t vqmovn_qs16(qint16x8_t a) return vqmovn_s16(a); } +inline qint16x4_t vqmovn_qs32(qint32x4_t a) +{ + return vqmovn_s32(a); +} + inline qint8x8_t vdup_n_qs8(qint8_t a) { return vdup_n_s8(a); } +inline qint16x4_t vdup_n_qs16(qint16_t a) +{ + return vdup_n_s16(a); +} + inline qint8x16_t vdupq_n_qs8(qint8_t a) { return vdupq_n_s8(a); @@ -166,31 +244,61 @@ inline qint16x8_t vdupq_n_qs16(qint16_t a) return vdupq_n_s16(a); } +inline qint32x4_t vdupq_n_qs32(qint32_t a) +{ + return vdupq_n_s32(a); +} + inline qint8x8_t vabs_qs8(qint8x8_t a) { return vabs_s8(a); } +inline qint16x4_t vabs_qs16(qint16x4_t a) +{ + return vabs_s16(a); +} + inline qint8x16_t vabsq_qs8(qint8x16_t a) { return vabsq_s8(a); } +inline qint16x8_t vabsq_qs16(qint16x8_t a) +{ + return vabsq_s16(a); +} + inline qint8x8_t vqabs_qs8(qint8x8_t a) { return vqabs_s8(a); } +inline qint16x4_t vqabs_qs16(qint16x4_t a) +{ + return vqabs_s16(a); +} + inline qint8x16_t vqabsq_qs8(qint8x16_t a) { return vqabsq_s8(a); } +inline qint16x8_t vqabsq_qs16(qint16x8_t a) +{ + return vqabsq_s16(a); +} + inline qint8x8_t vmax_qs8(qint8x8_t a, qint8x8_t b) { return vmax_s8(a, b); } +inline qint16x4_t vmax_qs16(qint16x4_t a, qint16x4_t b) +{ + return vmax_s16(a, b); +} + inline qint8x16_t vmaxq_qs8(qint8x16_t a, qint8x16_t b) { return vmaxq_s8(a, b); @@ -201,11 +309,26 @@ inline qint8x8_t vpmax_qs8(qint8x8_t a, qint8x8_t b) return vpmax_s8(a, b); } +inline qint16x4_t vpmax_qs16(qint16x4_t a, qint16x4_t b) +{ + return vpmax_s16(a, b); +} + +inline qint16x8_t vmaxq_qs16(qint16x8_t a, qint16x8_t b) +{ + return vmaxq_s16(a, b); +} + inline qint8x8_t vmin_qs8(qint8x8_t a, qint8x8_t b) { return vmin_s8(a, b); } +inline 
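
The fixed point multiplies above all follow the same rounding scheme: widen the operands, pre-add 1 << (fixed_point_position - 1) so the subsequent shift rounds to nearest, shift right by fixed_point_position, then narrow back. A scalar sketch of the idea (mul_qs16_sketch is an assumed name, not the library's smul_qs16 implementation):

#include <cstdint>

// Rounded 16 bit fixed point multiply: the product of two numbers with n
// fractional bits has 2n fractional bits, so n of them are shifted back out
// after adding a rounding term.
inline int16_t mul_qs16_sketch(int16_t a, int16_t b, int fixed_point_position)
{
    int32_t res = static_cast<int32_t>(a) * static_cast<int32_t>(b);
    res += 1 << (fixed_point_position - 1); // rounding term, same constant as the vdupq_n_s32 initialiser above
    res >>= fixed_point_position;           // drop the extra fractional bits
    return static_cast<int16_t>(res);       // plain narrowing here; the vq* variants saturate instead
}

With fixed_point_position = 12, the encodings of 1.5 (0x1800) and 2.0 (0x2000) multiply to 0x3000, i.e. 3.0.
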
qint16x4_t vmin_qs16(qint16x4_t a, qint16x4_t b) +{ + return vmin_s16(a, b); +} + inline qint8x16_t vminq_qs8(qint8x16_t a, qint8x16_t b) { return vminq_s8(a, b); @@ -216,24 +339,39 @@ inline qint8x8_t vpmin_qs8(qint8x8_t a, qint8x8_t b) return vpmin_s8(a, b); } +inline qint16x4_t vpmin_qs16(qint16x4_t a, qint16x4_t b) +{ + return vpmin_s16(a, b); +} + +inline qint16x8_t vminq_qs16(qint16x8_t a, qint16x8_t b) +{ + return vminq_s16(a, b); +} + inline qint8x8_t vadd_qs8(qint8x8_t a, qint8x8_t b) { return vadd_s8(a, b); } +inline qint16x4_t vadd_qs16(qint16x4_t a, qint16x4_t b) +{ + return vadd_s16(a, b); +} + inline qint8x16_t vaddq_qs8(qint8x16_t a, qint8x16_t b) { return vaddq_s8(a, b); } -inline qint8x8_t vqadd_qs8(qint8x8_t a, qint8x8_t b) +inline qint16x8_t vaddq_qs16(qint16x8_t a, qint16x8_t b) { - return vqadd_s8(a, b); + return vaddq_s16(a, b); } -inline qint8x16_t vqaddq_qs8(qint8x16_t a, qint8x16_t b) +inline qint8x8_t vqadd_qs8(qint8x8_t a, qint8x8_t b) { - return vqaddq_s8(a, b); + return vqadd_s8(a, b); } inline qint16x4_t vqadd_qs16(qint16x4_t a, qint16x4_t b) @@ -241,6 +379,11 @@ inline qint16x4_t vqadd_qs16(qint16x4_t a, qint16x4_t b) return vqadd_s16(a, b); } +inline qint8x16_t vqaddq_qs8(qint8x16_t a, qint8x16_t b) +{ + return vqaddq_s8(a, b); +} + inline qint16x8_t vqaddq_qs16(qint16x8_t a, qint16x8_t b) { return vqaddq_s16(a, b); @@ -256,21 +399,41 @@ inline qint8x8_t vsub_qs8(qint8x8_t a, qint8x8_t b) return vsub_s8(a, b); } +inline qint16x4_t vsub_qs16(qint16x4_t a, qint16x4_t b) +{ + return vsub_s16(a, b); +} + inline qint8x16_t vsubq_qs8(qint8x16_t a, qint8x16_t b) { return vsubq_s8(a, b); } +inline qint16x8_t vsubq_qs16(qint16x8_t a, qint16x8_t b) +{ + return vsubq_s16(a, b); +} + inline qint8x8_t vqsub_qs8(qint8x8_t a, qint8x8_t b) { return vqsub_s8(a, b); } +inline qint16x4_t vqsub_qs16(qint16x4_t a, qint16x4_t b) +{ + return vqsub_s16(a, b); +} + inline qint8x16_t vqsubq_qs8(qint8x16_t a, qint8x16_t b) { return vqsubq_s8(a, b); } +inline qint16x8_t vqsubq_qs16(qint16x8_t a, qint16x8_t b) +{ + return vqsubq_s16(a, b); +} + inline qint8x8_t vmul_qs8(qint8x8_t a, qint8x8_t b, int fixed_point_position) { const int16x8_t fixed_point_position_s16 = vdupq_n_s16(-fixed_point_position); @@ -288,6 +451,23 @@ inline qint8x8_t vmul_qs8(qint8x8_t a, qint8x8_t b, int fixed_point_position) return vmovn_s16(res); } +inline qint16x4_t vmul_qs16(qint16x4_t a, qint16x4_t b, int fixed_point_position) +{ + const int32x4_t fixed_point_position_s32 = vdupq_n_s32(-fixed_point_position); + + // Initialize the temporary result with a constant used to round up the result + qint32x4_t res = vdupq_n_s32(1 << (fixed_point_position - 1)); + + // Vector multiply-accumulate long + res = vmlal_s16(res, a, b); + + // Shift right by fixed_point_position + res = vshlq_s32(res, fixed_point_position_s32); + + // Convert back to qint16 + return vmovn_s32(res); +} + inline qint8x16_t vmulq_qs8(qint8x16_t a, qint8x16_t b, int fixed_point_position) { const int16x8_t fixed_point_position_s16 = vdupq_n_s16(-fixed_point_position); @@ -308,6 +488,26 @@ inline qint8x16_t vmulq_qs8(qint8x16_t a, qint8x16_t b, int fixed_point_position return vcombine_s8(vmovn_s16(res0), vmovn_s16(res1)); } +inline qint16x8_t vmulq_qs16(qint16x8_t a, qint16x8_t b, int fixed_point_position) +{ + const int32x4_t fixed_point_position_s32 = vdupq_n_s32(-fixed_point_position); + + // Initialize the temporary results with a constant used to round up the result + qint32x4_t res0 = vdupq_n_s32(1 << (fixed_point_position - 1)); + 
qint32x4_t res1 = res0; + + // Vector multiply-accumulate long + res0 = vmlal_s16(res0, vget_low_qs16(a), vget_low_qs16(b)); + res1 = vmlal_s16(res1, vget_high_qs16(a), vget_high_qs16(b)); + + // Shift right by fixed_point_position + res0 = vshlq_s32(res0, fixed_point_position_s32); + res1 = vshlq_s32(res1, fixed_point_position_s32); + + // Convert back to qint16 + return vcombine_s16(vmovn_s32(res0), vmovn_s32(res1)); +} + inline qint8x8_t vqmul_qs8(qint8x8_t a, qint8x8_t b, int fixed_point_position) { const int16x8_t fixed_point_position_s16 = vdupq_n_s16(-fixed_point_position); @@ -325,6 +525,23 @@ inline qint8x8_t vqmul_qs8(qint8x8_t a, qint8x8_t b, int fixed_point_position) return vqmovn_s16(res); } +inline qint16x4_t vqmul_qs16(qint16x4_t a, qint16x4_t b, int fixed_point_position) +{ + const int32x4_t fixed_point_position_s32 = vdupq_n_s32(-fixed_point_position); + + // Initialize the temporary result with a constant used to round up the result + qint32x4_t res = vdupq_n_s32(1 << (fixed_point_position - 1)); + + // Vector multiply-accumulate long + res = vmlal_s16(res, a, b); + + // Shift right by fixed_point_position + res = vqshlq_s32(res, fixed_point_position_s32); + + // Convert back to qint16 and saturate + return vqmovn_s32(res); +} + inline qint8x16_t vqmulq_qs8(qint8x16_t a, qint8x16_t b, int fixed_point_position) { const int16x8_t fixed_point_position_s16 = vdupq_n_s16(-fixed_point_position); @@ -345,6 +562,26 @@ inline qint8x16_t vqmulq_qs8(qint8x16_t a, qint8x16_t b, int fixed_point_positio return vcombine_s8(vqmovn_s16(res0), vqmovn_s16(res1)); } +inline qint16x8_t vqmulq_qs16(qint16x8_t a, qint16x8_t b, int fixed_point_position) +{ + const int32x4_t fixed_point_position_s32 = vdupq_n_s32(-fixed_point_position); + + // Initialize the temporary results with a constant used to round up the result + qint32x4_t res0 = vdupq_n_s32(1 << (fixed_point_position - 1)); + qint32x4_t res1 = res0; + + // Vector multiply-accumulate long + res0 = vmlal_s16(res0, vget_low_qs16(a), vget_low_qs16(b)); + res1 = vmlal_s16(res1, vget_high_qs16(a), vget_high_qs16(b)); + + // Shift right by fixed_point_position + res0 = vqshlq_s32(res0, fixed_point_position_s32); + res1 = vqshlq_s32(res1, fixed_point_position_s32); + + // Convert back to qint16 and saturate + return vcombine_s16(vqmovn_s32(res0), vqmovn_s32(res1)); +} + inline qint16x8_t vmull_qs8(qint8x8_t a, qint8x8_t b, int fixed_point_position) { const int16x8_t fixed_point_position_s16 = vdupq_n_s16(-fixed_point_position); @@ -371,6 +608,23 @@ inline qint8x8_t vmla_qs8(qint8x8_t a, qint8x8_t b, qint8x8_t c, int fixed_point return vadd_s8(a, vmovn_s16(tmp)); } +inline qint16x4_t vmla_qs16(qint16x4_t a, qint16x4_t b, qint16x4_t c, int fixed_point_position) +{ + const int32x4_t fixed_point_position_s32 = vdupq_n_s32(-fixed_point_position); + + // Initialize the temporary results with a constant used to round up the result + qint32x4_t tmp = vdupq_n_s32(1 << (fixed_point_position - 1)); + + // Vector multiply-accumulate long + tmp = vmlal_s16(tmp, b, c); + + // Shift right by fixed_point_position + tmp = vshlq_s32(tmp, fixed_point_position_s32); + + // Convert back to qint16 and accumulate + return vadd_s16(a, vmovn_s32(tmp)); +} + inline qint8x16_t vmlaq_qs8(qint8x16_t a, qint8x16_t b, qint8x16_t c, int fixed_point_position) { const int16x8_t fixed_point_position_s16 = vdupq_n_s16(-fixed_point_position); @@ -391,6 +645,26 @@ inline qint8x16_t vmlaq_qs8(qint8x16_t a, qint8x16_t b, qint8x16_t c, int fixed_ return 
vcombine_s8(vadd_s8(vget_low_s8(a), vmovn_s16(tmp0)), vadd_s8(vget_high_s8(a), vmovn_s16(tmp1))); } +inline qint16x8_t vmlaq_qs16(qint16x8_t a, qint16x8_t b, qint16x8_t c, int fixed_point_position) +{ + const int32x4_t fixed_point_position_s32 = vdupq_n_s32(-fixed_point_position); + + // Initialize the temporary results with a constant used to round up the result + qint32x4_t tmp0 = vdupq_n_s32(1 << (fixed_point_position - 1)); + qint32x4_t tmp1 = tmp0; + + // Vector multiply-accumulate long + tmp0 = vmlal_s16(tmp0, vget_low_qs16(b), vget_low_qs16(c)); + tmp1 = vmlal_s16(tmp1, vget_high_qs16(b), vget_high_qs16(c)); + + // Shift right by fixed_point_position + tmp0 = vshlq_s32(tmp0, fixed_point_position_s32); + tmp1 = vshlq_s32(tmp1, fixed_point_position_s32); + + // Convert back to qint16 and accumulate + return vcombine_s16(vadd_s16(vget_low_qs16(a), vmovn_s32(tmp0)), vadd_s16(vget_high_qs16(a), vmovn_s32(tmp1))); +} + inline qint8x8_t vqmla_qs8(qint8x8_t a, qint8x8_t b, qint8x8_t c, int fixed_point_position) { const int16x8_t fixed_point_position_s16 = vdupq_n_s16(-fixed_point_position); @@ -408,6 +682,23 @@ inline qint8x8_t vqmla_qs8(qint8x8_t a, qint8x8_t b, qint8x8_t c, int fixed_poin return vqadd_s8(a, vqmovn_s16(tmp)); } +inline qint16x4_t vqmla_qs16(qint16x4_t a, qint16x4_t b, qint16x4_t c, int fixed_point_position) +{ + const int32x4_t fixed_point_position_s32 = vdupq_n_s32(-fixed_point_position); + + // Initialize the temporary results with a constant used to round up the result + qint32x4_t tmp = vdupq_n_s32(1 << (fixed_point_position - 1)); + + // Vector multiply-accumulate long + tmp = vmlal_s16(tmp, b, c); + + // Shift right by fixed_point_position + tmp = vqshlq_s32(tmp, fixed_point_position_s32); + + // Convert back to qint8 and accumulate + return vqadd_s16(a, vqmovn_s32(tmp)); +} + inline qint8x16_t vqmlaq_qs8(qint8x16_t a, qint8x16_t b, qint8x16_t c, int fixed_point_position) { const int16x8_t fixed_point_position_s16 = vdupq_n_s16(-fixed_point_position); @@ -429,6 +720,27 @@ inline qint8x16_t vqmlaq_qs8(qint8x16_t a, qint8x16_t b, qint8x16_t c, int fixed return vqaddq_s8(a, res); } +inline qint16x8_t vqmlaq_qs16(qint16x8_t a, qint16x8_t b, qint16x8_t c, int fixed_point_position) +{ + const int32x4_t fixed_point_position_s32 = vdupq_n_s32(-fixed_point_position); + + // Initialize the temporary results with a constant used to round up the result + qint32x4_t tmp0 = vdupq_n_s32(1 << (fixed_point_position - 1)); + qint32x4_t tmp1 = tmp0; + + // Vector multiply-accumulate long + tmp0 = vmlal_s16(tmp0, vget_low_qs16(b), vget_low_qs16(c)); + tmp1 = vmlal_s16(tmp1, vget_high_qs16(b), vget_high_qs16(c)); + + // Shift right by fixed_point_position + tmp0 = vqshlq_s32(tmp0, fixed_point_position_s32); + tmp1 = vqshlq_s32(tmp1, fixed_point_position_s32); + + // Convert back to qint16 and accumulate + qint16x8_t res = vcombine_s16(vqmovn_s32(tmp0), vqmovn_s32(tmp1)); + return vqaddq_s16(a, res); +} + inline qint16x8_t vmlal_qs8(qint16x8_t a, qint8x8_t b, qint8x8_t c, int fixed_point_position) { const int16x8_t fixed_point_position_s16 = vdupq_n_s16(-fixed_point_position); @@ -446,6 +758,23 @@ inline qint16x8_t vmlal_qs8(qint16x8_t a, qint8x8_t b, qint8x8_t c, int fixed_po return vaddq_s16(a, tmp); } +inline qint32x4_t vmlal_qs16(qint32x4_t a, qint16x4_t b, qint16x4_t c, int fixed_point_position) +{ + const int32x4_t fixed_point_position_s32 = vdupq_n_s32(-fixed_point_position); + + // Initialize the temporary results with a constant used to round up the result + qint32x4_t tmp = 
vdupq_n_s32(1 << (fixed_point_position - 1)); + + // Vector multiply-accumulate long + tmp = vmlal_s16(tmp, b, c); + + // Shift right by fixed_point_position + tmp = vshlq_s32(tmp, fixed_point_position_s32); + + // Accumulate + return vaddq_s32(a, tmp); +} + inline qint16x8_t vqmlal_qs8(qint16x8_t a, qint8x8_t b, qint8x8_t c, int fixed_point_position) { const int16x8_t fixed_point_position_s16 = vdupq_n_s16(-fixed_point_position); @@ -463,6 +792,23 @@ inline qint16x8_t vqmlal_qs8(qint16x8_t a, qint8x8_t b, qint8x8_t c, int fixed_p return vqaddq_s16(a, tmp); } +inline qint32x4_t vqmlal_qs16(qint32x4_t a, qint16x4_t b, qint16x4_t c, int fixed_point_position) +{ + const int32x4_t fixed_point_position_s32 = vdupq_n_s32(-fixed_point_position); + + // Initialize the temporary results with a constant used to round up the result + qint32x4_t tmp = vdupq_n_s32(1 << (fixed_point_position - 1)); + + // Vector multiply-accumulate long + tmp = vmlal_s16(tmp, b, c); + + // Shift right by fixed_point_position + tmp = vqshlq_s32(tmp, fixed_point_position_s32); + + // Accumulate + return vqaddq_s32(a, tmp); +} + inline qint8x8_t vcvt_qs8_f32(const float32x4x2_t &a, int fixed_point_position) { const float32x4_t pow2 = vdupq_n_f32(static_cast(1 << fixed_point_position)); @@ -491,6 +837,19 @@ inline qint8x8_t vcvt_qs8_f32(const float32x4x2_t &a, int fixed_point_position) return vqmovn_s16(res_s16); } +inline qint16x4_t vcvt_qs16_f32(const float32x4_t a, int fixed_point_position) +{ + const float32x4_t pow2 = vdupq_n_f32(static_cast(1 << fixed_point_position)); + + float32x4_t res_f32 = vdupq_n_f32(0.5f); + + res_f32 = vmlaq_f32(res_f32, a, pow2); + + const int32x4_t res_s32 = vcvtq_s32_f32(res_f32); + + return vqmovn_s32(res_s32); +} + inline qint8x16_t vcvtq_qs8_f32(const float32x4x4_t &a, int fixed_point_position) { const float32x4_t pow2 = vdupq_n_f32(static_cast(1 << fixed_point_position)); @@ -531,6 +890,32 @@ inline qint8x16_t vcvtq_qs8_f32(const float32x4x4_t &a, int fixed_point_position return vcombine_s8(vqmovn_s16(res_s16.val[0]), vqmovn_s16(res_s16.val[1])); } +inline qint16x8_t vcvtq_qs16_f32(const float32x4x2_t &a, int fixed_point_position) +{ + const float32x4_t pow2 = vdupq_n_f32(static_cast(1 << fixed_point_position)); + + float32x4x2_t res_f32 = + { + { + vdupq_n_f32(0.5f), + vdupq_n_f32(0.5f) + } + }; + + res_f32.val[0] = vmlaq_f32(res_f32.val[0], a.val[0], pow2); + res_f32.val[1] = vmlaq_f32(res_f32.val[1], a.val[1], pow2); + + const int32x4x2_t res_s32 = + { + { + vcvtq_s32_f32(res_f32.val[0]), + vcvtq_s32_f32(res_f32.val[1]) + } + }; + + return vcombine_s16(vqmovn_s32(res_s32.val[0]), vqmovn_s32(res_s32.val[1])); +} + inline float32x4x2_t vcvt_f32_qs8(qint8x8_t a, int fixed_point_position) { const float32x4_t pow2 = vdupq_n_f32(1.0f / (1 << fixed_point_position)); @@ -540,8 +925,8 @@ inline float32x4x2_t vcvt_f32_qs8(qint8x8_t a, int fixed_point_position) const int32x4x2_t res_s32 = { { - vmovl_s16(vget_low_s16(res_s16)), - vmovl_s16(vget_high_s16(res_s16)) + vmovl_s16(vget_low_qs16(res_s16)), + vmovl_s16(vget_high_qs16(res_s16)) } }; @@ -559,6 +944,14 @@ inline float32x4x2_t vcvt_f32_qs8(qint8x8_t a, int fixed_point_position) return res_f32; } +inline float32x4_t vcvt_f32_qs16(qint16x4_t a, int fixed_point_position) +{ + const float32x4_t pow2 = vdupq_n_f32(1.0f / (1 << fixed_point_position)); + const float32x4_t res_f32 = vcvtq_f32_s32(vmovl_s16(a)); + + return vmulq_f32(res_f32, pow2); +} + inline float32x4x4_t vcvtq_f32_qs8(qint8x16_t a, int fixed_point_position) { const float32x4_t 
pow2 = vdupq_n_f32(1.0f / (1 << fixed_point_position)); @@ -574,10 +967,10 @@ inline float32x4x4_t vcvtq_f32_qs8(qint8x16_t a, int fixed_point_position) const int32x4x4_t res_s32 = { { - vmovl_s16(vget_low_s16(res_s16.val[0])), - vmovl_s16(vget_high_s16(res_s16.val[0])), - vmovl_s16(vget_low_s16(res_s16.val[1])), - vmovl_s16(vget_high_s16(res_s16.val[1])), + vmovl_s16(vget_low_qs16(res_s16.val[0])), + vmovl_s16(vget_high_qs16(res_s16.val[0])), + vmovl_s16(vget_low_qs16(res_s16.val[1])), + vmovl_s16(vget_high_qs16(res_s16.val[1])), } }; @@ -599,18 +992,44 @@ inline float32x4x4_t vcvtq_f32_qs8(qint8x16_t a, int fixed_point_position) return res_f32; } +inline float32x4x2_t vcvtq_f32_qs16(qint16x8_t a, int fixed_point_position) +{ + const float32x4_t pow2 = vdupq_n_f32(1.0f / (1 << fixed_point_position)); + + const int32x4x2_t res_s32 = + { + { + vmovl_s16(vget_low_qs16(a)), + vmovl_s16(vget_high_qs16(a)) + } + }; + + float32x4x2_t res_f32 = + { + { + vcvtq_f32_s32(res_s32.val[0]), + vcvtq_f32_s32(res_s32.val[1]) + } + }; + + res_f32.val[0] = vmulq_f32(res_f32.val[0], pow2); + res_f32.val[1] = vmulq_f32(res_f32.val[1], pow2); + + return res_f32; +} + inline qint8x8_t vrecip_qs8(qint8x8_t a, int fixed_point_position) { // We need two bits to store 2, thus we can only support formats from Q2.5 to Q7.0 - const qint8x8_t const_48_over_17 = vdup_n_s8(0x7A >> (5 - fixed_point_position)); // 2.823 - const qint8x8_t const_minus_32_over_17 = vdup_n_s8(-(0x3C >> (5 - fixed_point_position))); // -1.8823 - const qint8x8_t const_one = vdup_n_s8(1 << fixed_point_position); + const qint8x8_t const_48_over_17 = vdup_n_s8(0x5A >> (5 - fixed_point_position)); // 2.823 + const qint8x8_t const_32_over_17 = vdup_n_s8((0x3C >> (5 - fixed_point_position))); // 1.8823 + const qint8x8_t const_one = vdup_n_s8(1 << fixed_point_position); // Find shift value const qint8x8_t shift_value = vneg_s8(vsub_s8(vdup_n_s8(8), vadd_s8(vclz_s8(a), vdup_n_s8(fixed_point_position)))); const qint8x8_t temp = vshl_s8(a, shift_value); - qint8x8_t x = vadd_s8(const_48_over_17, vmul_qs8(temp, const_minus_32_over_17, fixed_point_position)); + qint8x8_t x = vadd_s8(const_48_over_17, vmul_qs8(temp, const_32_over_17, fixed_point_position)); uint8x8_t set_one = vcgt_s8(x, const_one); x = vbsl_s8(set_one, const_one, x); @@ -623,18 +1042,44 @@ inline qint8x8_t vrecip_qs8(qint8x8_t a, int fixed_point_position) return vshl_s8(x, shift_value); } +inline qint16x4_t vrecip_qs16(qint16x4_t a, int fixed_point_position) +{ + // We need two bits to store 2, thus we can only support formats from Q2.13 to Q15.0 + const qint16x4_t const_48_over_17 = vdup_n_s16(0x5A5A >> (13 - fixed_point_position)); // 2.823 + const qint16x4_t const_32_over_17 = vdup_n_s16(0x3C3C >> (13 - fixed_point_position)); // 1.8823 + const qint16x4_t const_one = vdup_n_s16(1 << fixed_point_position); + + // Find shift value + const qint16x4_t shift_value = vneg_s16(vsub_s16(vdup_n_s16(8), vadd_s16(vclz_s16(a), vdup_n_s16(fixed_point_position)))); + const qint16x4_t temp = vshl_s16(a, shift_value); + + qint16x4_t x = vadd_s16(const_48_over_17, vmul_qs16(temp, const_32_over_17, fixed_point_position)); + + uint16x4_t set_one = vcgt_s16(x, const_one); + x = vbsl_s16(set_one, const_one, x); + + // Use five iterations of Newton-Raphson method to get the result + x = vadd_s16(x, vmul_qs16(x, vsub_s16(const_one, vmul_qs16(temp, x, fixed_point_position)), fixed_point_position)); + x = vadd_s16(x, vmul_qs16(x, vsub_s16(const_one, vmul_qs16(temp, x, fixed_point_position)), 
fixed_point_position)); + x = vadd_s16(x, vmul_qs16(x, vsub_s16(const_one, vmul_qs16(temp, x, fixed_point_position)), fixed_point_position)); + x = vadd_s16(x, vmul_qs16(x, vsub_s16(const_one, vmul_qs16(temp, x, fixed_point_position)), fixed_point_position)); + x = vadd_s16(x, vmul_qs16(x, vsub_s16(const_one, vmul_qs16(temp, x, fixed_point_position)), fixed_point_position)); + + return vshl_s16(x, shift_value); +} + inline qint8x16_t vrecipq_qs8(qint8x16_t a, int fixed_point_position) { // We need two bits to store 2, thus we can only support formats from Q2.5 to Q7.0 - const qint8x16_t const_48_over_17 = vdupq_n_s8(0x7A >> (5 - fixed_point_position)); // 2.823 - const qint8x16_t const_minus_32_over_17 = vdupq_n_s8((0x3C >> (5 - fixed_point_position))); // -1.8823 - const qint8x16_t const_one = vdupq_n_s8(1 << fixed_point_position); + const qint8x16_t const_48_over_17 = vdupq_n_s8(0x5A >> (5 - fixed_point_position)); // 2.823 + const qint8x16_t const_32_over_17 = vdupq_n_s8((0x3C >> (5 - fixed_point_position))); // -1.8823 + const qint8x16_t const_one = vdupq_n_s8(1 << fixed_point_position); // Find shift value const qint8x16_t shift_value = vnegq_s8(vsubq_s8(vdupq_n_s8(8), vaddq_s8(vclzq_s8(a), vdupq_n_s8(fixed_point_position)))); const qint8x16_t temp = vshlq_s8(a, shift_value); - qint8x16_t x = vsubq_qs8(const_48_over_17, vmulq_qs8(temp, const_minus_32_over_17, fixed_point_position)); + qint8x16_t x = vsubq_qs8(const_48_over_17, vmulq_qs8(temp, const_32_over_17, fixed_point_position)); // Set initial guess to one if x > 1 uint8x16_t set_one = vcgtq_s8(x, const_one); @@ -648,18 +1093,45 @@ inline qint8x16_t vrecipq_qs8(qint8x16_t a, int fixed_point_position) return vshlq_s8(x, shift_value); } +inline qint16x8_t vrecipq_qs16(qint16x8_t a, int fixed_point_position) +{ + // We need two bits to store 2, thus we can only support formats from Q2.13 to Q15.0 + const qint16x8_t const_48_over_17 = vdupq_n_s16(0x5A56 >> (13 - fixed_point_position)); // 2.823 + const qint16x8_t const_32_over_17 = vdupq_n_s16(0x3C3C >> (13 - fixed_point_position)); // 1.8823 + const qint16x8_t const_one = vdupq_n_s16(1 << fixed_point_position); + + // Find shift value + const qint16x8_t shift_value = vnegq_s16(vsubq_s16(vdupq_n_s16(16), vaddq_s16(vclzq_s16(a), vdupq_n_s16(fixed_point_position)))); + const qint16x8_t temp = vshlq_s16(a, shift_value); + + qint16x8_t x = vsubq_qs16(const_48_over_17, vmulq_qs16(temp, const_32_over_17, fixed_point_position)); + + // Set initial guess to one if x > 1 + uint16x8_t set_one = vcgtq_s16(x, const_one); + x = vbslq_s16(set_one, const_one, x); + + // Use five iterations of Newton-Raphson method to get the result + x = vaddq_s16(x, vmulq_qs16(x, vsubq_s16(const_one, vmulq_qs16(temp, x, fixed_point_position)), fixed_point_position)); + x = vaddq_s16(x, vmulq_qs16(x, vsubq_s16(const_one, vmulq_qs16(temp, x, fixed_point_position)), fixed_point_position)); + x = vaddq_s16(x, vmulq_qs16(x, vsubq_s16(const_one, vmulq_qs16(temp, x, fixed_point_position)), fixed_point_position)); + x = vaddq_s16(x, vmulq_qs16(x, vsubq_s16(const_one, vmulq_qs16(temp, x, fixed_point_position)), fixed_point_position)); + x = vaddq_s16(x, vmulq_qs16(x, vsubq_s16(const_one, vmulq_qs16(temp, x, fixed_point_position)), fixed_point_position)); + + return vshlq_s16(x, shift_value); +} + inline qint8x16_t vqrecipq_qs8(qint8x16_t a, int fixed_point_position) { // We need two bits to store 2, thus we can only support formats from Q2.5 to Q7.0 - const qint8x16_t const_48_over_17 = vdupq_n_s8(0x7A >> (5 - 
fixed_point_position)); // 2.823 - const qint8x16_t const_minus_32_over_17 = vdupq_n_s8((0x3C >> (5 - fixed_point_position))); // -1.8823 - const qint8x16_t const_one = vdupq_n_s8(1 << fixed_point_position); + const qint8x16_t const_48_over_17 = vdupq_n_s8(0x5A >> (5 - fixed_point_position)); // 2.823 + const qint8x16_t const_32_over_17 = vdupq_n_s8((0x3C >> (5 - fixed_point_position))); // -1.8823 + const qint8x16_t const_one = vdupq_n_s8(1 << fixed_point_position); // Find shift value const qint8x16_t shift_value = vqnegq_s8(vqsubq_s8(vdupq_n_s8(8), vqaddq_s8(vclzq_s8(a), vdupq_n_s8(fixed_point_position)))); const qint8x16_t temp = vqshlq_s8(a, shift_value); - qint8x16_t x = vqsubq_qs8(const_48_over_17, vmulq_qs8(temp, const_minus_32_over_17, fixed_point_position)); + qint8x16_t x = vqsubq_qs8(const_48_over_17, vmulq_qs8(temp, const_32_over_17, fixed_point_position)); // Set initial guess to one if x > 1 uint8x16_t set_one = vcgtq_s8(x, const_one); @@ -673,18 +1145,55 @@ inline qint8x16_t vqrecipq_qs8(qint8x16_t a, int fixed_point_position) return vqshlq_s8(x, shift_value); } +inline qint16x8_t vqrecipq_qs16(qint16x8_t a, int fixed_point_position) +{ + // We need two bits to store 2, thus we can only support formats from Q2.13 to Q15.0 + const qint16x8_t const_48_over_17 = vdupq_n_s16(0x5A56 >> (13 - fixed_point_position)); // 2.823 + const qint16x8_t const_32_over_17 = vdupq_n_s16(0x3C3C >> (13 - fixed_point_position)); // 1.8823 + const qint16x8_t const_one = vdupq_n_s16(1 << fixed_point_position); + + // Find shift value + const qint16x8_t shift_value = vqnegq_s16(vqsubq_s16(vdupq_n_s16(16), vqaddq_s16(vclzq_s16(a), vdupq_n_s16(fixed_point_position)))); + const qint16x8_t temp = vqshlq_s16(a, shift_value); + + qint16x8_t x = vqsubq_qs16(const_48_over_17, vqmulq_qs16(temp, const_32_over_17, fixed_point_position)); + + // Set initial guess to one if x > 1 + uint16x8_t set_one = vcgtq_s16(x, const_one); + x = vbslq_s16(set_one, const_one, x); + + // Use five iterations of Newton-Raphson method to get the result + x = vqaddq_s16(x, vqmulq_qs16(x, vqsubq_s16(const_one, vqmulq_qs16(temp, x, fixed_point_position)), fixed_point_position)); + x = vqaddq_s16(x, vqmulq_qs16(x, vqsubq_s16(const_one, vqmulq_qs16(temp, x, fixed_point_position)), fixed_point_position)); + x = vqaddq_s16(x, vqmulq_qs16(x, vqsubq_s16(const_one, vqmulq_qs16(temp, x, fixed_point_position)), fixed_point_position)); + x = vqaddq_s16(x, vqmulq_qs16(x, vqsubq_s16(const_one, vqmulq_qs16(temp, x, fixed_point_position)), fixed_point_position)); + x = vqaddq_s16(x, vqmulq_qs16(x, vqsubq_s16(const_one, vqmulq_qs16(temp, x, fixed_point_position)), fixed_point_position)); + + return vqshlq_s16(x, shift_value); +} + inline qint8x8_t vdiv_qs8(qint8x8_t a, qint8x8_t b, int fixed_point_position) { return vmul_qs8(a, vrecip_qs8(b, fixed_point_position), fixed_point_position); } +inline qint16x4_t vdiv_qs16(qint16x4_t a, qint16x4_t b, int fixed_point_position) +{ + return vmul_qs16(a, vrecip_qs16(b, fixed_point_position), fixed_point_position); +} + inline qint8x16_t vdivq_qs8(qint8x16_t a, qint8x16_t b, int fixed_point_position) { return vmulq_qs8(a, vrecipq_qs8(b, fixed_point_position), fixed_point_position); } +inline qint16x8_t vdivq_qs16(qint16x8_t a, qint16x8_t b, int fixed_point_position) +{ + return vmulq_qs16(a, vrecipq_qs16(b, fixed_point_position), fixed_point_position); +} + template -inline qint8x8_t vtaylor_poly_qs8(int8x8_t a, int fixed_point_position) +inline qint8x8_t vtaylor_poly_qs8(qint8x8_t a, int 
fixed_point_position) { const qint8x8_t shift_value = vdup_n_s8(-(7 - fixed_point_position)); const qint8x8_t const_one = vdup_n_s8(1); @@ -699,8 +1208,24 @@ inline qint8x8_t vtaylor_poly_qs8(int8x8_t a, int fixed_point_position) return res; } +template +inline qint16x4_t vtaylor_poly_qs16(qint16x4_t a, int fixed_point_position) +{ + const qint16x4_t shift_value = vdup_n_s16(-(15 - fixed_point_position)); + const qint16x4_t const_one = vdup_n_s16(1); + const qint16x4_t A = vrshl_s16(islog ? log_tab_qs16[0] : exp_tab_qs16[0], islog ? vadd_s16(shift_value, const_one) : shift_value); + const qint16x4_t B = vrshl_s16(islog ? log_tab_qs16[1] : exp_tab_qs16[1], shift_value); + const qint16x4_t C = vrshl_s16(islog ? log_tab_qs16[2] : exp_tab_qs16[2], shift_value); + const qint16x4_t D = vrshl_s16(islog ? log_tab_qs16[3] : exp_tab_qs16[3], shift_value); + const qint16x4_t x1 = vadd_s16(vmul_qs16(a, D, fixed_point_position), C); + const qint16x4_t x2 = vadd_s16(vmul_qs16(a, x1, fixed_point_position), B); + const qint16x4_t x3 = vadd_s16(vmul_qs16(a, x2, fixed_point_position), A); + const qint16x4_t res = vmul_qs16(a, x3, fixed_point_position); + return res; +} + template -inline qint8x8_t vqtaylor_poly_qs8(int8x8_t a, int fixed_point_position) +inline qint8x8_t vqtaylor_poly_qs8(qint8x8_t a, int fixed_point_position) { const qint8x8_t shift_value = vdup_n_s8(-(7 - fixed_point_position)); const qint8x8_t const_one = vdup_n_s8(1); @@ -715,6 +1240,22 @@ inline qint8x8_t vqtaylor_poly_qs8(int8x8_t a, int fixed_point_position) return res; } +template +inline qint16x4_t vqtaylor_poly_qs16(qint16x4_t a, int fixed_point_position) +{ + const qint16x4_t shift_value = vdup_n_s16(-(15 - fixed_point_position)); + const qint16x4_t const_one = vdup_n_s16(1); + const qint16x4_t A = vqrshl_s16(islog ? log_tab_qs16[0] : exp_tab_qs16[0], islog ? vqadd_s16(shift_value, const_one) : shift_value); + const qint16x4_t B = vqrshl_s16(islog ? log_tab_qs16[1] : exp_tab_qs16[1], shift_value); + const qint16x4_t C = vqrshl_s16(islog ? log_tab_qs16[2] : exp_tab_qs16[2], shift_value); + const qint16x4_t D = vqrshl_s16(islog ? log_tab_qs16[3] : exp_tab_qs16[3], shift_value); + const qint16x4_t x1 = vqadd_s16(vqmul_qs16(a, D, fixed_point_position), C); + const qint16x4_t x2 = vqadd_s16(vqmul_qs16(a, x1, fixed_point_position), B); + const qint16x4_t x3 = vqadd_s16(vqmul_qs16(a, x2, fixed_point_position), A); + const qint16x4_t res = vqmul_qs16(a, x3, fixed_point_position); + return res; +} + template inline qint8x16_t vtaylor_polyq_qs8(qint8x16_t a, int fixed_point_position) { @@ -731,6 +1272,22 @@ inline qint8x16_t vtaylor_polyq_qs8(qint8x16_t a, int fixed_point_position) return res; } +template +inline qint16x8_t vtaylor_polyq_qs16(qint16x8_t a, int fixed_point_position) +{ + const qint16x8_t shift_value = vdupq_n_s16(-(15 - fixed_point_position)); + const qint16x8_t const_one = vdupq_n_s16(1); + const qint16x8_t A = vrshlq_s16(islog ? log_tabq_qs16[0] : exp_tabq_qs16[0], islog ? vaddq_s16(shift_value, const_one) : shift_value); + const qint16x8_t B = vrshlq_s16(islog ? log_tabq_qs16[1] : exp_tabq_qs16[1], shift_value); + const qint16x8_t C = vrshlq_s16(islog ? log_tabq_qs16[2] : exp_tabq_qs16[2], shift_value); + const qint16x8_t D = vrshlq_s16(islog ? 
log_tabq_qs16[3] : exp_tabq_qs16[3], shift_value); + const qint16x8_t x1 = vaddq_s16(vmulq_qs16(a, D, fixed_point_position), C); + const qint16x8_t x2 = vaddq_s16(vmulq_qs16(a, x1, fixed_point_position), B); + const qint16x8_t x3 = vaddq_s16(vmulq_qs16(a, x2, fixed_point_position), A); + const qint16x8_t res = vmulq_qs16(a, x3, fixed_point_position); + return res; +} + template inline qint8x16_t vqtaylor_polyq_qs8(qint8x16_t a, int fixed_point_position) { @@ -747,6 +1304,22 @@ inline qint8x16_t vqtaylor_polyq_qs8(qint8x16_t a, int fixed_point_position) return res; } +template +inline qint16x8_t vqtaylor_polyq_qs16(qint16x8_t a, int fixed_point_position) +{ + const qint16x8_t shift_value = vdupq_n_s16(-(15 - fixed_point_position)); + const qint16x8_t const_one = vdupq_n_s16(1); + const qint16x8_t A = vqrshlq_s16(islog ? log_tabq_qs16[0] : exp_tabq_qs16[0], islog ? vqaddq_s16(shift_value, const_one) : shift_value); + const qint16x8_t B = vqrshlq_s16(islog ? log_tabq_qs16[1] : exp_tabq_qs16[1], shift_value); + const qint16x8_t C = vqrshlq_s16(islog ? log_tabq_qs16[2] : exp_tabq_qs16[2], shift_value); + const qint16x8_t D = vqrshlq_s16(islog ? log_tabq_qs16[3] : exp_tabq_qs16[3], shift_value); + const qint16x8_t x1 = vqaddq_s16(vqmulq_qs16(a, D, fixed_point_position), C); + const qint16x8_t x2 = vqaddq_s16(vqmulq_qs16(a, x1, fixed_point_position), B); + const qint16x8_t x3 = vqaddq_s16(vqmulq_qs16(a, x2, fixed_point_position), A); + const qint16x8_t res = vqmulq_qs16(a, x3, fixed_point_position); + return res; +} + inline qint8x8_t vqexp_qs8(qint8x8_t a, int fixed_point_position) { const qint8x8_t shift_value = vdup_n_s8(fixed_point_position - 7); @@ -773,6 +1346,32 @@ inline qint8x8_t vqexp_qs8(qint8x8_t a, int fixed_point_position) return poly; } +inline qint16x4_t vqexp_qs16(qint16x4_t a, int fixed_point_position) +{ + const qint16x4_t shift_value = vdup_n_s16(fixed_point_position - 15); + const qint16x4_t const_one = vdup_n_s16(1 << fixed_point_position); + const qint16x4_t const_ln2 = vqrshl_s16(vdup_n_s16(0x58B9), shift_value); // ln(2) + const qint16x4_t const_inv_ln2 = vorr_s16(vqrshl_s16(vdup_n_s16(0x38AA), shift_value), const_one); // 1/ln(2) + + // Perform range reduction [-log(2),log(2)] + const qint16x4_t m = vqmul_qs16(a, const_inv_ln2, fixed_point_position); // x / ln(2) + + // get decimal part from m + const qint16x4_t dec_m = vqshl_s16(m, vdup_n_s16(-fixed_point_position)); + + qint16x4_t alpha = vqmul_qs16(vqshl_s16(dec_m, vdup_n_s16(fixed_point_position)), const_ln2, fixed_point_position); + alpha = vqabs_qs16(vqsub_s16(a, alpha)); + + // Polynomial Approximation + qint16x4_t poly = vqtaylor_poly_qs16(alpha, fixed_point_position); + poly = vqadd_s16(poly, const_one); + + // Reconstruct + poly = vqshl_s16(poly, dec_m); + + return poly; +} + inline qint8x16_t vqexpq_qs8(qint8x16_t a, int fixed_point_position) { const qint8x16_t shift_value = vdupq_n_s8(fixed_point_position - 7); @@ -799,6 +1398,32 @@ inline qint8x16_t vqexpq_qs8(qint8x16_t a, int fixed_point_position) return poly; } +inline qint16x8_t vqexpq_qs16(qint16x8_t a, int fixed_point_position) +{ + const qint16x8_t shift_value = vdupq_n_s16(fixed_point_position - 15); + const qint16x8_t const_one = vdupq_n_s16(1 << fixed_point_position); + const qint16x8_t const_ln2 = vqrshlq_s16(vdupq_n_s16(0x58B9), shift_value); // ln(2) + const qint16x8_t const_inv_ln2 = vorrq_s16(vqrshlq_s16(vdupq_n_s16(0x38AA), shift_value), const_one); // 1/ln(2) + + // Perform range reduction [-log(2),log(2)] + const qint16x8_t m = 
vqmulq_qs16(a, const_inv_ln2, fixed_point_position); // x / ln(2) + + // get decimal part from m + const qint16x8_t dec_m = vqshlq_s16(m, vdupq_n_s16(-fixed_point_position)); + + qint16x8_t alpha = vqmulq_qs16(vqshlq_s16(dec_m, vdupq_n_s16(fixed_point_position)), const_ln2, fixed_point_position); + alpha = vqabsq_qs16(vqsubq_qs16(a, alpha)); + + // Polynomial Approximation + qint16x8_t poly = vqtaylor_polyq_qs16(alpha, fixed_point_position); + poly = vqaddq_s16(poly, const_one); + + // Reconstruct + poly = vqshlq_s16(poly, dec_m); + + return poly; +} + inline qint8x8_t vlog_qs8(qint8x8_t a, int fixed_point_position) { const qint8x8_t const_one = vdup_n_s8(1 << fixed_point_position); @@ -838,6 +1463,45 @@ inline qint8x8_t vlog_qs8(qint8x8_t a, int fixed_point_position) return poly; } +inline qint16x4_t vlog_qs16(qint16x4_t a, int fixed_point_position) +{ + const qint16x4_t const_one = vdup_n_s16(1 << fixed_point_position); + const qint16x4_t const_fifteen_dec = vdup_n_s16(15); + const qint16x4_t const_ln2 = vdup_n_s16(0x58B9 >> (15 - fixed_point_position)); // ln(2) + + // If 0 < a < 1, calculate log(1/x) + uint16x4_t calc_reciprocal = vclt_s16(a, const_one); + qint16x4_t recip = vdup_n_s16(0); + recip = vbsl_s16(calc_reciprocal, recip, a); + + // Calculate reciprocal + recip = vrecip_qs16(recip, fixed_point_position); + a = vbsl_s16(calc_reciprocal, recip, a); + + // Get decimal part of a + qint16x4_t shift_value = vdup_n_s16(-fixed_point_position); + qint16x4_t dec_a = vshl_s16(a, shift_value); // a >> fixed_point_position + + // Get exponent of 2^n which is equal or less than dec_a + shift_value = vsub_s16(const_fifteen_dec, vclz_s16(dec_a)); + + // Get x to range (1, 2] + const qint16x4_t shift_value_neg = vneg_s16(shift_value); + const qint16x4_t temp = vsub_s16(vrshl_s16(a, shift_value_neg), const_one); + const qint16x4_t sum = vmul_s16(shift_value, const_one); + + // Polynomial Approximation + qint16x4_t poly = vtaylor_poly_qs16(temp, fixed_point_position); + + // Reconstruct + poly = vmul_qs16(vadd_s16(poly, sum), const_ln2, fixed_point_position); + + // Set negative value for 0 < a < 1 + poly = vbsl_s16(calc_reciprocal, vneg_s16(poly), poly); + + return poly; +} + inline qint8x16_t vlogq_qs8(qint8x16_t a, int fixed_point_position) { const qint8x16_t const_one = vdupq_n_s8(1 << fixed_point_position); @@ -877,6 +1541,45 @@ inline qint8x16_t vlogq_qs8(qint8x16_t a, int fixed_point_position) return poly; } +inline qint16x8_t vlogq_qs16(qint16x8_t a, int fixed_point_position) +{ + const qint16x8_t const_one = vdupq_n_s16(1 << fixed_point_position); + const qint16x8_t const_fifteen_dec = vdupq_n_s16(15); + const qint16x8_t const_ln2 = vdupq_n_s16(0x58B9 >> (15 - fixed_point_position)); // ln(2) + + // If 0 < a < 1, calculate log(1/x) + uint16x8_t calc_reciprocal = vcltq_s16(a, const_one); + qint16x8_t recip = vdupq_n_s16(0); + recip = vbslq_s16(calc_reciprocal, a, recip); + + // Calculate reciprocal + recip = vqrecipq_qs16(recip, fixed_point_position); + a = vbslq_s16(calc_reciprocal, recip, a); + + // Get decimal part of a + qint16x8_t shift_value = vdupq_n_s16(-fixed_point_position); + qint16x8_t dec_a = vshlq_s16(a, shift_value); // a >> fixed_point_position + + // Get exponent of 2^n which is equal or less than dec_a + shift_value = vqsubq_s16(const_fifteen_dec, vclzq_s16(dec_a)); + + // Get x to range (1, 2] + const qint16x8_t shift_value_neg = vnegq_s16(shift_value); + const qint16x8_t temp = vqsubq_s16(vrshlq_s16(a, shift_value_neg), const_one); + const qint16x8_t sum = 
vmulq_s16(shift_value, const_one); + + // Polynomial Approximation + qint16x8_t poly = vtaylor_polyq_qs16(temp, fixed_point_position); + + // Reconstruct + poly = vqmulq_qs16(vqaddq_s16(poly, sum), const_ln2, fixed_point_position); + + // Set negative value for 0 < a < 1 + poly = vbslq_s16(calc_reciprocal, vnegq_s16(poly), poly); + + return poly; +} + inline qint8x8_t vinvsqrt_qs8(qint8x8_t a, int fixed_point_position) { const qint8x8_t const_three = vdup_n_s8(3 << fixed_point_position); @@ -904,17 +1607,46 @@ inline qint8x8_t vinvsqrt_qs8(qint8x8_t a, int fixed_point_position) return vshl_s8(x, shift_value2); } +inline qint16x4_t vinvsqrt_qs16(qint16x4_t a, int fixed_point_position) +{ + const qint16x4_t const_three = vdup_n_s16(3 << fixed_point_position); + + // Find shift value. Number must be in (0.5, 2) range. + qint16x4_t shift_value = vneg_s16(vsub_s16(vdup_n_s16(16), vadd_s16(vclz_s16(a), vdup_n_s16(fixed_point_position)))); + + // Add one when the shift value is negative in order to get the correct result when we shift right with 1 + qint16x4_t temp = vsub_s16(vdup_n_s16(16), vadd_s16(vclz_s16(a), vdup_n_s16(fixed_point_position))); + uint16x4_t temp_ltz = vclt_s16(temp, vdup_n_qs16(0)); + temp = vbsl_s16(temp_ltz, vadd_s16(temp, vdup_n_s16(1)), temp); + qint16x4_t shift_value2 = vneg_s16(vshr_n_s16(temp, 1)); + + temp = vshl_s16(a, shift_value); + + // Initial guess + qint16x4_t x = temp; + + // Calculate (x / 2) * (3 - a * x^2) + // After five iterations we have the result for 8 bit + x = vshr_n_s16(vmul_qs16(x, vsub_s16(const_three, vmul_qs16(temp, vmul_qs16(x, x, fixed_point_position), fixed_point_position)), fixed_point_position), 1); + x = vshr_n_s16(vmul_qs16(x, vsub_s16(const_three, vmul_qs16(temp, vmul_qs16(x, x, fixed_point_position), fixed_point_position)), fixed_point_position), 1); + x = vshr_n_s16(vmul_qs16(x, vsub_s16(const_three, vmul_qs16(temp, vmul_qs16(x, x, fixed_point_position), fixed_point_position)), fixed_point_position), 1); + x = vshr_n_s16(vmul_qs16(x, vsub_s16(const_three, vmul_qs16(temp, vmul_qs16(x, x, fixed_point_position), fixed_point_position)), fixed_point_position), 1); + x = vshr_n_s16(vmul_qs16(x, vsub_s16(const_three, vmul_qs16(temp, vmul_qs16(x, x, fixed_point_position), fixed_point_position)), fixed_point_position), 1); + + return vshl_s16(x, shift_value2); +} + inline qint8x8_t vqinvsqrt_qs8(qint8x8_t a, int fixed_point_position) { const qint8x8_t const_three = vdup_n_s8(3 << fixed_point_position); // Find shift value. Number must be in (0.5, 2) range. 
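
These inverse square root routines all apply the Newton-Raphson update x <- x * (3 - a * x^2) / 2 to an input that has first been scaled into a narrow range. A small floating point sketch of just the iteration, to make the convergence easy to check (invsqrt_sketch and the starting guess of 1.0 are illustrative choices, not the NEON code path):

#include <cstdio>

// Newton-Raphson iteration for 1/sqrt(a): each step roughly doubles the number
// of correct bits, which is why more iterations are run for 16 bit than for 8 bit.
float invsqrt_sketch(float a, float x, int iterations)
{
    for(int i = 0; i < iterations; ++i)
    {
        x = 0.5f * x * (3.0f - a * x * x);
    }
    return x;
}

int main()
{
    std::printf("%f\n", invsqrt_sketch(1.5f, 1.0f, 5)); // ~0.816497 = 1/sqrt(1.5)
}
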
- qint8x8_t shift_value = vneg_s8(vqsub_s8(vdup_n_s8(8), vadd_s8(vclz_s8(a), vdup_n_s8(fixed_point_position)))); + qint8x8_t shift_value = vneg_s8(vqsub_s8(vdup_n_s8(8), vqadd_s8(vclz_s8(a), vdup_n_s8(fixed_point_position)))); // Add one when the shift value is negative in order to get the correct result when we shift right with 1 - qint8x8_t temp = vsub_s8(vdup_n_s8(8), vadd_s8(vclz_s8(a), vdup_n_s8(fixed_point_position))); + qint8x8_t temp = vqsub_s8(vdup_n_s8(8), vqadd_s8(vclz_s8(a), vdup_n_s8(fixed_point_position))); uint8x8_t temp_ltz = vclt_s8(temp, vdup_n_qs8(0)); - temp = vbsl_s8(temp_ltz, vadd_s8(temp, vdup_n_s8(1)), temp); + temp = vbsl_s8(temp_ltz, vqadd_s8(temp, vdup_n_s8(1)), temp); qint8x8_t shift_value2 = vneg_s8(vshr_n_s8(temp, 1)); temp = vshl_s8(a, shift_value); @@ -931,6 +1663,35 @@ inline qint8x8_t vqinvsqrt_qs8(qint8x8_t a, int fixed_point_position) return vshl_s8(x, shift_value2); } +inline qint16x4_t vqinvsqrt_qs16(qint16x4_t a, int fixed_point_position) +{ + const qint16x4_t const_three = vdup_n_s16(3 << fixed_point_position); + + // Find shift value. Number must be in (0.5, 2) range. + qint16x4_t shift_value = vneg_s16(vqsub_s16(vdup_n_s16(16), vqadd_s16(vclz_s16(a), vdup_n_s16(fixed_point_position)))); + + // Add one when the shift value is negative in order to get the correct result when we shift right with 1 + qint16x4_t temp = vqsub_s16(vdup_n_s16(16), vqadd_s16(vclz_s16(a), vdup_n_s16(fixed_point_position))); + uint16x4_t temp_ltz = vclt_s16(temp, vdup_n_qs16(0)); + temp = vbsl_s16(temp_ltz, vqadd_s16(temp, vdup_n_s16(1)), temp); + qint16x4_t shift_value2 = vneg_s16(vshr_n_s16(temp, 1)); + + temp = vshl_s16(a, shift_value); + + // Initial guess + qint16x4_t x = temp; + + // Calculate (x / 2) * (3 - a * x^2) + // After five iterations we have the result for 16 bit + x = vshr_n_s16(vqmul_qs16(x, vqsub_s16(const_three, vqmul_qs16(temp, vqmul_qs16(x, x, fixed_point_position), fixed_point_position)), fixed_point_position), 1); + x = vshr_n_s16(vqmul_qs16(x, vqsub_s16(const_three, vqmul_qs16(temp, vqmul_qs16(x, x, fixed_point_position), fixed_point_position)), fixed_point_position), 1); + x = vshr_n_s16(vqmul_qs16(x, vqsub_s16(const_three, vqmul_qs16(temp, vqmul_qs16(x, x, fixed_point_position), fixed_point_position)), fixed_point_position), 1); + x = vshr_n_s16(vqmul_qs16(x, vqsub_s16(const_three, vqmul_qs16(temp, vqmul_qs16(x, x, fixed_point_position), fixed_point_position)), fixed_point_position), 1); + x = vshr_n_s16(vqmul_qs16(x, vqsub_s16(const_three, vqmul_qs16(temp, vqmul_qs16(x, x, fixed_point_position), fixed_point_position)), fixed_point_position), 1); + + return vqshl_s16(x, shift_value2); +} + inline qint8x16_t vinvsqrtq_qs8(qint8x16_t a, int fixed_point_position) { const qint8x16_t const_three = vdupq_n_s8(3 << fixed_point_position); @@ -958,17 +1719,46 @@ inline qint8x16_t vinvsqrtq_qs8(qint8x16_t a, int fixed_point_position) return vshlq_s8(x, shift_value2); } +inline qint16x8_t vinvsqrtq_qs16(qint16x8_t a, int fixed_point_position) +{ + const qint16x8_t const_three = vdupq_n_s16(3 << fixed_point_position); + + // Find shift value. Number must be in (0.5, 2) range. 
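
The switch from the plain vadd/vsub forms to the vqadd/vqsub forms in the saturating variants matters because ordinary integer arithmetic wraps on overflow, while the q-prefixed intrinsics clamp to the representable range. A minimal scalar illustration for 16 bit values (sat_add_s16 is an assumed helper, not a library function):

#include <algorithm>
#include <cstdint>

// Saturating 16 bit addition: clamp to [-32768, 32767] instead of wrapping.
inline int16_t sat_add_s16(int16_t a, int16_t b)
{
    const int32_t sum = static_cast<int32_t>(a) + static_cast<int32_t>(b);
    return static_cast<int16_t>(std::min<int32_t>(32767, std::max<int32_t>(-32768, sum)));
}

// Example: 30000 + 10000 wraps around to -25536 with plain int16_t arithmetic,
// but clamps to 32767 here, mirroring what the vqadd_s16 intrinsic does.
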
+ qint16x8_t shift_value = vnegq_s16(vsubq_s16(vdupq_n_s16(16), vaddq_s16(vclzq_s16(a), vdupq_n_s16(fixed_point_position)))); + + // Add one when the shift value is negative in order to get the correct result when we shift right with 1 + qint16x8_t temp = vsubq_s16(vdupq_n_s16(16), vaddq_s16(vclzq_s16(a), vdupq_n_s16(fixed_point_position))); + uint16x8_t temp_ltz = vcltq_s16(temp, vdupq_n_qs16(0)); + temp = vbslq_s16(temp_ltz, vaddq_s16(temp, vdupq_n_s16(1)), temp); + qint16x8_t shift_value2 = vnegq_s16(vshrq_n_s16(temp, 1)); + + temp = vshlq_s16(a, shift_value); + + // Initial guess + qint16x8_t x = temp; + + // Calculate (x / 2) * (3 - a * x^2) + // After five iterations we have the result for 16 bit + x = vshrq_n_s16(vmulq_qs16(x, vsubq_s16(const_three, vmulq_qs16(temp, vmulq_qs16(x, x, fixed_point_position), fixed_point_position)), fixed_point_position), 1); + x = vshrq_n_s16(vmulq_qs16(x, vsubq_s16(const_three, vmulq_qs16(temp, vmulq_qs16(x, x, fixed_point_position), fixed_point_position)), fixed_point_position), 1); + x = vshrq_n_s16(vmulq_qs16(x, vsubq_s16(const_three, vmulq_qs16(temp, vmulq_qs16(x, x, fixed_point_position), fixed_point_position)), fixed_point_position), 1); + x = vshrq_n_s16(vmulq_qs16(x, vsubq_s16(const_three, vmulq_qs16(temp, vmulq_qs16(x, x, fixed_point_position), fixed_point_position)), fixed_point_position), 1); + x = vshrq_n_s16(vmulq_qs16(x, vsubq_s16(const_three, vmulq_qs16(temp, vmulq_qs16(x, x, fixed_point_position), fixed_point_position)), fixed_point_position), 1); + + return vshlq_s16(x, shift_value2); +} + inline qint8x16_t vqinvsqrtq_qs8(qint8x16_t a, int fixed_point_position) { const qint8x16_t const_three = vdupq_n_s8(3 << fixed_point_position); // Find shift value. Number must be in (0.5, 2) range. - qint8x16_t shift_value = vnegq_s8(vqsubq_s8(vdupq_n_s8(8), vaddq_s8(vclzq_s8(a), vdupq_n_s8(fixed_point_position)))); + qint8x16_t shift_value = vnegq_s8(vqsubq_s8(vdupq_n_s8(8), vqaddq_s8(vclzq_s8(a), vdupq_n_s8(fixed_point_position)))); // Add one when the shift value is negative in order to get the correct result when we shift right with 1 - qint8x16_t temp = vsubq_s8(vdupq_n_s8(8), vaddq_s8(vclzq_s8(a), vdupq_n_s8(fixed_point_position))); + qint8x16_t temp = vqsubq_s8(vdupq_n_s8(8), vqaddq_s8(vclzq_s8(a), vdupq_n_s8(fixed_point_position))); uint8x16_t temp_ltz = vcltq_s8(temp, vdupq_n_qs8(0)); - temp = vbslq_s8(temp_ltz, vaddq_s8(temp, vdupq_n_s8(1)), temp); + temp = vbslq_s8(temp_ltz, vqaddq_s8(temp, vdupq_n_s8(1)), temp); qint8x16_t shift_value2 = vnegq_s8(vshrq_n_s8(temp, 1)); temp = vshlq_s8(a, shift_value); @@ -982,7 +1772,36 @@ inline qint8x16_t vqinvsqrtq_qs8(qint8x16_t a, int fixed_point_position) x = vshrq_n_s8(vqmulq_qs8(x, vqsubq_s8(const_three, vqmulq_qs8(temp, vqmulq_qs8(x, x, fixed_point_position), fixed_point_position)), fixed_point_position), 1); x = vshrq_n_s8(vqmulq_qs8(x, vqsubq_s8(const_three, vqmulq_qs8(temp, vqmulq_qs8(x, x, fixed_point_position), fixed_point_position)), fixed_point_position), 1); - return vshlq_s8(x, shift_value2); + return vqshlq_s8(x, shift_value2); +} + +inline qint16x8_t vqinvsqrtq_qs16(qint16x8_t a, int fixed_point_position) +{ + const qint16x8_t const_three = vdupq_n_s16(3 << fixed_point_position); + + // Find shift value. Number must be in (0.5, 2) range. 
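
The shift bookkeeping in these routines relies on the identity 1/sqrt(a) = 2^(-s/2) * 1/sqrt(a / 2^s): the input is scaled by a power of two (derived from count leading zeros) into the range where the iteration behaves, and the result is shifted back by roughly half that amount, which is what shift_value2 holds. A floating point check of the identity, with arbitrary example values:

#include <cmath>
#include <cstdio>

int main()
{
    const double a      = 40.0;                 // input outside the (0.5, 2) range
    const int    s      = 5;                    // 40 / 2^5 = 1.25 lies inside it
    const double a_norm = a / std::pow(2.0, s);

    const double corrected = (1.0 / std::sqrt(a_norm)) * std::pow(2.0, -0.5 * s);
    std::printf("%f vs %f\n", corrected, 1.0 / std::sqrt(a)); // both ~0.158114
}
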
+ qint16x8_t shift_value = vnegq_s16(vqsubq_s16(vdupq_n_s16(16), vqaddq_s16(vclzq_s16(a), vdupq_n_s16(fixed_point_position)))); + + // Add one when the shift value is negative in order to get the correct result when we shift right with 1 + qint16x8_t temp = vqsubq_s16(vdupq_n_s16(16), vqaddq_s16(vclzq_s16(a), vdupq_n_s16(fixed_point_position))); + uint16x8_t temp_ltz = vcltq_s16(temp, vdupq_n_qs16(0)); + temp = vbslq_s16(temp_ltz, vqaddq_s16(temp, vdupq_n_s16(1)), temp); + qint16x8_t shift_value2 = vnegq_s16(vshrq_n_s16(temp, 1)); + + temp = vqshlq_s16(a, shift_value); + + // Initial guess + qint16x8_t x = temp; + + // Calculate (x / 2) * (3 - a * x^2) + // After five iterations we have the result for 16 bit + x = vshrq_n_s16(vqmulq_qs16(x, vqsubq_s16(const_three, vqmulq_qs16(temp, vqmulq_qs16(x, x, fixed_point_position), fixed_point_position)), fixed_point_position), 1); + x = vshrq_n_s16(vqmulq_qs16(x, vqsubq_s16(const_three, vqmulq_qs16(temp, vqmulq_qs16(x, x, fixed_point_position), fixed_point_position)), fixed_point_position), 1); + x = vshrq_n_s16(vqmulq_qs16(x, vqsubq_s16(const_three, vqmulq_qs16(temp, vqmulq_qs16(x, x, fixed_point_position), fixed_point_position)), fixed_point_position), 1); + x = vshrq_n_s16(vqmulq_qs16(x, vqsubq_s16(const_three, vqmulq_qs16(temp, vqmulq_qs16(x, x, fixed_point_position), fixed_point_position)), fixed_point_position), 1); + x = vshrq_n_s16(vqmulq_qs16(x, vqsubq_s16(const_three, vqmulq_qs16(temp, vqmulq_qs16(x, x, fixed_point_position), fixed_point_position)), fixed_point_position), 1); + + return vqshlq_s16(x, shift_value2); } inline qint8x8_t vtanh_qs8(qint8x8_t a, int fixed_point_position) @@ -998,6 +1817,19 @@ inline qint8x8_t vtanh_qs8(qint8x8_t a, int fixed_point_position) return tanh; } +inline qint16x4_t vtanh_qs16(qint16x4_t a, int fixed_point_position) +{ + const qint16x4_t const_one = vdup_n_s16(1 << fixed_point_position); + const qint16x4_t const_two = vdup_n_s16(2 << fixed_point_position); + + qint16x4_t exp2x = vqexp_qs16(vqmul_qs16(const_two, a, fixed_point_position), fixed_point_position); + qint16x4_t num = vqsub_qs16(exp2x, const_one); + qint16x4_t den = vqadd_qs16(exp2x, const_one); + qint16x4_t tanh = vqmul_qs16(num, vrecip_qs16(den, fixed_point_position), fixed_point_position); + + return tanh; +} + inline qint8x16_t vtanhq_qs8(qint8x16_t a, int fixed_point_position) { const qint8x16_t const_one = vdupq_n_s8(1 << fixed_point_position); @@ -1027,4 +1859,17 @@ inline float32x4x2_t vmax2q_f32(float32x4x2_t a, float32x4x2_t b) }; return res; } + +inline qint16x8_t vtanhq_qs16(qint16x8_t a, int fixed_point_position) +{ + const qint16x8_t const_one = vdupq_n_s16(1 << fixed_point_position); + const qint16x8_t const_two = vdupq_n_s16(2 << fixed_point_position); + + qint16x8_t exp2x = vqexpq_qs16(vqmulq_qs16(const_two, a, fixed_point_position), fixed_point_position); + qint16x8_t num = vqsubq_qs16(exp2x, const_one); + qint16x8_t den = vqaddq_qs16(exp2x, const_one); + qint16x8_t tanh = vqmulq_qs16(num, vqrecipq_qs16(den, fixed_point_position), fixed_point_position); + + return tanh; +} } diff --git a/tests/Utils.h b/tests/Utils.h index a6181b3c78..25023d46e6 100644 --- a/tests/Utils.h +++ b/tests/Utils.h @@ -499,6 +499,7 @@ void store_value_with_data_type(void *ptr, T value, DataType data_type) *reinterpret_cast(ptr) = value; break; case DataType::S16: + case DataType::QS16: *reinterpret_cast(ptr) = value; break; case DataType::U32: diff --git a/tests/validation/FixedPoint.h b/tests/validation/FixedPoint.h index 380bad04a1..fa99ff84c2 
100644 --- a/tests/validation/FixedPoint.h +++ b/tests/validation/FixedPoint.h @@ -756,8 +756,9 @@ struct functions fixed_point a = shift < 0 ? shift_left(x, -shift) : shift_right(x, shift); const fixed_point x_half = shift_right(a, 1); - // We need three iterations to find the result - for(int i = 0; i < 3; ++i) + // We need three iterations to find the result for QS8 and five for QS16 + constexpr int num_iterations = std::is_same::value ? 3 : 5; + for(int i = 0; i < num_iterations; ++i) { a = mul(a, sub(three_half, mul(x_half, mul(a, a)))); } diff --git a/tests/validation/NEON/Fixedpoint/Exp_QS16.cpp b/tests/validation/NEON/Fixedpoint/Exp_QS16.cpp new file mode 100644 index 0000000000..e6d7d860cb --- /dev/null +++ b/tests/validation/NEON/Fixedpoint/Exp_QS16.cpp @@ -0,0 +1,124 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "Globals.h" +#include "NEON/Helper.h" +#include "NEON/NEAccessor.h" +#include "TensorLibrary.h" +#include "TypePrinter.h" +#include "Utils.h" +#include "validation/Datasets.h" +#include "validation/ReferenceCPP.h" +#include "validation/Validation.h" + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/NEON/NEFixedPoint.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/Tensor.h" +#include "arm_compute/runtime/TensorAllocator.h" + +#include "boost_wrapper.h" + +#include +#include + +using namespace arm_compute; +using namespace arm_compute::test; +using namespace arm_compute::test::neon; +using namespace arm_compute::test::validation; + +namespace +{ +const float tolerance = 1.0f; /**< Tolerance value for comparing reference's output against implementation's output */ + +/** Compute Neon exponential function for signed 16 bit fixed point. + * + * @param[in] shape Shape of the input and output tensors. + * + * @return Computed output tensor. 
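
For reference, the saturating exponential exercised by this test follows the usual fixed point recipe: split x as m * ln(2) + r, evaluate exp(r) with the small polynomial coefficient tables, and reconstruct exp(x) by shifting the polynomial result left by m. A floating point sketch of that decomposition (exp_by_range_reduction is an assumed helper, and std::exp stands in for the polynomial kernel):

#include <cmath>
#include <cstdio>

// Range reduction for exp: exp(x) = 2^m * exp(r) with m = floor(x / ln(2))
// and r = x - m * ln(2), so the kernel only needs to be accurate on a small interval.
double exp_by_range_reduction(double x)
{
    const double ln2 = 0.6931471805599453;
    const int    m   = static_cast<int>(std::floor(x / ln2));
    const double r   = x - m * ln2;
    return std::ldexp(std::exp(r), m); // exp(r) * 2^m
}

int main()
{
    std::printf("%f vs %f\n", exp_by_range_reduction(2.5), std::exp(2.5)); // both ~12.182494
}
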
+ */ +Tensor compute_exp_qs16(const TensorShape &shape, int fixed_point_position) +{ + // Create tensors + Tensor src = create_tensor(shape, DataType::QS16, 1, fixed_point_position); + Tensor dst = create_tensor(shape, DataType::QS16, 1, fixed_point_position); + + constexpr unsigned int num_elems_processed_per_iteration = 8; + Window window = calculate_max_window(*src.info(), Steps(num_elems_processed_per_iteration)); + AccessWindowHorizontal input_access(src.info(), 0, num_elems_processed_per_iteration); + AccessWindowHorizontal output_access(dst.info(), 0, num_elems_processed_per_iteration); + + update_window_and_padding(window, input_access, output_access); + output_access.set_valid_region(window, src.info()->valid_region()); + + // Allocate tensors + src.allocator()->allocate(); + dst.allocator()->allocate(); + + BOOST_TEST(!src.info()->is_resizable()); + BOOST_TEST(!dst.info()->is_resizable()); + + // Fill tensors. Keep the range between [-1.0, 1.0) so the result won't + // overflow. + std::uniform_int_distribution<> distribution(-(1 << (fixed_point_position - 1)), (1 << (fixed_point_position - 1))); + library->fill(NEAccessor(src), distribution, 0); + + Iterator input(&src, window); + Iterator output(&dst, window); + + execute_window_loop(window, [&](const Coordinates & id) + { + qint16x8_t in = vld1q_qs16(reinterpret_cast(input.ptr())); + // Use saturated exp + vst1q_qs16(reinterpret_cast(output.ptr()), vqexpq_qs16(in, fixed_point_position)); + }, + input, output); + + return dst; +} +} // namespace + +#ifndef DOXYGEN_SKIP_THIS +BOOST_AUTO_TEST_SUITE(NEON) +BOOST_AUTO_TEST_SUITE(FixedPoint) +BOOST_AUTO_TEST_SUITE(QS16) +BOOST_AUTO_TEST_SUITE(Exp) + +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(RunSmall, Small1DShape() * boost::unit_test::data::xrange(1, 15), shape, fixed_point_position) +{ + // Compute function + Tensor dst = compute_exp_qs16(shape, fixed_point_position); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_fixed_point_operation(shape, DataType::QS16, DataType::QS16, FixedPointOp::EXP, fixed_point_position); + + // Validate output + validate(NEAccessor(dst), ref_dst, tolerance, 0); +} + +BOOST_AUTO_TEST_SUITE_END() +BOOST_AUTO_TEST_SUITE_END() +BOOST_AUTO_TEST_SUITE_END() +BOOST_AUTO_TEST_SUITE_END() +#endif diff --git a/tests/validation/NEON/Fixedpoint/Exp_QS8.cpp b/tests/validation/NEON/Fixedpoint/Exp_QS8.cpp index 086314fdd3..f8fc0c2ea3 100644 --- a/tests/validation/NEON/Fixedpoint/Exp_QS8.cpp +++ b/tests/validation/NEON/Fixedpoint/Exp_QS8.cpp @@ -78,9 +78,9 @@ Tensor compute_exp_qs8(const TensorShape &shape, int fixed_point_position) BOOST_TEST(!src.info()->is_resizable()); BOOST_TEST(!dst.info()->is_resizable()); - // Fill tensors. Keep the range between (1, (1 << (fixed_point_position - 1))) so the result won't + // Fill tensors. Keep the range between [-1.0, 1.0) so the result won't // overflow. E.g. 
e^7 = 1096, which cannot be represented in QS8 - std::uniform_int_distribution<> distribution(1, (1 << (fixed_point_position - 1))); + std::uniform_int_distribution<> distribution(-(1 << (fixed_point_position - 1)), (1 << (fixed_point_position - 1))); library->fill(NEAccessor(src), distribution, 0); Iterator input(&src, window); diff --git a/tests/validation/NEON/Fixedpoint/Invsqrt_QS16.cpp b/tests/validation/NEON/Fixedpoint/Invsqrt_QS16.cpp new file mode 100644 index 0000000000..9211ccfe50 --- /dev/null +++ b/tests/validation/NEON/Fixedpoint/Invsqrt_QS16.cpp @@ -0,0 +1,122 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "Globals.h" +#include "NEON/Helper.h" +#include "NEON/NEAccessor.h" +#include "TensorLibrary.h" +#include "TypePrinter.h" +#include "Utils.h" +#include "validation/Datasets.h" +#include "validation/ReferenceCPP.h" +#include "validation/Validation.h" + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/NEON/NEFixedPoint.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/Tensor.h" +#include "arm_compute/runtime/TensorAllocator.h" + +#include "boost_wrapper.h" + +#include +#include + +using namespace arm_compute; +using namespace arm_compute::test; +using namespace arm_compute::test::neon; +using namespace arm_compute::test::validation; + +namespace +{ +const float tolerance = 5.0f; /**< Tolerance value for comparing reference's output against implementation's output */ + +/** Compute Neon inverse square root function for signed 16 bit fixed point. + * + * @param[in] shape Shape of the input and output tensors. + * + * @return Computed output tensor. 
+ */ +Tensor compute_invsqrt_qs16(const TensorShape &shape, int fixed_point_position) +{ + // Create tensors + Tensor src = create_tensor(shape, DataType::QS16, 1, fixed_point_position); + Tensor dst = create_tensor(shape, DataType::QS16, 1, fixed_point_position); + + constexpr unsigned int num_elems_processed_per_iteration = 8; + Window window = calculate_max_window(*src.info(), Steps(num_elems_processed_per_iteration)); + AccessWindowHorizontal input_access(src.info(), 0, num_elems_processed_per_iteration); + AccessWindowHorizontal output_access(dst.info(), 0, num_elems_processed_per_iteration); + + update_window_and_padding(window, input_access, output_access); + output_access.set_valid_region(window, src.info()->valid_region()); + + // Allocate tensors + src.allocator()->allocate(); + dst.allocator()->allocate(); + + BOOST_TEST(!src.info()->is_resizable()); + BOOST_TEST(!dst.info()->is_resizable()); + + // Fill tensors. Keep the range between [1, 0x7FFF) + std::uniform_int_distribution<> distribution(1, 0x7FFF); + library->fill(NEAccessor(src), distribution, 0); + + Iterator input(&src, window); + Iterator output(&dst, window); + + execute_window_loop(window, [&](const Coordinates & id) + { + qint16x8_t in = vld1q_qs16(reinterpret_cast(input.ptr())); + vst1q_qs16(reinterpret_cast(output.ptr()), vqinvsqrtq_qs16(in, fixed_point_position)); + }, + input, output); + + return dst; +} +} // namespace + +#ifndef DOXYGEN_SKIP_THIS +BOOST_AUTO_TEST_SUITE(NEON) +BOOST_AUTO_TEST_SUITE(FixedPoint) +BOOST_AUTO_TEST_SUITE(QS16) +BOOST_AUTO_TEST_SUITE(Invsqrt) + +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(RunSmall, Small1DShape() * boost::unit_test::data::xrange(1, 14), shape, fixed_point_position) +{ + // Compute function + Tensor dst = compute_invsqrt_qs16(shape, fixed_point_position); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_fixed_point_operation(shape, DataType::QS16, DataType::QS16, FixedPointOp::INV_SQRT, fixed_point_position); + + // Validate output + validate(NEAccessor(dst), ref_dst, tolerance, 0); +} + +BOOST_AUTO_TEST_SUITE_END() +BOOST_AUTO_TEST_SUITE_END() +BOOST_AUTO_TEST_SUITE_END() +BOOST_AUTO_TEST_SUITE_END() +#endif diff --git a/tests/validation/NEON/Fixedpoint/Invsqrt_QS8.cpp b/tests/validation/NEON/Fixedpoint/Invsqrt_QS8.cpp index 3308f7d855..ab63cbe76f 100644 --- a/tests/validation/NEON/Fixedpoint/Invsqrt_QS8.cpp +++ b/tests/validation/NEON/Fixedpoint/Invsqrt_QS8.cpp @@ -49,7 +49,7 @@ using namespace arm_compute::test::validation; namespace { -const float tolerance = 3; /**< Tolerance value for comparing reference's output against implementation's output */ +const float tolerance = 4.0f; /**< Tolerance value for comparing reference's output against implementation's output */ /** Compute Neon inverse square root function for signed 8bit fixed point. * @@ -78,9 +78,8 @@ Tensor compute_invsqrt_qs8(const TensorShape &shape, int fixed_point_position) BOOST_TEST(!src.info()->is_resizable()); BOOST_TEST(!dst.info()->is_resizable()); - // Fill tensors. Keep the range between (32, 127) so the result won't - // overflow. E.g. for Q2.5 invsqrt(0.001) = 31.6, which cannot be represented. - std::uniform_int_distribution<> distribution(32, 127); + // Fill tensors. Keep the range between [1, 127). 
+ std::uniform_int_distribution<> distribution(1, 127); library->fill(NEAccessor(src), distribution, 0); Iterator input(&src, window); @@ -89,7 +88,7 @@ Tensor compute_invsqrt_qs8(const TensorShape &shape, int fixed_point_position) execute_window_loop(window, [&](const Coordinates & id) { qint8x16_t in = vld1q_s8(reinterpret_cast(input.ptr())); - vst1q_s8(reinterpret_cast(output.ptr()), vinvsqrtq_qs8(in, fixed_point_position)); + vst1q_s8(reinterpret_cast(output.ptr()), vqinvsqrtq_qs8(in, fixed_point_position)); }, input, output); diff --git a/tests/validation/NEON/Fixedpoint/Log_QS16.cpp b/tests/validation/NEON/Fixedpoint/Log_QS16.cpp new file mode 100644 index 0000000000..c23d12725b --- /dev/null +++ b/tests/validation/NEON/Fixedpoint/Log_QS16.cpp @@ -0,0 +1,123 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "Globals.h" +#include "NEON/Helper.h" +#include "NEON/NEAccessor.h" +#include "TensorLibrary.h" +#include "TypePrinter.h" +#include "Utils.h" +#include "validation/Datasets.h" +#include "validation/ReferenceCPP.h" +#include "validation/Validation.h" + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/NEON/NEFixedPoint.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/Tensor.h" +#include "arm_compute/runtime/TensorAllocator.h" + +#include "boost_wrapper.h" + +#include +#include + +using namespace arm_compute; +using namespace arm_compute::test; +using namespace arm_compute::test::neon; +using namespace arm_compute::test::validation; + +namespace +{ +const float tolerance = 7.0f; /**< Tolerance value for comparing reference's output against implementation's output */ + +/** Compute Neon logarithm function for signed 16 bit fixed point. + * + * @param[in] shape Shape of the input and output tensors. + * + * @return Computed output tensor. 
+ */ +Tensor compute_log_qs16(const TensorShape &shape, int fixed_point_position) +{ + // Create tensors + Tensor src = create_tensor(shape, DataType::QS16, 1, fixed_point_position); + Tensor dst = create_tensor(shape, DataType::QS16, 1, fixed_point_position); + + constexpr unsigned int num_elems_processed_per_iteration = 8; + Window window = calculate_max_window(*src.info(), Steps(num_elems_processed_per_iteration)); + AccessWindowHorizontal input_access(src.info(), 0, num_elems_processed_per_iteration); + AccessWindowHorizontal output_access(dst.info(), 0, num_elems_processed_per_iteration); + + update_window_and_padding(window, input_access, output_access); + output_access.set_valid_region(window, src.info()->valid_region()); + + // Allocate tensors + src.allocator()->allocate(); + dst.allocator()->allocate(); + + BOOST_TEST(!src.info()->is_resizable()); + BOOST_TEST(!dst.info()->is_resizable()); + + // Fill tensors. Keep the range between [(1 << (fixed_point_position - 1), 0x3FFF) so the result won't + // overflow. + std::uniform_int_distribution<> distribution((1 << (fixed_point_position - 1)), 0x3FFF); + library->fill(NEAccessor(src), distribution, 0); + + Iterator input(&src, window); + Iterator output(&dst, window); + + execute_window_loop(window, [&](const Coordinates & id) + { + qint16x8_t in = vld1q_qs16(reinterpret_cast(input.ptr())); + vst1q_qs16(reinterpret_cast(output.ptr()), vlogq_qs16(in, fixed_point_position)); + }, + input, output); + + return dst; +} +} // namespace + +#ifndef DOXYGEN_SKIP_THIS +BOOST_AUTO_TEST_SUITE(NEON) +BOOST_AUTO_TEST_SUITE(FixedPoint) +BOOST_AUTO_TEST_SUITE(QS16) +BOOST_AUTO_TEST_SUITE(Log) + +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(RunSmall, Small1DShape() * boost::unit_test::data::xrange(4, 14), shape, fixed_point_position) +{ + // Compute function + Tensor dst = compute_log_qs16(shape, fixed_point_position); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_fixed_point_operation(shape, DataType::QS16, DataType::QS16, FixedPointOp::LOG, fixed_point_position); + + // Validate output + validate(NEAccessor(dst), ref_dst, tolerance, 0); +} + +BOOST_AUTO_TEST_SUITE_END() +BOOST_AUTO_TEST_SUITE_END() +BOOST_AUTO_TEST_SUITE_END() +BOOST_AUTO_TEST_SUITE_END() +#endif diff --git a/tests/validation/NEON/Fixedpoint/Log_QS8.cpp b/tests/validation/NEON/Fixedpoint/Log_QS8.cpp index 7b734c12b1..6789ec7264 100644 --- a/tests/validation/NEON/Fixedpoint/Log_QS8.cpp +++ b/tests/validation/NEON/Fixedpoint/Log_QS8.cpp @@ -78,9 +78,9 @@ Tensor compute_log_qs8(const TensorShape &shape, int fixed_point_position) BOOST_TEST(!src.info()->is_resizable()); BOOST_TEST(!dst.info()->is_resizable()); - // Fill tensors. Keep the range between ((1 << (fixed_point_position - 1), 63) so the result won't + // Fill tensors. Keep the range between [(1 << (fixed_point_position - 1), 63) so the result won't // overflow. E.g. for Q2.5 ln(0.001) = -6.9, which cannot be represented. 
- std::uniform_int_distribution<> distribution((1 << (fixed_point_position - 1)), 63); + std::uniform_int_distribution<> distribution((1 << (fixed_point_position - 1)), 0x3F); library->fill(NEAccessor(src), distribution, 0); Iterator input(&src, window); diff --git a/tests/validation/NEON/Fixedpoint/Reciprocal_QS16.cpp b/tests/validation/NEON/Fixedpoint/Reciprocal_QS16.cpp new file mode 100644 index 0000000000..c66cf0e6e6 --- /dev/null +++ b/tests/validation/NEON/Fixedpoint/Reciprocal_QS16.cpp @@ -0,0 +1,123 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "Globals.h" +#include "NEON/Helper.h" +#include "NEON/NEAccessor.h" +#include "TensorLibrary.h" +#include "TypePrinter.h" +#include "Utils.h" +#include "validation/Datasets.h" +#include "validation/ReferenceCPP.h" +#include "validation/Validation.h" + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/NEON/NEFixedPoint.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/Tensor.h" +#include "arm_compute/runtime/TensorAllocator.h" + +#include "boost_wrapper.h" + +#include +#include + +using namespace arm_compute; +using namespace arm_compute::test; +using namespace arm_compute::test::neon; +using namespace arm_compute::test::validation; + +namespace +{ +const float tolerance = 9.0f; /**< Tolerance value for comparing reference's output against implementation's output. */ + +/** Compute Neon reciprocal function for signed 16 bit fixed point. + * + * @param[in] shape Shape of the input and output tensors. + * + * @return Computed output tensor. 
+ */ +Tensor compute_reciprocal_qs16(const TensorShape &shape, int fixed_point_position) +{ + // Create tensors + Tensor src = create_tensor(shape, DataType::QS16, 1, fixed_point_position); + Tensor dst = create_tensor(shape, DataType::QS16, 1, fixed_point_position); + + constexpr unsigned int num_elems_processed_per_iteration = 8; + Window window = calculate_max_window(*src.info(), Steps(num_elems_processed_per_iteration)); + AccessWindowHorizontal input_access(src.info(), 0, num_elems_processed_per_iteration); + AccessWindowHorizontal output_access(dst.info(), 0, num_elems_processed_per_iteration); + + update_window_and_padding(window, input_access, output_access); + output_access.set_valid_region(window, src.info()->valid_region()); + + // Allocate tensors + src.allocator()->allocate(); + dst.allocator()->allocate(); + + BOOST_TEST(!src.info()->is_resizable()); + BOOST_TEST(!dst.info()->is_resizable()); + + // Fill tensors. Keep the range between [15, 0x7FFF) so the result won't + // overflow. + std::uniform_int_distribution<> distribution(15, 0x7FFF); + library->fill(NEAccessor(src), distribution, 0); + + Iterator input(&src, window); + Iterator output(&dst, window); + + execute_window_loop(window, [&](const Coordinates & id) + { + qint16x8_t in = vld1q_qs16(reinterpret_cast(input.ptr())); + vst1q_qs16(reinterpret_cast(output.ptr()), vqrecipq_qs16(in, fixed_point_position)); + }, + input, output); + + return dst; +} +} // namespace + +#ifndef DOXYGEN_SKIP_THIS +BOOST_AUTO_TEST_SUITE(NEON) +BOOST_AUTO_TEST_SUITE(FixedPoint) +BOOST_AUTO_TEST_SUITE(QS16) +BOOST_AUTO_TEST_SUITE(Reciprocal) + +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(RunSmall, Small1DShape() * boost::unit_test::data::xrange(1, 14), shape, fixed_point_position) +{ + // Compute function + Tensor dst = compute_reciprocal_qs16(shape, fixed_point_position); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_fixed_point_operation(shape, DataType::QS16, DataType::QS16, FixedPointOp::RECIPROCAL, fixed_point_position); + + // Validate output + validate(NEAccessor(dst), ref_dst, tolerance, 0); +} + +BOOST_AUTO_TEST_SUITE_END() +BOOST_AUTO_TEST_SUITE_END() +BOOST_AUTO_TEST_SUITE_END() +BOOST_AUTO_TEST_SUITE_END() +#endif diff --git a/tests/validation/NEON/Fixedpoint/Reciprocal_QS8.cpp b/tests/validation/NEON/Fixedpoint/Reciprocal_QS8.cpp index 4c1c782a18..f1f130a9cb 100644 --- a/tests/validation/NEON/Fixedpoint/Reciprocal_QS8.cpp +++ b/tests/validation/NEON/Fixedpoint/Reciprocal_QS8.cpp @@ -78,9 +78,9 @@ Tensor compute_reciprocal_qs8(const TensorShape &shape, int fixed_point_position BOOST_TEST(!src.info()->is_resizable()); BOOST_TEST(!dst.info()->is_resizable()); - // Fill tensors. Keep the range between (15, 100) so the result won't + // Fill tensors. Keep the range between [15, 100) so the result won't // overflow. E.g. for Q2.5 reciprocal(0.001) = 1000, which cannot be represented. 
- std::uniform_int_distribution<> distribution(15, 100); + std::uniform_int_distribution<> distribution(15, 0x7F); library->fill(NEAccessor(src), distribution, 0); Iterator input(&src, window); diff --git a/tests/validation/Reference.cpp b/tests/validation/Reference.cpp index be6ddba633..761bc20ea1 100644 --- a/tests/validation/Reference.cpp +++ b/tests/validation/Reference.cpp @@ -657,20 +657,20 @@ RawTensor Reference::compute_reference_fixed_point_operation(const TensorShape & switch(op) { case(FixedPointOp::INV_SQRT): - min = 32; - max = 127; + min = 1; + max = (dt_in == DataType::QS8) ? 0x7F : 0x7FFF; break; case(FixedPointOp::LOG): min = (1 << (fixed_point_position - 1)); - max = 63; + max = (dt_in == DataType::QS8) ? 0x3F : 0x3FFF; break; case(FixedPointOp::EXP): - min = 1; + min = -(1 << (fixed_point_position - 1)); max = (1 << (fixed_point_position - 1)); break; case(FixedPointOp::RECIPROCAL): min = 15; - max = 100; + max = (dt_in == DataType::QS8) ? 0x7F : 0x7FFF; break; default: ARM_COMPUTE_ERROR("Fixed point operation not supported"); diff --git a/tests/validation/TensorFactory.h b/tests/validation/TensorFactory.h index 610425bbfb..d842baddde 100644 --- a/tests/validation/TensorFactory.h +++ b/tests/validation/TensorFactory.h @@ -83,6 +83,7 @@ public: v = Tensor(shape, dt, fixed_point_position, reinterpret_cast(data)); break; case DataType::S16: + case DataType::QS16: using value_type_s16 = typename match_const::type; v = Tensor(shape, dt, fixed_point_position, reinterpret_cast(data)); break; diff --git a/tests/validation/Validation.cpp b/tests/validation/Validation.cpp index 8aada0cb0e..0092e4dfed 100644 --- a/tests/validation/Validation.cpp +++ b/tests/validation/Validation.cpp @@ -78,6 +78,8 @@ double get_double_data(const void *ptr, DataType data_type) return *reinterpret_cast(ptr); case DataType::S16: return *reinterpret_cast(ptr); + case DataType::QS16: + return *reinterpret_cast(ptr); case DataType::U32: return *reinterpret_cast(ptr); case DataType::S32: -- cgit v1.2.1
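
Note on the inverse-square-root kernel added above: vqinvsqrtq_qs16 first normalises its input into roughly [0.5, 1) using the leading-zero count, takes the normalised value itself as the initial guess, and then repeats the Newton-Raphson step x <- (x / 2) * (3 - a * x^2) five times (three iterations suffice for QS8, five for QS16, which is also what the updated reference in tests/validation/FixedPoint.h now uses). The plain-float sketch below is not the library code; it only illustrates, under the assumption that the input has already been normalised, how quickly that step converges (0.7 is an arbitrary example value).

    #include <cmath>
    #include <cstdio>

    int main()
    {
        // Normalised input, assumed to lie in [0.5, 1) as after the CLZ-based shift.
        const float a = 0.7f;
        // Initial guess: the normalised value itself, as in the QS16 kernel.
        float x = a;
        for (int i = 0; i < 5; ++i)
        {
            // Newton-Raphson step for 1/sqrt(a): x <- (x / 2) * (3 - a * x^2)
            x = 0.5f * x * (3.0f - a * x * x);
            std::printf("iteration %d: x = %f (exact 1/sqrt(a) = %f)\n", i + 1, x, 1.0f / std::sqrt(a));
        }
        return 0;
    }

For a = 0.7 the iterates reach 1.19523 by the fifth step, against an exact value of 1.195229, which is consistent with the 5.0 LSB tolerance used by the QS16 Invsqrt test.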
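Similarly, the new vtanh_qs16 / vtanhq_qs16 helpers compose the existing exp and reciprocal primitives through the identity tanh(x) = (e^(2x) - 1) / (e^(2x) + 1). The scalar float sketch below (again, not the library's fixed-point code) shows the same composition: numerator exp(2x) - 1, denominator exp(2x) + 1, result formed as numerator times the reciprocal of the denominator.

    #include <cmath>
    #include <cstdio>

    // tanh built from exp and a reciprocal, mirroring the structure of vtanh_qs16.
    static float tanh_via_exp(float x)
    {
        const float exp2x = std::exp(2.0f * x);
        return (exp2x - 1.0f) * (1.0f / (exp2x + 1.0f));
    }

    int main()
    {
        // Compare against the standard library implementation.
        std::printf("%f vs %f\n", tanh_via_exp(0.5f), std::tanh(0.5f));
        return 0;
    }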