From 0a8334cb78dae66fdc31257a96ba15f7c41bde50 Mon Sep 17 00:00:00 2001 From: Michalis Spyrou Date: Wed, 14 Jun 2017 18:00:05 +0100 Subject: COMPMID-400 Add support for 16 bit fixed point arithmetic. Change-Id: Iebfaef1b219d80d6362b7fd4b1357612b31e43cb Reviewed-on: http://mpd-gerrit.cambridge.arm.com/77749 Reviewed-by: Moritz Pflanzer Tested-by: Kaizen --- arm_compute/core/FixedPoint.inl | 197 ++++++++++++++++++++++++++++++++++------ 1 file changed, 168 insertions(+), 29 deletions(-) (limited to 'arm_compute/core/FixedPoint.inl') diff --git a/arm_compute/core/FixedPoint.inl b/arm_compute/core/FixedPoint.inl index 4263a6f00d..fab91d6699 100644 --- a/arm_compute/core/FixedPoint.inl +++ b/arm_compute/core/FixedPoint.inl @@ -46,13 +46,27 @@ namespace arm_compute inline qint8_t sqshl_qs8(qint8_t a, int shift) { qint16_t tmp = static_cast(a) << shift; + // Saturate the result in case of overflow and cast to qint8_t return saturate_convert(tmp); } +inline qint16_t sqshl_qs16(qint16_t a, int shift) +{ + qint32_t tmp = static_cast(a) << shift; + + // Saturate the result in case of overflow and cast to qint16_t + return saturate_convert(tmp); +} + inline qint8_t sabs_qs8(qint8_t a) { - return a & 0x7F; + return (a < 0) ? (a == std::numeric_limits::min()) ? std::numeric_limits::max() : -a : a; +} + +inline qint16_t sabs_qs16(qint16_t a) +{ + return (a < 0) ? (a == std::numeric_limits::min()) ? std::numeric_limits::max() : -a : a; } inline qint8_t sadd_qs8(qint8_t a, qint8_t b) @@ -60,6 +74,11 @@ inline qint8_t sadd_qs8(qint8_t a, qint8_t b) return a + b; } +inline qint16_t sadd_qs16(qint16_t a, qint16_t b) +{ + return a + b; +} + inline qint8_t sqadd_qs8(qint8_t a, qint8_t b) { // We need to store the temporary result in qint16_t otherwise we cannot evaluate the overflow @@ -83,6 +102,11 @@ inline qint8_t ssub_qs8(qint8_t a, qint8_t b) return a - b; } +inline qint16_t ssub_qs16(qint16_t a, qint16_t b) +{ + return a - b; +} + inline qint8_t sqsub_qs8(qint8_t a, qint8_t b) { // We need to store the temporary result in uint16_t otherwise we cannot evaluate the overflow @@ -92,6 +116,15 @@ inline qint8_t sqsub_qs8(qint8_t a, qint8_t b) return saturate_convert(tmp); } +inline qint16_t sqsub_qs16(qint16_t a, qint16_t b) +{ + // We need to store the temporary result in qint32_t otherwise we cannot evaluate the overflow + qint32_t tmp = static_cast(a) - static_cast(b); + + // Saturate the result in case of overflow and cast to qint16_t + return saturate_convert(tmp); +} + inline qint8_t smul_qs8(qint8_t a, qint8_t b, int fixed_point_position) { const qint16_t round_up_const = (1 << (fixed_point_position - 1)); @@ -104,6 +137,18 @@ inline qint8_t smul_qs8(qint8_t a, qint8_t b, int fixed_point_position) return static_cast(tmp >> fixed_point_position); } +inline qint16_t smul_qs16(qint16_t a, qint16_t b, int fixed_point_position) +{ + const qint32_t round_up_const = (1 << (fixed_point_position - 1)); + + qint32_t tmp = static_cast(a) * static_cast(b); + + // Rounding up + tmp += round_up_const; + + return static_cast(tmp >> fixed_point_position); +} + inline qint8_t sqmul_qs8(qint8_t a, qint8_t b, int fixed_point_position) { const qint16_t round_up_const = (1 << (fixed_point_position - 1)); @@ -140,16 +185,28 @@ inline qint16_t sqmull_qs8(qint8_t a, qint8_t b, int fixed_point_position) return tmp >> fixed_point_position; } +inline qint32_t sqmull_qs16(qint16_t a, qint16_t b, int fixed_point_position) +{ + const qint32_t round_up_const = (1 << (fixed_point_position - 1)); + + qint32_t tmp = static_cast(a) * static_cast(b); + + // Rounding up + tmp += round_up_const; + + return tmp >> fixed_point_position; +} + inline qint8_t sinvsqrt_qs8(qint8_t a, int fixed_point_position) { - qint8_t shift = 8 - (fixed_point_position + (__builtin_clz(a) - 24)); + const qint8_t shift = 8 - (fixed_point_position + (__builtin_clz(a) - 24)); - qint8_t const_three = (3 << fixed_point_position); - qint8_t temp = shift < 0 ? (a << -shift) : (a >> shift); - qint8_t x2 = temp; + const qint8_t const_three = (3 << fixed_point_position); + qint8_t temp = shift < 0 ? (a << -shift) : (a >> shift); + qint8_t x2 = temp; // We need three iterations to find the result - for(int i = 0; i < 3; i++) + for(int i = 0; i < 3; ++i) { qint8_t three_minus_dx = ssub_qs8(const_three, smul_qs8(temp, smul_qs8(x2, x2, fixed_point_position), fixed_point_position)); x2 = (smul_qs8(x2, three_minus_dx, fixed_point_position) >> 1); @@ -160,35 +217,84 @@ inline qint8_t sinvsqrt_qs8(qint8_t a, int fixed_point_position) return temp; } +inline qint16_t sinvsqrt_qs16(qint16_t a, int fixed_point_position) +{ + const qint16_t shift = 16 - (fixed_point_position + (__builtin_clz(a) - 16)); + + const qint16_t const_three = (3 << fixed_point_position); + qint16_t temp = shift < 0 ? (a << -shift) : (a >> shift); + qint16_t x2 = temp; + + // We need three iterations to find the result + for(int i = 0; i < 3; ++i) + { + qint16_t three_minus_dx = ssub_qs16(const_three, smul_qs16(temp, smul_qs16(x2, x2, fixed_point_position), fixed_point_position)); + x2 = smul_qs16(x2, three_minus_dx, fixed_point_position) >> 1; + } + + temp = shift < 0 ? (x2 << ((-shift) >> 1)) : (x2 >> (shift >> 1)); + + return temp; +} + inline qint8_t sdiv_qs8(qint8_t a, qint8_t b, int fixed_point_position) { - qint16_t temp = a << fixed_point_position; - return (qint8_t)(temp / b); + const qint16_t temp = a << fixed_point_position; + return static_cast(temp / b); +} + +inline qint16_t sdiv_qs16(qint16_t a, qint16_t b, int fixed_point_position) +{ + const qint32_t temp = a << fixed_point_position; + return static_cast(temp / b); } inline qint8_t sqexp_qs8(qint8_t a, int fixed_point_position) { // Constants - qint8_t const_one = (1 << fixed_point_position); - qint8_t ln2 = ((0x58 >> (6 - fixed_point_position)) + 1) >> 1; - qint8_t inv_ln2 = (((0x38 >> (6 - fixed_point_position)) + 1) >> 1) | const_one; - qint8_t A = ((0x7F >> (6 - fixed_point_position)) + 1) >> 1; - qint8_t B = ((0x3F >> (6 - fixed_point_position)) + 1) >> 1; - qint8_t C = ((0x16 >> (6 - fixed_point_position)) + 1) >> 1; - qint8_t D = ((0x05 >> (6 - fixed_point_position)) + 1) >> 1; + const qint8_t const_one = (1 << fixed_point_position); + const qint8_t ln2 = ((0x58 >> (6 - fixed_point_position)) + 1) >> 1; + const qint8_t inv_ln2 = (((0x38 >> (6 - fixed_point_position)) + 1) >> 1) | const_one; + const qint8_t A = ((0x7F >> (6 - fixed_point_position)) + 1) >> 1; + const qint8_t B = ((0x3F >> (6 - fixed_point_position)) + 1) >> 1; + const qint8_t C = ((0x16 >> (6 - fixed_point_position)) + 1) >> 1; + const qint8_t D = ((0x05 >> (6 - fixed_point_position)) + 1) >> 1; // Polynomial expansion - int dec_a = (sqmul_qs8(a, inv_ln2, fixed_point_position) >> fixed_point_position); - qint8_t alpha = sabs_qs8(sqsub_qs8(a, sqmul_qs8(ln2, sqshl_qs8(dec_a, fixed_point_position), fixed_point_position))); - qint8_t sum = sqadd_qs8(sqmul_qs8(alpha, D, fixed_point_position), C); - sum = sqadd_qs8(sqmul_qs8(alpha, sum, fixed_point_position), B); - sum = sqadd_qs8(sqmul_qs8(alpha, sum, fixed_point_position), A); - sum = sqmul_qs8(alpha, sum, fixed_point_position); - sum = sqadd_qs8(sum, const_one); + const int dec_a = (sqmul_qs8(a, inv_ln2, fixed_point_position) >> fixed_point_position); + const qint8_t alpha = sabs_qs8(sqsub_qs8(a, sqmul_qs8(ln2, sqshl_qs8(dec_a, fixed_point_position), fixed_point_position))); + qint8_t sum = sqadd_qs8(sqmul_qs8(alpha, D, fixed_point_position), C); + sum = sqadd_qs8(sqmul_qs8(alpha, sum, fixed_point_position), B); + sum = sqadd_qs8(sqmul_qs8(alpha, sum, fixed_point_position), A); + sum = sqmul_qs8(alpha, sum, fixed_point_position); + sum = sqadd_qs8(sum, const_one); return (dec_a < 0) ? (sum >> -dec_a) : sqshl_qs8(sum, dec_a); } +inline qint16_t sqexp_qs16(qint16_t a, int fixed_point_position) +{ + // Constants + const qint16_t const_one = (1 << fixed_point_position); + const qint16_t ln2 = ((0x58B9 >> (14 - fixed_point_position)) + 1) >> 1; + const qint16_t inv_ln2 = (((0x38AA >> (14 - fixed_point_position)) + 1) >> 1) | const_one; + const qint16_t A = ((0x7FBA >> (14 - fixed_point_position)) + 1) >> 1; + const qint16_t B = ((0x3FE9 >> (14 - fixed_point_position)) + 1) >> 1; + const qint16_t C = ((0x1693 >> (14 - fixed_point_position)) + 1) >> 1; + const qint16_t D = ((0x0592 >> (14 - fixed_point_position)) + 1) >> 1; + + // Polynomial expansion + const int dec_a = (sqmul_qs16(a, inv_ln2, fixed_point_position) >> fixed_point_position); + const qint16_t alpha = sabs_qs16(sqsub_qs16(a, sqmul_qs16(ln2, sqshl_qs16(dec_a, fixed_point_position), fixed_point_position))); + qint16_t sum = sqadd_qs16(sqmul_qs16(alpha, D, fixed_point_position), C); + sum = sqadd_qs16(sqmul_qs16(alpha, sum, fixed_point_position), B); + sum = sqadd_qs16(sqmul_qs16(alpha, sum, fixed_point_position), A); + sum = sqmul_qs16(alpha, sum, fixed_point_position); + sum = sqadd_qs16(sum, const_one); + + return (dec_a < 0) ? (sum >> -dec_a) : sqshl_qs16(sum, dec_a); +} + inline qint8_t slog_qs8(qint8_t a, int fixed_point_position) { // Constants @@ -214,14 +320,47 @@ inline qint8_t slog_qs8(qint8_t a, int fixed_point_position) a = ssub_qs8(a, const_one); // Polynomial expansion - auto sum = sqadd_qs8(sqmul_qs8(a, D, fixed_point_position), C); - sum = sqadd_qs8(sqmul_qs8(a, sum, fixed_point_position), B); - sum = sqadd_qs8(sqmul_qs8(a, sum, fixed_point_position), A); - sum = sqmul_qs8(a, sum, fixed_point_position); + qint8_t sum = sqadd_qs8(sqmul_qs8(a, D, fixed_point_position), C); + sum = sqadd_qs8(sqmul_qs8(a, sum, fixed_point_position), B); + sum = sqadd_qs8(sqmul_qs8(a, sum, fixed_point_position), A); + sum = sqmul_qs8(a, sum, fixed_point_position); return smul_qs8(sadd_qs8(sum, shift_val << fixed_point_position), ln2, fixed_point_position); } +inline qint16_t slog_qs16(qint16_t a, int fixed_point_position) +{ + // Constants + qint16_t const_one = (1 << fixed_point_position); + qint16_t ln2 = (0x58B9 >> (7 - fixed_point_position)); + qint16_t A = (0x5C0F >> (7 - fixed_point_position - 1)); + qint16_t B = -(0x56AE >> (7 - fixed_point_position)); + qint16_t C = (0x2933 >> (7 - fixed_point_position)); + qint16_t D = -(0x0AA7 >> (7 - fixed_point_position)); + + if((const_one == a) || (a < 0)) + { + return 0; + } + else if(a < const_one) + { + return -slog_qs16(sdiv_qs16(const_one, a, fixed_point_position), fixed_point_position); + } + + // Remove even powers of 2 + qint16_t shift_val = 31 - __builtin_clz(a >> fixed_point_position); + a >>= shift_val; + a = ssub_qs16(a, const_one); + + // Polynomial expansion + qint16_t sum = sqadd_qs16(sqmul_qs16(a, D, fixed_point_position), C); + sum = sqadd_qs16(sqmul_qs16(a, sum, fixed_point_position), B); + sum = sqadd_qs16(sqmul_qs16(a, sum, fixed_point_position), A); + sum = sqmul_qs16(a, sum, fixed_point_position); + + return smul_qs16(sadd_qs16(sum, shift_val << fixed_point_position), ln2, fixed_point_position); +} + inline float scvt_f32_qs8(qint8_t a, int fixed_point_position) { return static_cast(a) / (1 << fixed_point_position); @@ -230,7 +369,7 @@ inline float scvt_f32_qs8(qint8_t a, int fixed_point_position) inline qint8_t scvt_qs8_f32(float a, int fixed_point_position) { // round_nearest_integer(a * 2^(fixed_point_position)) - return static_cast(static_cast(a) * (1 << fixed_point_position) + 0.5f); + return static_cast(a * (1 << fixed_point_position) + 0.5f); } inline float scvt_f32_qs16(qint16_t a, int fixed_point_position) @@ -238,10 +377,10 @@ inline float scvt_f32_qs16(qint16_t a, int fixed_point_position) return static_cast(a) / (1 << fixed_point_position); } -inline qint8_t scvt_qs16_f32(float a, int fixed_point_position) +inline qint16_t scvt_qs16_f32(float a, int fixed_point_position) { // round_nearest_integer(a * 2^(fixed_point_position)) - return static_cast(static_cast(a) * (1 << fixed_point_position) + 0.5f); + return static_cast(a * (1 << fixed_point_position) + 0.5f); } inline qint8_t sqmovn_qs16(qint16_t a) -- cgit v1.2.1