diff options
Diffstat (limited to 'chapters')
-rw-r--r-- | chapters/ewise_binary.adoc | 48 | ||||
-rw-r--r-- | chapters/ewise_unary.adoc | 23 | ||||
-rw-r--r-- | chapters/image.adoc | 23 | ||||
-rw-r--r-- | chapters/introduction.adoc | 53 | ||||
-rw-r--r-- | chapters/pseudocode.adoc | 165 | ||||
-rw-r--r-- | chapters/reduction.adoc | 10 | ||||
-rw-r--r-- | chapters/tensor_ops.adoc | 129 | ||||
-rw-r--r-- | chapters/type_conversion.adoc | 55 |
8 files changed, 353 insertions, 153 deletions
diff --git a/chapters/ewise_binary.adoc b/chapters/ewise_binary.adoc index 35e454a..876ab4b 100644 --- a/chapters/ewise_binary.adoc +++ b/chapters/ewise_binary.adoc @@ -22,7 +22,7 @@ if (in_out_t == shape_t) { ERROR_IF(rank(shape) != 0 || rank(shape1) != 0 || rank(shape2) != 0); shape_t value1 = tensor_read<shape_t>(input1, [], []); shape_t value2 = tensor_read<shape_t>(input2, [], []); - shape_t result = apply_add<shape_t>(value1, value2); + shape_t result = apply_add_s<shape_t>(value1, value2); tensor_write<shape_t>(output, [], [], result); } else { ERROR_IF(shape != broadcast_shape(shape1, shape2)); @@ -31,7 +31,7 @@ if (in_out_t == shape_t) { dim_t index2 = apply_broadcast(shape, shape2, index); in_out_t value1 = tensor_read<in_out_t>(input1, shape1, index1); in_out_t value2 = tensor_read<in_out_t>(input2, shape2, index2); - in_out_t result = apply_add<in_out_t>(value1, value2); + in_out_t result = apply_add_s<in_out_t>(value1, value2); tensor_write<in_out_t>(output, shape, index, result); } } @@ -54,15 +54,16 @@ for_each(index in shape) { in_out_t value2 = tensor_read<in_out_t>(input2, shape2, index2); // Ensure that shift amount is appropriate for the data type - REQUIRE((in_out_t == int32_t && 0 <= value2 && value2 <= 31) || - (in_out_t == int16_t && 0 <= value2 && value2 <= 15) || - (in_out_t == int8_t && 0 <= value2 && value2 <= 7)); + REQUIRE((in_out_t == i32_t && 0 <= value2 && value2 <= 31) || + (in_out_t == i16_t && 0 <= value2 && value2 <= 15) || + (in_out_t == i8_t && 0 <= value2 && value2 <= 7)); - in_out_t result = value1 >> value2; - if (round == true && value2 > 0 && (value1 >> (value2 - 1)) & 1 != 0) { + in_out_t result = apply_arith_rshift<in_out_t>(value1, value2); + if (round == true && static_cast<int32_t>(value2) > 0 && + (apply_arith_rshift<in_out_t>(value1, apply_sub_s<in_out_t>(value2, 1)) & 1 != 0) { result = result + 1; } - result = apply_clip<in_out_t>(result, minimum<in_out_t>, maximum<in_out_t>); + result = 
apply_clip_s<in_out_t>(result, minimum_s<in_out_t>, maximum_s<in_out_t>); tensor_write<in_out_t>(output, shape, index, result); } ---- @@ -156,8 +157,8 @@ if (in_out_t == shape_t) { REQUIRE(value2 != 0); // This catches the case where we divide minimum<in_out_t> by -1 // which is not representable in two's complement - REQUIRE((int64_t)value1 / value2 <= maximum<in_out_t>); - in_out_t result = value1 / value2; + REQUIRE(static_cast<int64_t>(value1) / static_cast<int64_t>(value2) <= maximum_s<in_out_t>); + in_out_t result = apply_intdiv_s<in_out_t>(value1, value2); tensor_write<in_out_t>(output, shape, index, result); } } @@ -219,8 +220,9 @@ for_each(index in shape) { dim_t index2 = apply_broadcast(shape, shape2, index); in_out_t value1 = tensor_read<in_out_t>(input1, shape1, index1); in_out_t value2 = tensor_read<in_out_t>(input2, shape2, index2); - REQUIRE(0 <= value2 && value2 <= 31); - in_out_t result = (in_out_t)((unsigned in_out_t)value1 >> value2); + REQUIRE(0 <= static_cast<int32_t>(value2) && static_cast<int32_t>(value2) <= 31); + // Logical shifts happen as unsigned types internally + in_out_t result = apply_logical_rshift<in_out_t>(value1, value2); tensor_write<in_out_t>(output, shape, index, result); } ---- @@ -280,7 +282,7 @@ for_each(index in shape) { dim_t index2 = apply_broadcast(shape, shape2, index); in_out_t value1 = tensor_read<in_out_t>(input1, shape1, index1); in_out_t value2 = tensor_read<in_out_t>(input2, shape2, index2); - in_out_t result = apply_max(value1, value2); + in_out_t result = apply_max_s<in_out_t>(value1, value2); tensor_write<in_out_t>(output, shape, index, result); } ---- @@ -300,7 +302,7 @@ for_each(index in shape) { dim_t index2 = apply_broadcast(shape, shape2, index); in_out_t value1 = tensor_read<in_out_t>(input1, shape1, index1); in_out_t value2 = tensor_read<in_out_t>(input2, shape2, index2); - in_out_t result = apply_min(value1, value2); + in_out_t result = apply_min_s(value1, value2); tensor_write<in_out_t>(output, 
shape, index, result); } ---- @@ -330,14 +332,14 @@ if (in_out_t == shape_t) { in_t value1 = tensor_read<in_t>(input1, shape1, index1); in_t value2 = tensor_read<in_t>(input2, shape2, index2); out_t result; - if (in_t == int32_t && shift > 0) { - int64_t product = (int64_t)value1 * (int64_t)value2; - int64_t round = (int64_t)1 << (shift-1); + if (in_t == i32_t && shift > 0) { + int64_t product = sign_extend<int64_t>(value1) * sign_extend<int64_t>(value2); + int64_t round = static_cast<int64_t>(1) << (shift - 1); product = (product + round) >> shift; - REQUIRE(product >= minimum<int32_t> && product <= maximum<int32_t>) + REQUIRE(product >= minimum_s<i32_t> && product <= maximum_s<i32_t>) result = product; } else { - result = value1 * value2; // low 32-bits of result for int32_t + result = apply_mul_s(value1, value2); // low 32-bits of result for i32_t } tensor_write<out_t>(output, shape, index, result); } @@ -386,7 +388,7 @@ if (in_out_t == shape_t) { dim_t index2 = apply_broadcast(shape, shape2, index); in_out_t value1 = tensor_read<in_out_t>(input1, shape1, index1); in_out_t value2 = tensor_read<in_out_t>(input2, shape2, index2); - in_out_t result = apply_sub<in_out_t>(value1, value2); + in_out_t result = apply_sub_s<in_out_t>(value1, value2); tensor_write<in_out_t>(output, shape, index, result); } } @@ -416,11 +418,11 @@ REQUIRE(length(table) == TABLE_SIZE); for_each(index in shape) { in_t value = tensor_read<in_t>(input, shape, index); out_t result; - if (in_t == int8_t) { + if (in_t == i8_t) { // value is a signed int, convert to a 0 based index - result = table[value + 128]; + result = table[static_cast<int16_t>(value) + 128]; } else { - result = apply_lookup(table, value); + result = apply_lookup_s(static_cast<int16_t>(table), static_cast<int16_t>(value)); } tensor_write<out_t>(output, shape, index, result); } diff --git a/chapters/ewise_unary.adoc b/chapters/ewise_unary.adoc index f630a48..d3eacc4 100644 --- a/chapters/ewise_unary.adoc +++ 
b/chapters/ewise_unary.adoc @@ -1,7 +1,7 @@ // // This confidential and proprietary software may be used only as // authorised by a licensing agreement from ARM Limited -// (C) COPYRIGHT 2020-2022 ARM Limited +// (C) COPYRIGHT 2020-2023 ARM Limited // ALL RIGHTS RESERVED // The entire notice above must be reproduced on all authorised // copies and copies may only be made to the extent permitted @@ -26,11 +26,12 @@ include::{generated}/operators/ABS.adoc[] ---- for_each(index in shape) { in_out_t value1 = tensor_read<in_out_t>(input1, shape, index); - if (in_out_t == float_t && value1 == -0.0) { + if (is_floating_point(in_out_t) && value1 == -0.0) { value1 = 0.0; } - if (value1 < 0.0) - value1 = apply_sub<in_out_t>(0, value1); + if (static_cast<int32_t>(value1) < 0.0) { + value1 = apply_sub_s<in_out_t>(0, value1); + } tensor_write<in_out_t>(output, shape, index, value1); } ---- @@ -183,13 +184,17 @@ include::{generated}/operators/NEGATE.adoc[] [source,c++] ---- -ERROR_IF(in_out_t != int8_t && input1_zp != 0) // Zero point only for int8_t -ERROR_IF(in_out_t != int8_t && output_zp != 0) // Zero point only for int8_t +ERROR_IF(in_out_t != i8_t && input1_zp != 0) // Zero point only for int8_t +ERROR_IF(in_out_t != i8_t && output_zp != 0) // Zero point only for int8_t for_each(index in shape) { in_out_t value1 = tensor_read<in_out_t>(input1, shape, index); - acc_t value = (acc_t)value1 - input1_zp; - value = apply_sub<acc_t>(0, value); - in_out_t result = (in_out_t)apply_clip<acc_t>(value + output_zp, minimum<in_out_t>, maximum<in_out_t>); + acc_t value = apply_sub_s<acc_t>(sign_extend<acc_t>(value1), + sign_extend<acc_t>(input1_zp)); + value = apply_sub_s<acc_t>(0, value); + value = apply_add_s<acc_t>(value, sign_extend<acc_t>(output_zp)); + in_out_t result = truncate<in_out_t>(apply_clip_s<acc_t>(value, + minimum_s<in_out_t>, + maximum_s<in_out_t>)); tensor_write<in_out_t>(output, shape, index, result); } ---- diff --git a/chapters/image.adoc b/chapters/image.adoc 
index 8abc878..da839f8 100644 --- a/chapters/image.adoc +++ b/chapters/image.adoc @@ -1,7 +1,7 @@ // // This confidential and proprietary software may be used only as // authorised by a licensing agreement from ARM Limited -// (C) COPYRIGHT 2020-2021 ARM Limited +// (C) COPYRIGHT 2020-2023 ARM Limited // ALL RIGHTS RESERVED // The entire notice above must be reproduced on all authorised // copies and copies may only be made to the extent permitted @@ -91,22 +91,23 @@ for_each(0 <= n < N, 0 <= oy < OH, 0 <= ox < OW; 0 <= c < C) { int16_t rx = x - ix * scale_x_n; // (x % scale_x_n) if (is_floating_point(resize_t)) { - dy = (resize_t)ry / (resize_t)scale_y_n; - dx = (resize_t)rx / (resize_t)scale_x_n; + dy = static_cast<resize_t>(ry) / static_cast<resize_t>(scale_y_n); + dx = static_cast<resize_t>(rx) / static_cast<resize_t>(scale_x_n); } else { dy = ry; dx = rx; } // Note that -1 <= iy < IH and -1 <= ix < IW - int16_t iy0 = apply_max(iy, 0); - int16_t iy1 = apply_min(iy + 1, IH - 1); - int16_t ix0 = apply_max(ix, 0); - int16_t ix1 = apply_min(ix + 1, IW - 1); + int16_t iy0 = apply_max_s(iy, 0); + int16_t iy1 = apply_min_s(iy + 1, IH - 1); + int16_t ix0 = apply_max_s(ix, 0); + int16_t ix1 = apply_min_s(ix + 1, IW - 1); if (mode==BILINEAR) { - in_t v00 = tensor_read<in_t>(input, [N,IH,IW,C], [n,iy0,ix0,c]); - in_t v01 = tensor_read<in_t>(input, [N,IH,IW,C], [n,iy0,ix1,c]); - in_t v10 = tensor_read<in_t>(input, [N,IH,IW,C], [n,iy1,ix0,c]); - in_t v11 = tensor_read<in_t>(input, [N,IH,IW,C], [n,iy1,ix1,c]); + using in_s_t = make_signed(in_t); // Use signed calculations for i8/i16 + in_s_t v00 = static_cast<in_s_t>(tensor_read<in_t>(input, [N,IH,IW,C], [n,iy0,ix0,c])); + in_s_t v01 = static_cast<in_s_t>(tensor_read<in_t>(input, [N,IH,IW,C], [n,iy0,ix1,c])); + in_s_t v10 = static_cast<in_s_t>(tensor_read<in_t>(input, [N,IH,IW,C], [n,iy1,ix0,c])); + in_s_t v11 = static_cast<in_s_t>(tensor_read<in_t>(input, [N,IH,IW,C], [n,iy1,ix1,c])); acc = v00 * (unit_y - dy) * (unit_x - 
dx); acc += v01 * (unit_y - dy) * dx; acc += v10 * dy * (unit_x - dx); diff --git a/chapters/introduction.adoc b/chapters/introduction.adoc index d6f7bf9..26fef0e 100644 --- a/chapters/introduction.adoc +++ b/chapters/introduction.adoc @@ -338,11 +338,11 @@ fp64_t acc_prec; // 1<<(M+1) where M is the number of mantissa bits fp64_t acc_min_normal; // accumulator minimum normal greater than zero fp64_t two_m63 = -1.0/(fp64)((int64_t)-1<<63); // pow(2, -63) switch (acc_t) { - case fp32_t: acc_prec = (fp64_t)(1<<24); // pow(2, 24) + case fp32_t: acc_prec = static_cast<fp64_t>(1<<24); // pow(2, 24) acc_min_normal = two_m63 * two_m63; // pow(2, -126) break; - case fp16_t: acc_prec = (fp64_t)(1<<11); // pow(2, 11) - acc_min_normal = 1.0/(fp64_t)(1<<14); // pow(2, -14) + case fp16_t: acc_prec = static_cast<fp64_t>(1<<11); // pow(2, 11) + acc_min_normal = 1.0/static_cast<fp64_t>(1<<14); // pow(2, -14) break; default: ERROR_IF(true); } @@ -359,7 +359,7 @@ for_each(index in output_shape) { out_err = 0.0; } else { // 0.0 < out_bnd < infinity out_bnd = max(out_bnd, acc_min_normal); - out_err = ((fp64_t)out_imp - out_ref)*acc_prec/out_bnd; + out_err = (static_cast<fp64_t>(out_imp) - out_ref) * acc_prec / out_bnd; REQUIRE(abs(out_err) <= ksb); } out_err_sum += out_err; @@ -457,11 +457,21 @@ The number formats supported by a given operator are listed in its table of supp | - |Boolean value. Size implementation defined. The TOSA reference model implements this as int8_t with 0 for false and 1 for true. All non-zero values are accepted on input as true. +|i4_t +| - +| - +|Signless 4-bit integer type. Will be interpreted as int4_t by all operators + |int4_t | -7 | +7 |Signed 4-bit two's-complement value. Excludes -8 to maintain a symmetric about zero range for weights. +|i8_t +| - +| - +|Signless 8-bit integer value. Will be interpreted as int8_t unless otherwise specified by an operator. 
+ |int8_t | -128 | +127 @@ -470,7 +480,12 @@ The number formats supported by a given operator are listed in its table of supp |uint8_t | 0 | 255 -|Unsigned 8-bit value. +|Unsigned 8-bit integer value. + +|i16_t +| - +| - +|Signless 16-bit integer type. Will be interpreted as int16_t unless otherwise specified by an operator. |int16_t | -32768 @@ -482,11 +497,21 @@ The number formats supported by a given operator are listed in its table of supp | 65535 |Unsigned 16-bit value. +|i32_t +| - +| - +|Signless 32-bit integer value. Will be interpreted as int32_t by all operators. + |int32_t | -(1<<31) | (1<<31)-1 |Signed 32-bit two's-complement value. +|i48_t +| - +| - +|Signless 48-bit integer value. Will be interpreted as int48_t by all operators. + |int48_t | -(1<<47) | (1<<47)-1 @@ -542,7 +567,9 @@ This ensures that a Base Inference profile TOSA implementation can calculate the === Integer Behavior -Integer calculations must be standard two's-complement or unsigned calculations. +TOSA integer inputs and outputs are specified by signless values with the given number of bits. +Unless otherwise specified, these values will be interpreted as signed twos-complement. +The pseudocode will use int*_t to indicate use as a signed value and uint*_t to indicate use as an unsigned value. If overflow occurs doing integer calculation, the result is unpredictable, as indicated by the REQUIRE checks in the pseudocode for the operators. Unsigned 8 and 16-bit values are only allowed in the RESCALE operation, to allow for compatibility with networks which expect unsigned 8-bit or 16-bit tensors for input and output. 
@@ -598,20 +625,20 @@ int32_t apply_scale_32(int32_t value, int32_t multiplier, int8_t shift, bool_t d if (shift > 31 && value >= 0) round += 1<<30; if (shift > 31 && value < 0) round -= 1<<30; } - int64_t result = (int64_t)value * multiplier + round; + int64_t result = static_cast<int64_t>(value) * multiplier + round; result = result >> shift; // result will fit a 32-bit range due to the REQUIRE on value - return (int32_t)result; + return static_cast<int32_t>(result); } int32_t apply_scale_16(int48_t value, int16_t multipler, int8_t shift) { REQUIRE(multiplier >= 0); REQUIRE(2 <= shift && shift <= 62); int64_t round = (1 << (shift - 1)); - int64_t result = (int64_t)value * multiplier + round; + int64_t result = static_cast<int64_t>(value) * multiplier + round; result = result >> shift; REQUIRE(result >= minimum<int32_t> && result <= maximum<int32_t>); - return (int32_t)result; + return static_cast<int32_t>(result); } ---- @@ -665,9 +692,9 @@ All table lookups are based on the following reference lookup function that take [source,c++] ---- -int32_t apply_lookup(int16_t *table, int32_t value) +int32_t apply_lookup_s(int16_t *table, int32_t value) { - int16_t clipped_value = (int16_t)apply_clip<int32_t>(value, -32768, +32767); + int16_t clipped_value = static_cast<int16_t>(apply_clip_s<int32_t>(value, -32768, +32767)); int32_t index = (clipped_value + 32768) >> 7; int32_t fraction = clipped_value & 0x7f; int16_t base = table[index]; @@ -688,7 +715,7 @@ void generate_lookup_table(int16_t *table, int32_t (*reference)(int32_t)) { for (int i = -256; i <= 256; i++) { int32_t value = (*reference)(i); - table[i + 256] = (int16_t)apply_clip<int32_t>(value, -32768, +32767) + table[i + 256] = static_cast<int16_t>(apply_clip<int32_t>(value, -32768, +32767)); } } ---- diff --git a/chapters/pseudocode.adoc b/chapters/pseudocode.adoc index c026089..55c35d4 100644 --- a/chapters/pseudocode.adoc +++ b/chapters/pseudocode.adoc @@ -1,7 +1,7 @@ // // This confidential and proprietary 
software may be used only as // authorised by a licensing agreement from ARM Limited -// (C) COPYRIGHT 2021-2022 ARM Limited +// (C) COPYRIGHT 2021-2023 ARM Limited // ALL RIGHTS RESERVED // The entire notice above must be reproduced on all authorised // copies and copies may only be made to the extent permitted @@ -221,21 +221,44 @@ The following functions provide arithmetic while defining requirements such that [source,c++] ---- -in_t apply_add<in_t>(in_t a, in_t b) { +in_t apply_add_s<in_t>(in_t a, in_t b) { if (is_floating_point(in_t)) return a + b; - int64_t c = (int64_t)a + (int64_t)b; - REQUIRE(c >= minimum<in_t> && c <= maximum<in_t>); - return (in_t)c; + int64_t c = sign_extend<int64_t>(a) + sign_extend<int64_t>(b); + REQUIRE(c >= minimum_s<in_t> && c <= maximum_s<in_t>); + return static_cast<in_t>(c); +} + +in_t apply_add_u<in_t>(in_t a, in_t b) { + if (is_floating_point(in_t)) return a + b; + uint64_t c = zero_extend<uint64_t>(a) + zero_extend<uint64_t>(b); + REQUIRE(c >= minimum_u<in_u_t> && c <= maximum_u<in_u_t>); + return truncate<in_t>(c); +} + +in_t apply_arith_rshift<in_t>(in_t a, in_t b) { + int32_t c = sign_extend<int32_t>(a) >> sign_extend<int32_t>(b); + return static_cast<in_t>(c); +} + +in_t apply_intdiv_s<in_t>(in_t a, in_t b) { + int64_t c = sign_extend<int64_t>(a) / sign_extend<int64_t>(b); + REQUIRE(c >= minimum_s<in_t> && c <= maximum_s<in_t>); + return static_cast<in_t>(c); } in_t apply_ceil<in_t>(in_t input) { return input value rounded up to nearest integer } -in_t apply_clip<in_t>(in_t value, in_t min_val, in_t max_val) { - REQUIRE(min_val <= max_val); - value = apply_max(value, min_val); - value = apply_min(value, max_val); +in_t apply_clip_s<in_t>(in_t value, in_t min_val, in_t max_val) { + if (is_floating_point(in_t)) { + REQUIRE(min_val <= max_val); + } + else { + REQUIRE(sign_extend<int64_t>(min_val) <= sign_extend<int64_t>(max_val)); + } + value = apply_max_s<in_t>(value, min_val); + value = apply_min_s<in_t>(value, max_val); 
return value; } @@ -257,22 +280,37 @@ in_t apply_log<in_t>(in_t input) { return the natural logarithm of input } -in_t apply_max<in_t>(in_t a, in_t b) { +in_t apply_logical_rshift<in_t>(in_t a, in_t b) { + uint64_t c = zero_extend<uint32_t>(a) >> zero_extend<uint32_t>(b); + return static_cast<in_t>(c); +} + +in_t apply_max_s<in_t>(in_t a, in_t b) { if (is_floating_point(in_t)) { if (isNaN(a) || isNaN(b)) { return NaN; } + if (a >= b) return a; else return b; } - if (a >= b) return a; else return b; + // Integer version + if (sign_extend<int64_t>(a) >= sign_extend<int64_t>(b)) return a; else return b; } -in_t apply_min<in_t>(in_t a, in_t b) { +in_t apply_min_s<in_t>(in_t a, in_t b) { if (is_floating_point(in_t)) { if (isNaN(a) || isNaN(b)) { return NaN; } + if (a < b) return a; else return b; } - if (a < b) return a; else return b; + // Integer version + if (sign_extend<int64_t>(a) < sign_extend<int64_t>(b)) return a; else return b; +} + +in_t apply_mul_s<in_t>(in_t a, in_t b) { + if (is_floating_point(in_t)) return a * b; + int64_t c = sign_extend<int64_t>(a) * sign_extend<int64_t>(b); + return static_cast<in_t>(c); } in_t apply_pow<in_t>(in_t a, in_t b) { @@ -283,11 +321,17 @@ in_t apply_sqrt<in_t>(in_t input) { return the square root of input } -in_t apply_sub<in_t>(in_t a, in_t b) { +in_t apply_sub_s<in_t>(in_t a, in_t b) { if (is_floating_point(in_t)) return a - b; - int64_t c = (int64_t)a - (int64_t)b; - REQUIRE(c >= minimum<in_t> && c <= maximum<in_t>); - return (in_t)c; + int64_t c = sign_extend<int64_t>(a) - sign_extend<int64_t>(b); + REQUIRE(c >= minimum_s<in_t> && c <= maximum_s<in_t>); + return static_cast<in_t>(c); +} + +in_t apply_sub_u<in_t>(in_t a, in_t b) { + uint64_t c = zero_extend<uint64_t>(a) - zero_extend<uint64_t>(b); + REQUIRE(c >= minimum_u<in_u_t> && c <= maximum_u<in_u_t>); + return truncate<in_t>(c); } int32_t count_leading_zeros(int32_t a) { @@ -305,6 +349,69 @@ int32_t count_leading_zeros(int32_t a) { } ---- +==== Type Conversion 
Helpers + +The following definitions indicate the type to be used when the given parameters are provided. + +[source,c++] +---- + +// Returns a signed version of the given type +// A no-op for floating-point types +Type make_signed(Type in_t) +{ + switch(in_t) { + case bool_t: + return bool_t; + case i8_t: + return int8_t; + case i16_t: + return int16_t; + case i32_t: + return int32_t; + case i48_t: + return int48_t; + case fp16_t: + return fp16_t; + case bf16_t: + return bf16_t; + case fp32_t: + return fp32_t; + } +} + +// Returns the unsigned type of the given type +// Error to call this with anything but i8_t or i16_t + +Type make_unsigned(Type in_t) +{ + ERROR_IF(in_t != i8_t && in_t != i16_t); + switch(in_t) { + case i8_t: + return uint8_t; + case i16_t: + return uint16_t; + } +} + +out_t static_cast<out_t>(in_t value) +{ + // Operates similar to the c++ standard static_cast + // Limited to simple numeric conversion for TOSA. + // Sign extends signed integer input types if needed + // Zero extends unsigned integer input types if needed + // Truncates when converting to a smaller width data type + // Conversion from integer to floating-point is exact if possible + // If converting between signless integer types, treated as signed integer +} + +out_t bitcast<out_t>(in_t value) +{ + // Treats the bits of value as if they were of type out_t + // Only supported for integer types of the same bit width +} +---- + ==== Numeric Conversion Helpers The following definitions are used in pseudocode to do numeric conversions. @@ -321,13 +428,17 @@ float_t round_to_nearest_float(in_t f) Converts the input value into floating-point, rounding to the nearest representable value. For the required precision see the section: Main inference precision requirements. -out_t sign_extend(in_t input) - Only valid for two's complement integer values where out_t has more bits than in_t. 
- Output = input - Replicate the top bit of input for all bits between the top bit of input and the top bit of output. +out_t sign_extend<out_t>(in_t input) + Floating point values are unchanged. + For two's complement integer values where out_t has more bits than in_t, replicate the top bit of input for all bits between the top bit of input and the top bit of output. + +out_t zero_extend<out_t>(in_t input) + Floating point values are unchanged. + For two's complement integer values where out_t has more bits than in_t, insert zero values for all bits between the top bit of input and the top bit of output. out_t truncate(in_t input) output is the sizeof(out_t) least significant bits in input. + Nop for floating-point types ---- The following definition is used to flatten a list of lists into a single list. @@ -389,4 +500,16 @@ float_t cos(angle) bool power_of_two(int32_t value) return true if value is a power of two, false otherwise + +in_out_t maximum_s<Type T> + return the maximum value when interpreting type T as a signed value as returned by the make_signed helper. + +in_out_t minimum_s<Type T> + return the minimum value when interpreting type T as a signed value as returned by the make_signed helper. + +in_out_t maximum_u<Type T> + return the maximum value when interpreting type T as an unsigned value as returned by the make_unsigned helper. + +in_out_t minimum_u<Type T> + return the minimum value when interpreting type T as an unsigned value as returned by the make_unsigned helper. 
---- diff --git a/chapters/reduction.adoc b/chapters/reduction.adoc index 713404c..8a3ceac 100644 --- a/chapters/reduction.adoc +++ b/chapters/reduction.adoc @@ -1,7 +1,7 @@ // // This confidential and proprietary software may be used only as // authorised by a licensing agreement from ARM Limited -// (C) COPYRIGHT 2020-2021 ARM Limited +// (C) COPYRIGHT 2020-2023 ARM Limited // ALL RIGHTS RESERVED // The entire notice above must be reproduced on all authorised // copies and copies may only be made to the extent permitted @@ -77,7 +77,7 @@ for_each(index in shape1) { out_index[axis] = 0; in_out_t value = tensor_read<in_out_t>(input, shape1, index); in_out_t state = tensor_read<in_out_t>(output, shape, out_index); - state = apply_max<in_out_t>(state, value); + state = apply_max_s<in_out_t>(state, value); tensor_write<in_out_t>(output, shape, out_index, state); } ---- @@ -100,7 +100,7 @@ for_each(index in shape1) { out_index[axis] = 0; in_out_t value = tensor_read<in_out_t>(input, shape1, index); in_out_t state = tensor_read<in_out_t>(output, shape, out_index); - state = apply_min<in_out_t>(state, value); + state = apply_min_s<in_out_t>(state, value); tensor_write<in_out_t>(output, shape, out_index, state); } ---- @@ -123,7 +123,7 @@ for_each(index in shape1) { out_index[axis] = 0; in_out_t value = tensor_read<in_out_t>(input, shape1, index); in_out_t state = tensor_read<in_out_t>(output, shape, out_index); - state = state * value; + state = apply_mul_s<in_out_t>(state, value); tensor_write<in_out_t>(output, shape, out_index, state); } ---- @@ -146,7 +146,7 @@ for_each(index in shape1) { out_index[axis] = 0; in_out_t value = tensor_read<in_out_t>(input, shape1, index); in_out_t state = tensor_read<in_out_t>(output, shape, out_index); - state = apply_add<in_out_t>(state, value); + state = apply_add_s<in_out_t>(state, value); tensor_write<in_out_t>(output, shape, out_index, state); } ---- diff --git a/chapters/tensor_ops.adoc b/chapters/tensor_ops.adoc index 
6387790..b9d54c1 100644 --- a/chapters/tensor_ops.adoc +++ b/chapters/tensor_ops.adoc @@ -1,7 +1,7 @@ // // This confidential and proprietary software may be used only as // authorised by a licensing agreement from ARM Limited -// (C) COPYRIGHT 2020-2022 ARM Limited +// (C) COPYRIGHT 2020-2023 ARM Limited // ALL RIGHTS RESERVED // The entire notice above must be reproduced on all authorised // copies and copies may only be made to the extent permitted @@ -31,12 +31,15 @@ if (axis == rank(shape1)-1) { ERROR_IF(flatten(left_shape, right_shape) != shape); for_each(left_index in left_shape) { for_each(right_index in right_shape) { - in_t max_value = minimum_value<in_t>; + in_t max_value = minimum_s<in_t>; out_t max_index = 0; for (i = 0; i < shape[axis]; i++) { dim_t index = flatten(left_index, [i], right_index); in_t value = tensor_read<in_t>(input, shape1, index); - if (value > max_value) { max_value = value; max_index = i; } + if (apply_max_s<in_t>(value, max_value) != max_value) { + max_value = value; + max_index = i; + } } dim_t index = flatten(left_index, right_index); tensor_write<out_t>(output, shape, index, max_index); @@ -54,8 +57,8 @@ include::{generated}/operators/AVG_POOL2D.adoc[] [source,c++] ---- -ERROR_IF(in_out_t != int8_t && input_zp != 0); // Zero point only for int8_t -ERROR_IF(in_out_t != int8_t && output_zp != 0); // Zero point only for int8_t +ERROR_IF(in_out_t != i8_t && input_zp != 0); // Zero point only for int8_t +ERROR_IF(in_out_t != i8_t && output_zp != 0); // Zero point only for int8_t ERROR_IF(kernel_y < 1 || kernel_x < 1); // kernel size must be >= 1 ERROR_IF(stride_y < 1 || stride_x < 1); ERROR_IF(pad_top < 0 || pad_bottom < 0 || pad_left < 0 || pad_right < 0); @@ -79,17 +82,19 @@ for_each(0 <= n < N, 0 <= oy < OH, 0 <= ox < OW, 0 <= c < C ) { // average, padding does not count if (0 <= y < IH and 0 <= x < IW) { count++; - acc_t value = tensor_read<in_out_t>(input, [N,IH,IW,C], [n,y,x,c]); - value = value - input_zp; - acc = 
apply_add<acc_t>(acc, value); + acc_t value = sign_extend<acc_t>(tensor_read<in_out_t>(input, [N,IH,IW,C], [n,y,x,c])); + value = apply_sub_s<acc_t>(value, sign_extend<acc_t>(input_zp)); + acc = apply_add_s<acc_t>(acc, value); } } if (is_float(in_out_t)) { - output_val = acc / (float)count; + output_val = acc / static_cast<in_out_t>(count); } else { scale_t scale = reciprocal_scale(count); acc = apply_scale_32(acc, scale.multiplier, scale.shift, false); - output_val = (in_out_t)apply_clip<acc_t>(acc + output_zp, minimum<in_out_t>, maximum<in_out_t>) + acc = apply_add_s<acc_t>(acc, sign_extend<acc_t>(output_zp)); + acc = apply_clip_s<acc_t>(acc, minimum_s<in_out_t>, maximum_s<in_out_t>); + output_val = static_cast<in_out_t>(acc); } tensor_write<in_out_t>(output, [N,OH,OW,C], [n,oy,ox,c], output_val); } @@ -103,7 +108,7 @@ include::{generated}/operators/CONV2D.adoc[] [source,c++] ---- -ERROR_IF(in_t != int8_t && input_zp != 0); // Zero point only for int8_t +ERROR_IF(in_t != i8_t && input_zp != 0); // Zero point only for int8_t ERROR_IF(weight_t != int8_t && weight_zp != 0); ERROR_IF(pad_top < 0 || pad_bottom < 0 || pad_left < 0 || pad_right < 0); ERROR_IF(stride_y < 1 || stride_x < 1); @@ -120,14 +125,18 @@ for_each(0 <= n < N, 0 <= oy < OH, 0 <= ox < OW; 0 <= oc < OC) { index_t y = iy + ky * dilation_y; index_t x = ix + kx * dilation_x; if (0 <= y < IH && 0 <= x < IW) { - out_t value = tensor_read<in_t>(input, [N,IH,IW,IC], [n,y,x,ic]); - out_t weight = tensor_read<weight_t>(weight, [OC,KH,KW,IC], [oc,ky,kx,ic]); - value = value - input_zp; - weight = weight - weight_zp; - acc = apply_add<out_t>(acc, value * weight); + out_t value = static_cast<out_t>(tensor_read<in_t>(input, + [N,IH,IW,IC], + [n,y,x,ic])); + out_t weight = static_cast<out_t>(tensor_read<weight_t>(weight, + [OC,KH,KW,IC], + [oc,ky,kx,ic])); + value = apply_sub_s<out_t>(value, static_cast<out_t>(input_zp)); + weight = apply_sub_s<out_t>(weight, static_cast<out_t>(weight_zp)); + acc = 
apply_add_s<out_t>(acc, apply_mul_s<out_t>(value, weight)); } } - acc = apply_add<out_t>(acc, bias[(BC == 1) ? 0 : oc]); + acc = apply_add_s<out_t>(acc, bias[(BC == 1) ? 0 : oc]); tensor_write<out_t>(output, [N,OH,OW,OC], [n,oy,ox,oc], acc); } ---- @@ -140,8 +149,8 @@ include::{generated}/operators/CONV3D.adoc[] [source,c++] ---- -ERROR_IF(in_t != int8_t && input_zp != 0); // Zero point only for int8_t -ERROR_IF(weight_t != int8_t && weight_zp != 0); +ERROR_IF(in_t != i8_t && input_zp != 0); // Zero point only for int8_t +ERROR_IF(weight_t != i8_t && weight_zp != 0); ERROR_IF(pad_d0 < 0 || pad_d1 < 0 || pad_top < 0 || pad_bottom < 0 || pad_left < 0 || pad_right < 0); ERROR_IF(stride_d < 1 || stride_y < 1 || stride_x < 1); ERROR_IF(dilation_d < 1 || dilation_y < 1 || dilation_x < 1); @@ -160,14 +169,18 @@ for_each(0 <= n < N, 0 <= od < OD, 0 <= oy < OH, 0 <= ox < OW; 0 <= oc < OC) { index_t y = iy + ky * dilation_y; index_t x = ix + kx * dilation_x; if (0 <= x < IW && 0 <= y < IH && 0 <= d < ID) { - out_t value = tensor_read<in_t>(input, [N,ID,IH,IW,IC], [n,d,y,x,ic]); - out_t weight = tensor_read<weight_t>(weight,[OC,KD,KH,KW,IC],[oc,kd,ky,kx,ic]); - value = value - input_zp; - weight = weight - weight_zp; - acc = apply_add<out_t>(acc, value * weight); + out_t value = static_cast<out_t>(tensor_read<in_t>(input, + [N,ID,IH,IW,IC], + [n,d,y,x,ic])); + out_t weight = static_cast<out_t>(tensor_read<weight_t>(weight, + [OC,KD,KH,KW,IC], + [oc,kd,ky,kx,ic])); + value = apply_sub_s<out_t>(value, static_cast<out_t>(input_zp)); + weight = apply_sub_s<out_t>(weight, static_cast<out_t>(weight_zp)); + acc = apply_add_s<out_t>(acc, apply_mul_s<out_t>(value, weight)); } } - acc = apply_add<out_t>(acc, bias[(BC == 1) ? 0 : oc]); + acc = apply_add_s<out_t>(acc, bias[(BC == 1) ? 
0 : oc]); tensor_write<out_t>(output, [N,OD,OH,OW,OC], [n,od,oy,ox,oc], acc); } ---- @@ -180,8 +193,8 @@ include::{generated}/operators/DEPTHWISE_CONV2D.adoc[] [source,c++] ---- -ERROR_IF(in_t != int8_t && input_zp != 0); // Zero point only for int8_t -ERROR_IF(weight_t != int8_t && weight_zp != 0); +ERROR_IF(in_t != i8_t && input_zp != 0); // Zero point only for int8_t +ERROR_IF(weight_t != i8_t && weight_zp != 0); ERROR_IF(pad_top < 0 || pad_bottom < 0 || pad_left < 0 || pad_right < 0); ERROR_IF(stride_y < 1 || stride_x < 1); ERROR_IF(dilation_y < 1 || dilation_x < 1); @@ -197,14 +210,18 @@ for_each(0 <= n < N, 0 <= oy < OH, 0 <= ox < OW; 0 <= c < C, 0 <= m < M) { index_t y = iy + ky * dilation_y; index_t x = ix + kx * dilation_x; if (0 <= y < IH && 0 <= x < IW) { - out_t value = tensor_read<in_t>(input, [N,IH,IW,C], [n,y,x,c]); - out_t weight = tensor_read<weight_t>(weight, [KH,KW,C,M], [ky,kx,c,m]); - value = value - input_zp; - weight = weight - weight_zp; - acc = apply_add<out_t>(acc, value * weight); + out_t value = static_cast<out_t>(tensor_read<in_t>(input, + [N,IH,IW,C], + [n,y,x,c])); + out_t weight = static_cast<out_t>(tensor_read<weight_t>(weight, + [KH,KW,C,M], + [ky,kx,c,m])); + value = apply_sub_s<out_t>(value, static_cast<out_t>(input_zp)); + weight = apply_sub_s<out_t>(weight, static_cast<out_t>(weight_zp)); + acc = apply_add_s<out_t>(acc, apply_mul_s<out_t>(value, weight)); } } - acc = apply_add<out_t>(acc, bias[(BC == 1) ? 0 : (c * M) + m]); + acc = apply_add_s<out_t>(acc, bias[(BC == 1) ? 
0 : (c * M) + m]); tensor_write<out_t>(output, [N,OH,OW,C * M], [n,oy,ox,c * M + m], acc); } ---- @@ -262,20 +279,20 @@ include::{generated}/operators/FULLY_CONNECTED.adoc[] [source,c++] ---- -ERROR_IF(in_t != int8_t && input_zp != 0); // Zero point only for int8_t -ERROR_IF(weight_t != int8_t && weight_zp != 0); +ERROR_IF(in_t != i8_t && input_zp != 0); // Zero point only for int8_t +ERROR_IF(weight_t != i8_t && weight_zp != 0); ERROR_IF(BC != OC && BC != 1); for_each(0 <= n < N, 0 <= oc < OC) { out_t acc = 0; for_each(0 <= ic < IC) { - out_t value = tensor_read<in_t>(input, [N,IC], [n,ic]); - out_t weight = tensor_read<weight_t>(weight, [OC,IC], [oc,ic]); - value = value - input_zp; - weight = weight - weight_zp; - acc = apply_add<out_t>(acc, value * weight); + out_t value = static_cast<out_t>(tensor_read<in_t>(input, [N,IC], [n,ic])); + out_t weight = static_cast<out_t>(tensor_read<weight_t>(weight, [OC,IC], [oc,ic])); + value = apply_sub_s<out_t>(value, static_cast<out_t>(input_zp)); + weight = apply_sub_s<out_t>(weight, static_cast<out_t>(weight_zp)); + acc = apply_add_s<out_t>(acc, apply_mul_s<out_t>(value, weight)); } - acc = apply_add<out_t>(acc, bias[(BC == 1) ? 0 : oc]); + acc = apply_add_s<out_t>(acc, bias[(BC == 1) ? 
0 : oc]); tensor_write<out_t>(output, [N,OC], [n,oc], acc); } ---- @@ -288,15 +305,15 @@ include::{generated}/operators/MATMUL.adoc[] [source,c++] ---- -ERROR_IF(in_t != int8_t && (A_zp != 0 || B_zp != 0)); // Zero point only for int8_t +ERROR_IF(in_t != i8_t && (A_zp != 0 || B_zp != 0)); // Zero point only for int8_t for_each(0 <= n < N, 0 <= h < H, 0 <= w < W) { out_t acc = 0; for_each(0 <= c < C) { - out_t value1 = tensor_read<in_t>(A, [N,H,C], [n,h,c]); - out_t value2 = tensor_read<in_t>(B, [N,C,W], [n,c,w]); - value1 = value1 - A_zp; - value2 = value2 - B_zp; - acc = apply_add<out_t>(acc, value1 * value2); + out_t value1 = static_cast<out_t>(tensor_read<in_t>(A, [N,H,C], [n,h,c])); + out_t value2 = static_cast<out_t>(tensor_read<in_t>(B, [N,C,W], [n,c,w])); + value1 = apply_sub_s<out_t>(value1, static_cast<out_t>(A_zp)); + value2 = apply_sub_s<out_t>(value2, static_cast<out_t>(B_zp)); + acc = apply_add_s<out_t>(acc, apply_mul_s<out_t>(value1, value2)); } tensor_write<out_t>(output, [N,H,W], [n,h,w], acc); } @@ -329,7 +346,7 @@ for_each(0 <= n < N, 0 <= oy < H, 0 <= ox < W, 0 <= c < C ) { index_t x = ix + kx; if (y >= 0 && y < IH && x >= 0 && x < IW) { in_out_t value = tensor_read<in_out_t>(input, [N,IH,IW,C], [n,y,x,c]); - acc = apply_max(acc, value); + acc = apply_max_s<in_out_t>(acc, value); } } tensor_write<in_out_t>(output, [N,OH,OW,C], [n,oy,ox,c], acc); } @@ -376,8 +393,8 @@ include::{generated}/operators/TRANSPOSE_CONV2D.adoc[] [source,c++] ---- -ERROR_IF(in_t != int8_t && input_zp != 0); // Zero point only allowed for int8_t -ERROR_IF(weight_t != int8_t && weight_zp != 0); +ERROR_IF(in_t != i8_t && input_zp != 0); // Zero point only allowed for int8_t +ERROR_IF(weight_t != i8_t && weight_zp != 0); ERROR_IF(out_pad_top <= -KH || out_pad_bottom <= -KH); ERROR_IF(out_pad_left <= -KW || out_pad_right <= -KW); ERROR_IF(stride_y < 1 || stride_x < 1); @@ -393,12 +410,12 @@ for_each(0 <= n < N, 0 <= iy < IH, 0 <= ix < IW, 0 <= oc < OC, index_t oy = iy * 
stride_y + out_pad_top + ky; index_t ox = ix * stride_x + out_pad_left + kx; if (oy >= 0 && oy < OH && ox >= 0 && ox < OW) { - out_t acc = tensor_read<out_t>(output, [N,OH,OW,OC], [n,oy,ox,oc]); - out_t value = tensor_read<in_t>(input, [N,IH,IW,IC], [n,iy,ix,ic]); - out_t weight = tensor_read<weight_t>(weight, [OC,KH,KW,IC], [oc,ky,kx,ic]); - value = value - input_zp; - weight = weight - weight_zp; - acc = apply_add<out_t>(acc, value * weight); + out_t acc = static_cast<out_t>(tensor_read<out_t>(output, [N,OH,OW,OC], [n,oy,ox,oc])); + out_t value = static_cast<out_t>(tensor_read<in_t>(input, [N,IH,IW,IC], [n,iy,ix,ic])); + out_t weight = static_cast<out_t>(tensor_read<weight_t>(weight, [OC,KH,KW,IC], [oc,ky,kx,ic])); + value = apply_sub_s<out_t>(value, static_cast<out_t>(input_zp)); + weight = apply_sub_s<out_t>(weight, static_cast<out_t>(weight_zp)); + acc = apply_add_s<out_t>(acc, apply_mul_s<out_t>(value, weight)); tensor_write<out_t>(output, [N,OH,OW,OC], [n,oy,ox,oc], acc); } } diff --git a/chapters/type_conversion.adoc b/chapters/type_conversion.adoc index 415c5d9..90aca2d 100644 --- a/chapters/type_conversion.adoc +++ b/chapters/type_conversion.adoc @@ -1,7 +1,7 @@ // // This confidential and proprietary software may be used only as // authorised by a licensing agreement from ARM Limited -// (C) COPYRIGHT 2020-2022 ARM Limited +// (C) COPYRIGHT 2020-2023 ARM Limited // ALL RIGHTS RESERVED // The entire notice above must be reproduced on all authorised // copies and copies may only be made to the extent permitted @@ -29,7 +29,7 @@ for_each(index in shape) { } else if (in_t == fp16_t || in_t == bf16_t || in_t == fp32_t) { out = apply_clip<out_t>(round_to_nearest_int(in), minimum<out_t>, maximum<out_t>); } else if (sizeof(out_t) >= sizeof(in_t)) { - out = sign_extend(in); + out = sign_extend<out_t>(in); } else { out = truncate(in); } @@ -49,24 +49,49 @@ for_each(index in shape) { // uint16 values can have zero_point 0 or 32768 // int8/uint8 can have zero point 
within their valid range // No other types can have zero point != 0 - ERROR_IF(in_t != int8_t && - in_t != uint8_t && - in_t != uint16_t && input_zp != 0); - ERROR_IF(out_t != int8_t && - out_t != uint8_t && - out_t != uint16_t && output_zp != 0); - ERROR_IF(in_t == uint16_t && input_zp != 0 && input_zp != 32768); - ERROR_IF(out_t == uint16_t && output_zp != 0 && output_zp != 32768); - ERROR_IF(scale32 && in_t == int48_t); + ERROR_IF(in_t != i8_t && + (in_t != i16_t || input_unsigned == False) && input_zp != 0); + ERROR_IF(out_t != i8_t && + (out_t != i16_t || output_unsigned == False) && output_zp != 0); + ERROR_IF(in_t == i16_t && input_unsigned == True && input_zp != 0 && input_zp != 32768); + ERROR_IF(out_t == i16_t && output_unsigned == True && output_zp != 0 && output_zp != 32768); + ERROR_IF(scale32 && in_t == i48_t); ERROR_IF(!scale32 && double_round); - int48_t value = tensor_read<in_t>(input, shape, index); - value = value - input_zp; + ERROR_IF(in_t == i16_t && out_t == i32_t && input_unsigned); + ERROR_IF(in_t == i32_t && out_t == i16_t && output_unsigned); + + in_t in_value = tensor_read<in_t>(input, shape, index); + + int48_t value, extended_in_zp; + if (input_unsigned) { + value = zero_extend<int48_t>(in_value); + extended_in_zp = zero_extend<int48_t>(input_zp); + } + else { + value = sign_extend<int48_t>(in_value); + extended_in_zp = sign_extend<int48_t>(input_zp); + } + + value = value - extended_in_zp; int c = (per_channel) ? index[rank(input) - 1] : 0; int32_t result = (scale32) ? 
apply_scale_32(value, multiplier[c], shift[c], double_round) : apply_scale_16(value, multiplier[c], shift[c]); - result = apply_add<int32_t>(result, output_zp); - out_t out = (out_t)apply_clip<int32_t>(result, minimum<out_t>, maximum<out_t>); + + if (output_unsigned) { + int32_t extended_out_zp = zero_extend<int32_t>(output_zp); + result = apply_add_s<int32_t>(result, extended_out_zp); + out_t out = static_cast<out_t>(apply_clip<int32_t>(result, + minimum_u<out_t>, + maximum_u<out_t>)); + } + else { + int32_t extended_out_zp = sign_extend<int32_t>(output_zp); + result = apply_add_s<int32_t>(result, extended_out_zp); + out_t out = static_cast<out_t>(apply_clip<int32_t>(result, + minimum_s<out_t>, + maximum_s<out_t>)); + } tensor_write<out_t>(output, shape, index, out); } ---- |