From fb0284e2912bd5fd73bf6f476901490e04c330a2 Mon Sep 17 00:00:00 2001 From: Eric Kunze Date: Tue, 18 Jul 2023 15:20:53 -0700 Subject: Change TOSA specification to signless types Integer inputs and outputs to TOSA operators are now defined as signless values. In most instances the operator will use signed arithmetic as indicated in previous versions of the specification resulting in little functional change to the specification. New attributes have been added to the RESCALE operator to indicate whether the input and output values should be treated as signed or unsigned. Explicit use of static_cast, sign_extend, zero_extend and truncate are added to the pseudocode to avoid ambiguity. Change-Id: I71c67d3e5aeaabc418c768f821fce6ee3eebb65b --- chapters/ewise_binary.adoc | 48 ++--- chapters/ewise_unary.adoc | 23 ++- chapters/image.adoc | 23 +-- chapters/introduction.adoc | 53 ++++-- chapters/pseudocode.adoc | 165 ++++++++++++++--- chapters/reduction.adoc | 10 +- chapters/tensor_ops.adoc | 129 +++++++------ chapters/type_conversion.adoc | 55 ++++-- tools/dictionary.dic | 2 + tosa.xml | 408 +++++++++++++++++++++--------------------- tosa.xsd | 12 +- 11 files changed, 565 insertions(+), 363 deletions(-) diff --git a/chapters/ewise_binary.adoc b/chapters/ewise_binary.adoc index 35e454a..876ab4b 100644 --- a/chapters/ewise_binary.adoc +++ b/chapters/ewise_binary.adoc @@ -22,7 +22,7 @@ if (in_out_t == shape_t) { ERROR_IF(rank(shape) != 0 || rank(shape1) != 0 || rank(shape2) != 0); shape_t value1 = tensor_read(input1, [], []); shape_t value2 = tensor_read(input2, [], []); - shape_t result = apply_add(value1, value2); + shape_t result = apply_add_s(value1, value2); tensor_write(output, [], [], result); } else { ERROR_IF(shape != broadcast_shape(shape1, shape2)); @@ -31,7 +31,7 @@ if (in_out_t == shape_t) { dim_t index2 = apply_broadcast(shape, shape2, index); in_out_t value1 = tensor_read(input1, shape1, index1); in_out_t value2 = tensor_read(input2, shape2, index2); - in_out_t 
result = apply_add(value1, value2); + in_out_t result = apply_add_s(value1, value2); tensor_write(output, shape, index, result); } } @@ -54,15 +54,16 @@ for_each(index in shape) { in_out_t value2 = tensor_read(input2, shape2, index2); // Ensure that shift amount is appropriate for the data type - REQUIRE((in_out_t == int32_t && 0 <= value2 && value2 <= 31) || - (in_out_t == int16_t && 0 <= value2 && value2 <= 15) || - (in_out_t == int8_t && 0 <= value2 && value2 <= 7)); + REQUIRE((in_out_t == i32_t && 0 <= value2 && value2 <= 31) || + (in_out_t == i16_t && 0 <= value2 && value2 <= 15) || + (in_out_t == i8_t && 0 <= value2 && value2 <= 7)); - in_out_t result = value1 >> value2; - if (round == true && value2 > 0 && (value1 >> (value2 - 1)) & 1 != 0) { + in_out_t result = apply_arith_rshift(value1, value2); + if (round == true && static_cast(value2) > 0 && + (apply_arith_rshift(value1, apply_sub_s(value2, 1)) & 1 != 0) { result = result + 1; } - result = apply_clip(result, minimum, maximum); + result = apply_clip_s(result, minimum_s, maximum_s); tensor_write(output, shape, index, result); } ---- @@ -156,8 +157,8 @@ if (in_out_t == shape_t) { REQUIRE(value2 != 0); // This catches the case where we divide minimum by -1 // which is not representable in two's complement - REQUIRE((int64_t)value1 / value2 <= maximum); - in_out_t result = value1 / value2; + REQUIRE(static_cast(value1) / static_cast(value2) <= maximum_s); + in_out_t result = apply_intdiv_s(value1, value2); tensor_write(output, shape, index, result); } } @@ -219,8 +220,9 @@ for_each(index in shape) { dim_t index2 = apply_broadcast(shape, shape2, index); in_out_t value1 = tensor_read(input1, shape1, index1); in_out_t value2 = tensor_read(input2, shape2, index2); - REQUIRE(0 <= value2 && value2 <= 31); - in_out_t result = (in_out_t)((unsigned in_out_t)value1 >> value2); + REQUIRE(0 <= static_cast(value2) && static_cast(value2) <= 31); + // Logical shifts happen as unsigned types internally + in_out_t result = 
apply_logical_rshift(value1, value2); tensor_write(output, shape, index, result); } ---- @@ -280,7 +282,7 @@ for_each(index in shape) { dim_t index2 = apply_broadcast(shape, shape2, index); in_out_t value1 = tensor_read(input1, shape1, index1); in_out_t value2 = tensor_read(input2, shape2, index2); - in_out_t result = apply_max(value1, value2); + in_out_t result = apply_max_s(value1, value2); tensor_write(output, shape, index, result); } ---- @@ -300,7 +302,7 @@ for_each(index in shape) { dim_t index2 = apply_broadcast(shape, shape2, index); in_out_t value1 = tensor_read(input1, shape1, index1); in_out_t value2 = tensor_read(input2, shape2, index2); - in_out_t result = apply_min(value1, value2); + in_out_t result = apply_min_s(value1, value2); tensor_write(output, shape, index, result); } ---- @@ -330,14 +332,14 @@ if (in_out_t == shape_t) { in_t value1 = tensor_read(input1, shape1, index1); in_t value2 = tensor_read(input2, shape2, index2); out_t result; - if (in_t == int32_t && shift > 0) { - int64_t product = (int64_t)value1 * (int64_t)value2; - int64_t round = (int64_t)1 << (shift-1); + if (in_t == i32_t && shift > 0) { + int64_t product = sign_extend(value1) * sign_extend(value2); + int64_t round = static_cast(1) << (shift - 1); product = (product + round) >> shift; - REQUIRE(product >= minimum && product <= maximum) + REQUIRE(product >= minimum_s && product <= maximum_s) result = product; } else { - result = value1 * value2; // low 32-bits of result for int32_t + result = apply_mul_s(value1, value2); // low 32-bits of result for i32_t } tensor_write(output, shape, index, result); } @@ -386,7 +388,7 @@ if (in_out_t == shape_t) { dim_t index2 = apply_broadcast(shape, shape2, index); in_out_t value1 = tensor_read(input1, shape1, index1); in_out_t value2 = tensor_read(input2, shape2, index2); - in_out_t result = apply_sub(value1, value2); + in_out_t result = apply_sub_s(value1, value2); tensor_write(output, shape, index, result); } } @@ -416,11 +418,11 @@ 
REQUIRE(length(table) == TABLE_SIZE); for_each(index in shape) { in_t value = tensor_read(input, shape, index); out_t result; - if (in_t == int8_t) { + if (in_t == i8_t) { // value is a signed int, convert to a 0 based index - result = table[value + 128]; + result = table[static_cast(value) + 128]; } else { - result = apply_lookup(table, value); + result = apply_lookup_s(static_cast(table), static_cast(value)); } tensor_write(output, shape, index, result); } diff --git a/chapters/ewise_unary.adoc b/chapters/ewise_unary.adoc index f630a48..d3eacc4 100644 --- a/chapters/ewise_unary.adoc +++ b/chapters/ewise_unary.adoc @@ -1,7 +1,7 @@ // // This confidential and proprietary software may be used only as // authorised by a licensing agreement from ARM Limited -// (C) COPYRIGHT 2020-2022 ARM Limited +// (C) COPYRIGHT 2020-2023 ARM Limited // ALL RIGHTS RESERVED // The entire notice above must be reproduced on all authorised // copies and copies may only be made to the extent permitted @@ -26,11 +26,12 @@ include::{generated}/operators/ABS.adoc[] ---- for_each(index in shape) { in_out_t value1 = tensor_read(input1, shape, index); - if (in_out_t == float_t && value1 == -0.0) { + if (is_floating_point(in_out_t) && value1 == -0.0) { value1 = 0.0; } - if (value1 < 0.0) - value1 = apply_sub(0, value1); + if (static_cast(value1) < 0.0) { + value1 = apply_sub_s(0, value1); + } tensor_write(output, shape, index, value1); } ---- @@ -183,13 +184,17 @@ include::{generated}/operators/NEGATE.adoc[] [source,c++] ---- -ERROR_IF(in_out_t != int8_t && input1_zp != 0) // Zero point only for int8_t -ERROR_IF(in_out_t != int8_t && output_zp != 0) // Zero point only for int8_t +ERROR_IF(in_out_t != i8_t && input1_zp != 0) // Zero point only for int8_t +ERROR_IF(in_out_t != i8_t && output_zp != 0) // Zero point only for int8_t for_each(index in shape) { in_out_t value1 = tensor_read(input1, shape, index); - acc_t value = (acc_t)value1 - input1_zp; - value = apply_sub(0, value); - in_out_t 
result = (in_out_t)apply_clip(value + output_zp, minimum, maximum); + acc_t value = apply_sub_s(sign_extend(value1), + sign_extend(input1_zp)); + value = apply_sub_s(0, value); + value = apply_add_s(value, sign_extend(output_zp)); + in_out_t result = truncate(apply_clip_s(value, + minimum_s, + maximum_s)); tensor_write(output, shape, index, result); } ---- diff --git a/chapters/image.adoc b/chapters/image.adoc index 8abc878..da839f8 100644 --- a/chapters/image.adoc +++ b/chapters/image.adoc @@ -1,7 +1,7 @@ // // This confidential and proprietary software may be used only as // authorised by a licensing agreement from ARM Limited -// (C) COPYRIGHT 2020-2021 ARM Limited +// (C) COPYRIGHT 2020-2023 ARM Limited // ALL RIGHTS RESERVED // The entire notice above must be reproduced on all authorised // copies and copies may only be made to the extent permitted @@ -91,22 +91,23 @@ for_each(0 <= n < N, 0 <= oy < OH, 0 <= ox < OW; 0 <= c < C) { int16_t rx = x - ix * scale_x_n; // (x % scale_x_n) if (is_floating_point(resize_t)) { - dy = (resize_t)ry / (resize_t)scale_y_n; - dx = (resize_t)rx / (resize_t)scale_x_n; + dy = static_cast(ry) / static_cast(scale_y_n); + dx = static_cast(rx) / static_cast(scale_x_n); } else { dy = ry; dx = rx; } // Note that -1 <= iy < IH and -1 <= ix < IW - int16_t iy0 = apply_max(iy, 0); - int16_t iy1 = apply_min(iy + 1, IH - 1); - int16_t ix0 = apply_max(ix, 0); - int16_t ix1 = apply_min(ix + 1, IW - 1); + int16_t iy0 = apply_max_s(iy, 0); + int16_t iy1 = apply_min_s(iy + 1, IH - 1); + int16_t ix0 = apply_max_s(ix, 0); + int16_t ix1 = apply_min_s(ix + 1, IW - 1); if (mode==BILINEAR) { - in_t v00 = tensor_read(input, [N,IH,IW,C], [n,iy0,ix0,c]); - in_t v01 = tensor_read(input, [N,IH,IW,C], [n,iy0,ix1,c]); - in_t v10 = tensor_read(input, [N,IH,IW,C], [n,iy1,ix0,c]); - in_t v11 = tensor_read(input, [N,IH,IW,C], [n,iy1,ix1,c]); + using in_s_t = make_signed(in_t); // Use signed calculations for i8/i16 + in_s_t v00 = static_cast(tensor_read(input, 
[N,IH,IW,C], [n,iy0,ix0,c])); + in_s_t v01 = static_cast(tensor_read(input, [N,IH,IW,C], [n,iy0,ix1,c])); + in_s_t v10 = static_cast(tensor_read(input, [N,IH,IW,C], [n,iy1,ix0,c])); + in_s_t v11 = static_cast(tensor_read(input, [N,IH,IW,C], [n,iy1,ix1,c])); acc = v00 * (unit_y - dy) * (unit_x - dx); acc += v01 * (unit_y - dy) * dx; acc += v10 * dy * (unit_x - dx); diff --git a/chapters/introduction.adoc b/chapters/introduction.adoc index d6f7bf9..26fef0e 100644 --- a/chapters/introduction.adoc +++ b/chapters/introduction.adoc @@ -338,11 +338,11 @@ fp64_t acc_prec; // 1<<(M+1) where M is the number of mantissa bits fp64_t acc_min_normal; // accumulator minimum normal greater than zero fp64_t two_m63 = -1.0/(fp64)((int64_t)-1<<63); // pow(2, -63) switch (acc_t) { - case fp32_t: acc_prec = (fp64_t)(1<<24); // pow(2, 24) + case fp32_t: acc_prec = static_cast(1<<24); // pow(2, 24) acc_min_normal = two_m63 * two_m63; // pow(2, -126) break; - case fp16_t: acc_prec = (fp64_t)(1<<11); // pow(2, 11) - acc_min_normal = 1.0/(fp64_t)(1<<14); // pow(2, -14) + case fp16_t: acc_prec = static_cast(1<<11); // pow(2, 11) + acc_min_normal = 1.0/static_cast(1<<14); // pow(2, -14) break; default: ERROR_IF(true); } @@ -359,7 +359,7 @@ for_each(index in output_shape) { out_err = 0.0; } else { // 0.0 < out_bnd < infinity out_bnd = max(out_bnd, acc_min_normal); - out_err = ((fp64_t)out_imp - out_ref)*acc_prec/out_bnd; + out_err = (static_cast(out_imp) - out_ref) * acc_prec / out_bnd; REQUIRE(abs(out_err) <= ksb); } out_err_sum += out_err; @@ -457,11 +457,21 @@ The number formats supported by a given operator are listed in its table of supp | - |Boolean value. Size implementation defined. The TOSA reference model implements this as int8_t with 0 for false and 1 for true. All non-zero values are accepted on input as true. +|i4_t +| - +| - +|Signless 4-bit integer type. Will be interpreted as int4_t by all operators + |int4_t | -7 | +7 |Signed 4-bit two's-complement value. 
Excludes -8 to maintain a symmetric about zero range for weights. +|i8_t +| - +| - +|Signless 8-bit integer value. Will be interpreted as int8_t unless otherwise specified by an operator. + |int8_t | -128 | +127 @@ -470,7 +480,12 @@ The number formats supported by a given operator are listed in its table of supp |uint8_t | 0 | 255 -|Unsigned 8-bit value. +|Unsigned 8-bit integer value. + +|i16_t +| - +| - +|Signless 16-bit integer type. Will be interpreted as int16_t unless otherwise specified by an operator. |int16_t | -32768 @@ -482,11 +497,21 @@ The number formats supported by a given operator are listed in its table of supp | 65535 |Unsigned 16-bit value. +|i32_t +| - +| - +|Signless 32-bit integer value. Will be interpreted as int32_t by all operators. + |int32_t | -(1<<31) | (1<<31)-1 |Signed 32-bit two's-complement value. +|i48_t +| - +| - +|Signless 48-bit integer value. Will be interpreted as int48_t by all operators. + |int48_t | -(1<<47) | (1<<47)-1 @@ -542,7 +567,9 @@ This ensures that a Base Inference profile TOSA implementation can calculate the === Integer Behavior -Integer calculations must be standard two's-complement or unsigned calculations. +TOSA integer inputs and outputs are specified by signless values with the given number of bits. +Unless otherwise specified, these values will be interpreted as signed two's-complement. +The pseudocode will use int*_t to indicate use as a signed value and uint*_t to indicate use as an unsigned value. If overflow occurs doing integer calculation, the result is unpredictable, as indicated by the REQUIRE checks in the pseudocode for the operators. Unsigned 8 and 16-bit values are only allowed in the RESCALE operation, to allow for compatibility with networks which expect unsigned 8-bit or 16-bit tensors for input and output. 
@@ -598,20 +625,20 @@ int32_t apply_scale_32(int32_t value, int32_t multiplier, int8_t shift, bool_t d if (shift > 31 && value >= 0) round += 1<<30; if (shift > 31 && value < 0) round -= 1<<30; } - int64_t result = (int64_t)value * multiplier + round; + int64_t result = static_cast(value) * multiplier + round; result = result >> shift; // result will fit a 32-bit range due to the REQUIRE on value - return (int32_t)result; + return static_cast(result); } int32_t apply_scale_16(int48_t value, int16_t multipler, int8_t shift) { REQUIRE(multiplier >= 0); REQUIRE(2 <= shift && shift <= 62); int64_t round = (1 << (shift - 1)); - int64_t result = (int64_t)value * multiplier + round; + int64_t result = static_cast(value) * multiplier + round; result = result >> shift; REQUIRE(result >= minimum && result <= maximum); - return (int32_t)result; + return static_cast(result); } ---- @@ -665,9 +692,9 @@ All table lookups are based on the following reference lookup function that take [source,c++] ---- -int32_t apply_lookup(int16_t *table, int32_t value) +int32_t apply_lookup_s(int16_t *table, int32_t value) { - int16_t clipped_value = (int16_t)apply_clip(value, -32768, +32767); + int16_t clipped_value = static_cast(apply_clip_s(value, -32768, +32767)); int32_t index = (clipped_value + 32768) >> 7; int32_t fraction = clipped_value & 0x7f; int16_t base = table[index]; @@ -688,7 +715,7 @@ void generate_lookup_table(int16_t *table, int32_t (*reference)(int32_t)) { for (int i = -256; i <= 256; i++) { int32_t value = (*reference)(i); - table[i + 256] = (int16_t)apply_clip(value, -32768, +32767) + table[i + 256] = static_cast(apply_clip_s(value, -32768, +32767)); } } ---- diff --git a/chapters/pseudocode.adoc b/chapters/pseudocode.adoc index c026089..55c35d4 100644 --- a/chapters/pseudocode.adoc +++ b/chapters/pseudocode.adoc @@ -1,7 +1,7 @@ // // This confidential and proprietary software may be used only as // authorised by a licensing agreement from ARM Limited -// (C) COPYRIGHT 
2021-2022 ARM Limited +// (C) COPYRIGHT 2021-2023 ARM Limited // ALL RIGHTS RESERVED // The entire notice above must be reproduced on all authorised // copies and copies may only be made to the extent permitted @@ -221,21 +221,44 @@ The following functions provide arithmetic while defining requirements such that [source,c++] ---- -in_t apply_add(in_t a, in_t b) { +in_t apply_add_s(in_t a, in_t b) { if (is_floating_point(in_t)) return a + b; - int64_t c = (int64_t)a + (int64_t)b; - REQUIRE(c >= minimum && c <= maximum); - return (in_t)c; + int64_t c = sign_extend(a) + sign_extend(b); + REQUIRE(c >= minimum_s && c <= maximum_s); + return static_cast(c); +} + +in_t apply_add_u(in_t a, in_t b) { + if (is_floating_point(in_t)) return a + b; + uint64_t c = zero_extend(a) + zero_extend(b); + REQUIRE(c >= minimum_u && c <= maximum_u); + return truncate(c); +} + +in_t apply_arith_rshift(in_t a, in_t b) { + int32_t c = sign_extend(a) >> sign_extend(b); + return static_cast(c); +} + +in_t apply_intdiv_s(in_t a, in_t b) { + int64_t c = sign_extend(a) / sign_extend(b); + REQUIRE(c >= minimum_s && c <= maximum_s); + return static_cast(c); } in_t apply_ceil(in_t input) { return input value rounded up to nearest integer } -in_t apply_clip(in_t value, in_t min_val, in_t max_val) { - REQUIRE(min_val <= max_val); - value = apply_max(value, min_val); - value = apply_min(value, max_val); +in_t apply_clip_s(in_t value, in_t min_val, in_t max_val) { + if (is_floating_point(in_t)) { + REQUIRE(min_val <= max_val); + } + else { + REQUIRE(sign_extend(min_val) <= sign_extend(max_val)); + } + value = apply_max_s(value, min_val); + value = apply_min_s(value, max_val); return value; } @@ -257,22 +280,37 @@ in_t apply_log(in_t input) { return the natural logarithm of input } -in_t apply_max(in_t a, in_t b) { +in_t apply_logical_rshift(in_t a, in_t b) { + uint64_t c = zero_extend(a) >> zero_extend(b); + return static_cast(c); +} + +in_t apply_max_s(in_t a, in_t b) { if (is_floating_point(in_t)) { 
if (isNaN(a) || isNaN(b)) { return NaN; } + if (a >= b) return a; else return b; } - if (a >= b) return a; else return b; + // Integer version + if (sign_extend(a) >= sign_extend(b)) return a; else return b; } -in_t apply_min(in_t a, in_t b) { +in_t apply_min_s(in_t a, in_t b) { if (is_floating_point(in_t)) { if (isNaN(a) || isNaN(b)) { return NaN; } + if (a < b) return a; else return b; } - if (a < b) return a; else return b; + // Integer version + if (sign_extend(a) < sign_extend(b)) return a; else return b; +} + +in_t apply_mul_s(in_t a, in_t b) { + if (is_floating_point(in_t)) return a * b; + int64_t c = sign_extend(a) * sign_extend(b); + return static_cast(c); } in_t apply_pow(in_t a, in_t b) { @@ -283,11 +321,17 @@ in_t apply_sqrt(in_t input) { return the square root of input } -in_t apply_sub(in_t a, in_t b) { +in_t apply_sub_s(in_t a, in_t b) { if (is_floating_point(in_t)) return a - b; - int64_t c = (int64_t)a - (int64_t)b; - REQUIRE(c >= minimum && c <= maximum); - return (in_t)c; + int64_t c = sign_extend(a) - sign_extend(b); + REQUIRE(c >= minimum_s && c <= maximum_s); + return static_cast(c); +} + +in_t apply_sub_u(in_t a, in_t b) { + uint64_t c = zero_extend(a) - zero_extend(b); + REQUIRE(c >= minimum_u && c <= maximum_u); + return truncate(c); } int32_t count_leading_zeros(int32_t a) { @@ -305,6 +349,69 @@ int32_t count_leading_zeros(int32_t a) { } ---- +==== Type Conversion Helpers + +The following definitions indicate the type to be used when the given parameters are provided. 
+ +[source,c++] +---- + +// Returns a signed version of the given type +// A no-op for floating-point types +Type make_signed(Type in_t) +{ + switch(in_t) { + case bool_t: + return bool_t; + case i8_t: + return int8_t; + case i16_t: + return int16_t; + case i32_t: + return int32_t; + case i48_t: + return int48_t; + case fp16_t: + return fp16_t; + case bf16_t: + return bf16_t; + case fp32_t: + return fp32_t; + } +} + +// Returns the unsigned type of the given type +// Error to call this with anything but i8_t or i16_t + +Type make_unsigned(Type in_t) +{ + ERROR_IF(in_t != i8_t && in_t != i16_t); + switch(in_t) { + case i8_t: + return uint8_t; + case i16_t: + return uint16_t; + } +} + +out_t static_cast(in_t value) +{ + // Operates similar to the c++ standard static_cast + // Limited to simple numeric conversion for TOSA. + // Sign extends signed integer input types if needed + // Zero extends unsigned integer input types if needed + // Truncates when converting to a smaller width data type + // Conversion from integer to floating-point is exact if possible + // If converting between signless integer types, treated as signed integer +} + +out_t bitcast(in_t value) +{ + // Treats the bits of value as if they were of type out_t + // Only supported for integer types of the same bit width +} +---- + ==== Numeric Conversion Helpers The following definitions are used in pseudocode to do numeric conversions. @@ -321,13 +428,17 @@ float_t round_to_nearest_float(in_t f) Converts the input value into floating-point, rounding to the nearest representable value. For the required precision see the section: Main inference precision requirements. -out_t sign_extend(in_t input) - Only valid for two's complement integer values where out_t has more bits than in_t. - Output = input - Replicate the top bit of input for all bits between the top bit of input and the top bit of output. +out_t sign_extend(in_t input) + Floating point values are unchanged. 
+ For two's complement integer values where out_t has more bits than in_t, replicate the top bit of input for all bits between the top bit of input and the top bit of output. + +out_t zero_extend(in_t input) + Floating point values are unchanged. + For two's complement integer values where out_t has more bits than in_t, insert zero values for all bits between the top bit of input and the top bit of output. out_t truncate(in_t input) output is the sizeof(out_t) least significant bits in input. + Nop for floating-point types ---- The following definition is used to flatten a list of lists into a single list. @@ -389,4 +500,16 @@ float_t cos(angle) bool power_of_two(int32_t value) return true if value is a power of two, false otherwise + +in_out_t maximum_s + return the maximum value when interpreting type T as a signed value as returned by the make_signed helper. + +in_out_t minimum_s + return the minimum value when interpreting type T as a signed value as returned by the make_signed helper. + +in_out_t maximum_u + return the maximum value when interpreting type T as an unsigned value as returned by the make_unsigned helper. + +in_out_t minimum_u + return the minimum value when interpreting type T as an unsigned value as returned by the make_unsigned helper. 
---- diff --git a/chapters/reduction.adoc b/chapters/reduction.adoc index 713404c..8a3ceac 100644 --- a/chapters/reduction.adoc +++ b/chapters/reduction.adoc @@ -1,7 +1,7 @@ // // This confidential and proprietary software may be used only as // authorised by a licensing agreement from ARM Limited -// (C) COPYRIGHT 2020-2021 ARM Limited +// (C) COPYRIGHT 2020-2023 ARM Limited // ALL RIGHTS RESERVED // The entire notice above must be reproduced on all authorised // copies and copies may only be made to the extent permitted @@ -77,7 +77,7 @@ for_each(index in shape1) { out_index[axis] = 0; in_out_t value = tensor_read(input, shape1, index); in_out_t state = tensor_read(output, shape, out_index); - state = apply_max(state, value); + state = apply_max_s(state, value); tensor_write(output, shape, out_index, state); } ---- @@ -100,7 +100,7 @@ for_each(index in shape1) { out_index[axis] = 0; in_out_t value = tensor_read(input, shape1, index); in_out_t state = tensor_read(output, shape, out_index); - state = apply_min(state, value); + state = apply_min_s(state, value); tensor_write(output, shape, out_index, state); } ---- @@ -123,7 +123,7 @@ for_each(index in shape1) { out_index[axis] = 0; in_out_t value = tensor_read(input, shape1, index); in_out_t state = tensor_read(output, shape, out_index); - state = state * value; + state = apply_mul_s(state, value); tensor_write(output, shape, out_index, state); } ---- @@ -146,7 +146,7 @@ for_each(index in shape1) { out_index[axis] = 0; in_out_t value = tensor_read(input, shape1, index); in_out_t state = tensor_read(output, shape, out_index); - state = apply_add(state, value); + state = apply_add_s(state, value); tensor_write(output, shape, out_index, state); } ---- diff --git a/chapters/tensor_ops.adoc b/chapters/tensor_ops.adoc index 6387790..b9d54c1 100644 --- a/chapters/tensor_ops.adoc +++ b/chapters/tensor_ops.adoc @@ -1,7 +1,7 @@ // // This confidential and proprietary software may be used only as // authorised by a licensing 
agreement from ARM Limited -// (C) COPYRIGHT 2020-2022 ARM Limited +// (C) COPYRIGHT 2020-2023 ARM Limited // ALL RIGHTS RESERVED // The entire notice above must be reproduced on all authorised // copies and copies may only be made to the extent permitted @@ -31,12 +31,15 @@ if (axis == rank(shape1)-1) { ERROR_IF(flatten(left_shape, right_shape) != shape); for_each(left_index in left_shape) { for_each(right_index in right_shape) { - in_t max_value = minimum_value; + in_t max_value = minimum_s; out_t max_index = 0; for (i = 0; i < shape[axis]; i++) { dim_t index = flatten(left_index, [i], right_index); in_t value = tensor_read(input, shape1, index); - if (value > max_value) { max_value = value; max_index = i; } + if (apply_max_s(value, max_value) != max_value) { + max_value = value; + max_index = i; + } } dim_t index = flatten(left_index, right_index); tensor_write(output, shape, index, max_index); @@ -54,8 +57,8 @@ include::{generated}/operators/AVG_POOL2D.adoc[] [source,c++] ---- -ERROR_IF(in_out_t != int8_t && input_zp != 0); // Zero point only for int8_t -ERROR_IF(in_out_t != int8_t && output_zp != 0); // Zero point only for int8_t +ERROR_IF(in_out_t != i8_t && input_zp != 0); // Zero point only for int8_t +ERROR_IF(in_out_t != i8_t && output_zp != 0); // Zero point only for int8_t ERROR_IF(kernel_y < 1 || kernel_x < 1); // kernel size must be >= 1 ERROR_IF(stride_y < 1 || stride_x < 1); ERROR_IF(pad_top < 0 || pad_bottom < 0 || pad_left < 0 || pad_right < 0); @@ -79,17 +82,19 @@ for_each(0 <= n < N, 0 <= oy < OH, 0 <= ox < OW, 0 <= c < C ) { // average, padding does not count if (0 <= y < IH and 0 <= x < IW) { count++; - acc_t value = tensor_read(input, [N,IH,IW,C], [n,y,x,c]); - value = value - input_zp; - acc = apply_add(acc, value); + acc_t value = sign_extend(tensor_read(input, [N,IH,IW,C], [n,y,x,c])); + value = apply_sub_s(value, sign_extend(input_zp)); + acc = apply_add_s(acc, value); } } if (is_float(in_out_t)) { - output_val = acc / (float)count; + 
output_val = acc / static_cast(count); } else { scale_t scale = reciprocal_scale(count); acc = apply_scale_32(acc, scale.multiplier, scale.shift, false); - output_val = (in_out_t)apply_clip(acc + output_zp, minimum, maximum) + acc = apply_add_s(acc, sign_extend(output_zp)); + acc = apply_clip_s(acc, minimum_s, maximum_s); + output_val = static_cast(acc); } tensor_write(output, [N,OH,OW,C], [n,oy,ox,c], output_val); } @@ -103,7 +108,7 @@ include::{generated}/operators/CONV2D.adoc[] [source,c++] ---- -ERROR_IF(in_t != int8_t && input_zp != 0); // Zero point only for int8_t +ERROR_IF(in_t != i8_t && input_zp != 0); // Zero point only for int8_t ERROR_IF(weight_t != int8_t && weight_zp != 0); ERROR_IF(pad_top < 0 || pad_bottom < 0 || pad_left < 0 || pad_right < 0); ERROR_IF(stride_y < 1 || stride_x < 1); @@ -120,14 +125,18 @@ for_each(0 <= n < N, 0 <= oy < OH, 0 <= ox < OW; 0 <= oc < OC) { index_t y = iy + ky * dilation_y; index_t x = ix + kx * dilation_x; if (0 <= y < IH && 0 <= x < IW) { - out_t value = tensor_read(input, [N,IH,IW,IC], [n,y,x,ic]); - out_t weight = tensor_read(weight, [OC,KH,KW,IC], [oc,ky,kx,ic]); - value = value - input_zp; - weight = weight - weight_zp; - acc = apply_add(acc, value * weight); + out_t value = static_cast(tensor_read(input, + [N,IH,IW,IC], + [n,y,x,ic])); + out_t weight = static_cast(tensor_read(weight, + [OC,KH,KW,IC], + [oc,ky,kx,ic])); + value = apply_sub_s(value, static_cast(input_zp)); + weight = apply_sub_s(weight, static_cast(weight_zp)); + acc = apply_add_s(acc, apply_mul_s(value, weight)); } } - acc = apply_add(acc, bias[(BC == 1) ? 0 : oc]); + acc = apply_add_s(acc, bias[(BC == 1) ? 
0 : oc]); tensor_write(output, [N,OH,OW,OC], [n,oy,ox,oc], acc); } ---- @@ -140,8 +149,8 @@ include::{generated}/operators/CONV3D.adoc[] [source,c++] ---- -ERROR_IF(in_t != int8_t && input_zp != 0); // Zero point only for int8_t -ERROR_IF(weight_t != int8_t && weight_zp != 0); +ERROR_IF(in_t != i8_t && input_zp != 0); // Zero point only for int8_t +ERROR_IF(weight_t != i8_t && weight_zp != 0); ERROR_IF(pad_d0 < 0 || pad_d1 < 0 || pad_top < 0 || pad_bottom < 0 || pad_left < 0 || pad_right < 0); ERROR_IF(stride_d < 1 || stride_y < 1 || stride_x < 1); ERROR_IF(dilation_d < 1 || dilation_y < 1 || dilation_x < 1); @@ -160,14 +169,18 @@ for_each(0 <= n < N, 0 <= od < OD, 0 <= oy < OH, 0 <= ox < OW; 0 <= oc < OC) { index_t y = iy + ky * dilation_y; index_t x = ix + kx * dilation_x; if (0 <= x < IW && 0 <= y < IH && 0 <= d < ID) { - out_t value = tensor_read(input, [N,ID,IH,IW,IC], [n,d,y,x,ic]); - out_t weight = tensor_read(weight,[OC,KD,KH,KW,IC],[oc,kd,ky,kx,ic]); - value = value - input_zp; - weight = weight - weight_zp; - acc = apply_add(acc, value * weight); + out_t value = static_cast(tensor_read(input, + [N,ID,IH,IW,IC], + [n,d,y,x,ic])); + out_t weight = static_cast(tensor_read(weight, + [OC,KD,KH,KW,IC], + [oc,kd,ky,kx,ic])); + value = apply_sub_s(value, static_cast(input_zp)); + weight = apply_sub_s(weight, static_cast(weight_zp)); + acc = apply_add_s(acc, apply_mul_s(value, weight)); } } - acc = apply_add(acc, bias[(BC == 1) ? 0 : oc]); + acc = apply_add_s(acc, bias[(BC == 1) ? 
0 : oc]); tensor_write(output, [N,OD,OH,OW,OC], [n,od,oy,ox,oc], acc); } ---- @@ -180,8 +193,8 @@ include::{generated}/operators/DEPTHWISE_CONV2D.adoc[] [source,c++] ---- -ERROR_IF(in_t != int8_t && input_zp != 0); // Zero point only for int8_t -ERROR_IF(weight_t != int8_t && weight_zp != 0); +ERROR_IF(in_t != i8_t && input_zp != 0); // Zero point only for int8_t +ERROR_IF(weight_t != i8_t && weight_zp != 0); ERROR_IF(pad_top < 0 || pad_bottom < 0 || pad_left < 0 || pad_right < 0); ERROR_IF(stride_y < 1 || stride_x < 1); ERROR_IF(dilation_y < 1 || dilation_x < 1); @@ -197,14 +210,18 @@ for_each(0 <= n < N, 0 <= oy < OH, 0 <= ox < OW; 0 <= c < C, 0 <= m < M) { index_t y = iy + ky * dilation_y; index_t x = ix + kx * dilation_x; if (0 <= y < IH && 0 <= x < IW) { - out_t value = tensor_read(input, [N,IH,IW,C], [n,y,x,c]); - out_t weight = tensor_read(weight, [KH,KW,C,M], [ky,kx,c,m]); - value = value - input_zp; - weight = weight - weight_zp; - acc = apply_add(acc, value * weight); + out_t value = static_cast(tensor_read(input, + [N,IH,IW,C], + [n,y,x,c])); + out_t weight = static_cast(tensor_read(weight, + [KH,KW,C,M], + [ky,kx,c,m])); + value = apply_sub_s(value, static_cast(input_zp)); + weight = apply_sub_s(weight, static_cast(weight_zp)); + acc = apply_add_s(acc, apply_mul_s(value, weight)); } } - acc = apply_add(acc, bias[(BC == 1) ? 0 : (c * M) + m]); + acc = apply_add_s(acc, bias[(BC == 1) ? 
0 : (c * M) + m]); tensor_write(output, [N,OH,OW,C * M], [n,oy,ox,c * M + m], acc); } ---- @@ -262,20 +279,20 @@ include::{generated}/operators/FULLY_CONNECTED.adoc[] [source,c++] ---- -ERROR_IF(in_t != int8_t && input_zp != 0); // Zero point only for int8_t -ERROR_IF(weight_t != int8_t && weight_zp != 0); +ERROR_IF(in_t != i8_t && input_zp != 0); // Zero point only for int8_t +ERROR_IF(weight_t != i8_t && weight_zp != 0); ERROR_IF(BC != OC && BC != 1); for_each(0 <= n < N, 0 <= oc < OC) { out_t acc = 0; for_each(0 <= ic < IC) { - out_t value = tensor_read(input, [N,IC], [n,ic]); - out_t weight = tensor_read(weight, [OC,IC], [oc,ic]); - value = value - input_zp; - weight = weight - weight_zp; - acc = apply_add(acc, value * weight); + out_t value = static_cast(tensor_read(input, [N,IC], [n,ic])); + out_t weight = static_cast(tensor_read(weight, [OC,IC], [oc,ic])); + value = apply_sub_s(value, static_cast(input_zp)); + weight = apply_sub_s(weight, static_cast(weight_zp)); + acc = apply_add_s(acc, apply_mul_s(value, weight)); } - acc = apply_add(acc, bias[(BC == 1) ? 0 : oc]); + acc = apply_add_s(acc, bias[(BC == 1) ? 
0 : oc]); tensor_write(output, [N,OC], [n,oc], acc); } ---- @@ -288,15 +305,15 @@ include::{generated}/operators/MATMUL.adoc[] [source,c++] ---- -ERROR_IF(in_t != int8_t && (A_zp != 0 || B_zp != 0)); // Zero point only for int8_t +ERROR_IF(in_t != i8_t && (A_zp != 0 || B_zp != 0)); // Zero point only for int8_t for_each(0 <= n < N, 0 <= h < H, 0 <= w < W) { out_t acc = 0; for_each(0 <= c < C) { - out_t value1 = tensor_read(A, [N,H,C], [n,h,c]); - out_t value2 = tensor_read(B, [N,C,W], [n,c,w]); - value1 = value1 - A_zp; - value2 = value2 - B_zp; - acc = apply_add(acc, value1 * value2); + out_t value1 = static_cast(tensor_read(A, [N,H,C], [n,h,c])); + out_t value2 = static_cast(tensor_read(B, [N,C,W], [n,c,w])); + value1 = apply_sub_s(value1, static_cast(A_zp)); + value2 = apply_sub_s(value2, static_cast(B_zp)); + acc = apply_add_s(acc, apply_mul_s(value1, value2)); } tensor_write(output, [N,H,W], [n,h,w], acc); } @@ -329,7 +346,7 @@ for_each(0 <= n < N, 0 <= oy < H, 0 <= ox < W, 0 <= c < C ) { index_t x = ix + kx; if (y >= 0 && y < IH && x >= 0 && x < IW) { in_out_t value = tensor_read(input, [N,IH,IW,C], [n,y,x,c]); - acc = apply_max(acc, value); + acc = apply_max_s(acc, value); } } tensor_write(output, [N,OH,OW,C], [n,oy,ox,c], acc); @@ -376,8 +393,8 @@ include::{generated}/operators/TRANSPOSE_CONV2D.adoc[] [source,c++] ---- -ERROR_IF(in_t != int8_t && input_zp != 0); // Zero point only allowed for int8_t -ERROR_IF(weight_t != int8_t && weight_zp != 0); +ERROR_IF(in_t != i8_t && input_zp != 0); // Zero point only allowed for int8_t +ERROR_IF(weight_t != i8_t && weight_zp != 0); ERROR_IF(out_pad_top <= -KH || out_pad_bottom <= -KH); ERROR_IF(out_pad_left <= -KW || out_pad_right <= -KW); ERROR_IF(stride_y < 1 || stride_x < 1); @@ -393,12 +410,12 @@ for_each(0 <= n < N, 0 <= iy < IH, 0 <= ix < IW, 0 <= oc < OC, index_t oy = iy * stride_y + out_pad_top + ky; index_t ox = ix * stride_x + out_pad_left + kx; if (oy >= 0 && oy < OH && ox >= 0 && ox < OW) { - out_t acc = 
tensor_read(output, [N,OH,OW,OC], [n,oy,ox,oc]); - out_t value = tensor_read(input, [N,IH,IW,IC], [n,iy,ix,ic]); - out_t weight = tensor_read(weight, [OC,KH,KW,IC], [oc,ky,kx,ic]); - value = value - input_zp; - weight = weight - weight_zp; - acc = apply_add(acc, value * weight); + out_t acc = static_cast(tensor_read(output, [N,OH,OW,OC], [n,oy,ox,oc])); + out_t value = static_cast(tensor_read(input, [N,IH,IW,IC], [n,iy,ix,ic])); + out_t weight = static_cast(tensor_read(weight, [OC,KH,KW,IC], [oc,ky,kx,ic])); + value = apply_sub_s(value, static_cast(input_zp)); + weight = apply_sub_s(weight, static_cast(weight_zp)); + acc = apply_add_s(acc, apply_mul_s(value, weight)); tensor_write(output, [N,OH,OW,OC], [n,oy,ox,oc], acc); } } diff --git a/chapters/type_conversion.adoc b/chapters/type_conversion.adoc index 415c5d9..90aca2d 100644 --- a/chapters/type_conversion.adoc +++ b/chapters/type_conversion.adoc @@ -1,7 +1,7 @@ // // This confidential and proprietary software may be used only as // authorised by a licensing agreement from ARM Limited -// (C) COPYRIGHT 2020-2022 ARM Limited +// (C) COPYRIGHT 2020-2023 ARM Limited // ALL RIGHTS RESERVED // The entire notice above must be reproduced on all authorised // copies and copies may only be made to the extent permitted @@ -29,7 +29,7 @@ for_each(index in shape) { } else if (in_t == fp16_t || in_t == bf16_t || in_t == fp32_t) { out = apply_clip(round_to_nearest_int(in), minimum, maximum); } else if (sizeof(out_t) >= sizeof(in_t)) { - out = sign_extend(in); + out = sign_extend(in); } else { out = truncate(in); } @@ -49,24 +49,49 @@ for_each(index in shape) { // uint16 values can have zero_point 0 or 32768 // int8/uint8 can have zero point within their valid range // No other types can have zero point != 0 - ERROR_IF(in_t != int8_t && - in_t != uint8_t && - in_t != uint16_t && input_zp != 0); - ERROR_IF(out_t != int8_t && - out_t != uint8_t && - out_t != uint16_t && output_zp != 0); - ERROR_IF(in_t == uint16_t && input_zp != 
0 && input_zp != 32768); - ERROR_IF(out_t == uint16_t && output_zp != 0 && output_zp != 32768); - ERROR_IF(scale32 && in_t == int48_t); + ERROR_IF(in_t != i8_t && + (in_t != i16_t || input_unsigned == False) && input_zp != 0); + ERROR_IF(out_t != i8_t && + (out_t != i16_t || output_unsigned == False) && output_zp != 0); + ERROR_IF(in_t == i16_t && input_unsigned == True && input_zp != 0 && input_zp != 32768); + ERROR_IF(out_t == i16_t && output_unsigned == True && output_zp != 0 && output_zp != 32768); + ERROR_IF(scale32 && in_t == i48_t); ERROR_IF(!scale32 && double_round); - int48_t value = tensor_read(input, shape, index); - value = value - input_zp; + ERROR_IF(in_t == i16_t && out_t == i32_t && input_unsigned); + ERROR_IF(in_t == i32_t && out_t == i16_t && output_unsigned); + + in_t in_value = tensor_read(input, shape, index); + + int48_t value, extended_in_zp; + if (input_unsigned) { + value = zero_extend(in_value); + extended_in_zp = zero_extend(input_zp); + } + else { + value = sign_extend(in_value); + extended_in_zp = sign_extend(input_zp); + } + + value = value - extended_in_zp; int c = (per_channel) ? index[rank(input) - 1] : 0; int32_t result = (scale32) ? 
apply_scale_32(value, multiplier[c], shift[c], double_round) : apply_scale_16(value, multiplier[c], shift[c]); - result = apply_add(result, output_zp); - out_t out = (out_t)apply_clip(result, minimum, maximum); + + if (output_unsigned) { + int32_t extended_out_zp = zero_extend(output_zp); + result = apply_add_s(result, extended_out_zp); + out_t out = static_cast(apply_clip(result, + minimum_u, + maximum_u)); + } + else { + int32_t extended_out_zp = sign_extend(output_zp); + result = apply_add_s(result, extended_out_zp); + out_t out = static_cast(apply_clip(result, + minimum_s, + maximum_s)); + } tensor_write(output, shape, index, out); } ---- diff --git a/tools/dictionary.dic b/tools/dictionary.dic index e2e1a58..faf431a 100644 --- a/tools/dictionary.dic +++ b/tools/dictionary.dic @@ -67,6 +67,7 @@ RSQRT sigmoid Sigmoid SIGMOID +signless SIMD subtensor svg @@ -79,6 +80,7 @@ tosa TOSA TOSASETS TPUs +uint ulp unary Unary diff --git a/tosa.xml b/tosa.xml index 53f8000..b5a5f7a 100644 --- a/tosa.xml +++ b/tosa.xml @@ -21,7 +21,7 @@ - + Axis in range from 0 to rank(shape1) - 1 @@ -34,17 +34,17 @@ - - - + + + - + - + @@ -56,19 +56,19 @@ Input tensor - + [kernel_y, kernel_x] - + [stride_y, stride_x] - + [pad_top, pad_bottom, pad_left, pad_right] @@ -97,8 +97,8 @@ - - + + @@ -133,7 +133,7 @@ Per output channel bias data. - + [pad_top, pad_bottom, pad_left, pad_right] @@ -141,13 +141,13 @@ - + [stride_y, stride_x] - + [dilation_y, dilation_x] @@ -177,9 +177,9 @@ - - - + + + @@ -215,7 +215,7 @@ Per output channel bias data. - + [pad_d0, pad_d1, pad_top, pad_bottom, pad_left, pad_right] @@ -225,14 +225,14 @@ - + [stride_d, stride_y, stride_x] - + [dilation_d, dilation_y, dilation_x] @@ -262,9 +262,9 @@ - - - + + + @@ -299,7 +299,7 @@ Per output channel bias data. 
- + [pad_top, pad_bottom, pad_left, pad_right] @@ -307,13 +307,13 @@ - + [stride_y, stride_x] - + [dilation_y, dilation_x] @@ -343,9 +343,9 @@ - - - + + + @@ -438,9 +438,9 @@ - - - + + + @@ -486,8 +486,8 @@ - - + + @@ -512,19 +512,19 @@ Input tensor 4D - + [kernel_y, kernel_x] - + [stride_y, stride_x] - + [pad_top, pad_bottom, pad_left, pad_right] @@ -540,8 +540,8 @@ - - + + @@ -598,7 +598,7 @@ Per output channel bias data. - + [out_pad_top, out_pad_bottom, out_pad_left, out_pad_right] @@ -606,13 +606,13 @@ - + [stride_y, stride_x] - + [N,OH,OW,OC] @@ -642,9 +642,9 @@ - - - + + + @@ -688,8 +688,8 @@ - - + + @@ -812,7 +812,7 @@ - + @@ -851,9 +851,9 @@ - - - + + + BITWISE_AND @@ -875,9 +875,9 @@ - - - + + + BITWISE_OR @@ -899,9 +899,9 @@ - - - + + + BITWISE_XOR @@ -923,9 +923,9 @@ - - - + + + INTDIV @@ -947,7 +947,7 @@ - + @@ -992,9 +992,9 @@ - - - + + + LOGICAL_RIGHT_SHIFT @@ -1016,9 +1016,9 @@ - - - + + + LOGICAL_OR @@ -1084,7 +1084,7 @@ - + @@ -1118,7 +1118,7 @@ - + @@ -1143,8 +1143,8 @@ Input tensor with the same rank as input1 - - Result right shift (int32_t data type only) + + Result right shift (i32_t data type only) @@ -1157,9 +1157,9 @@ - - - + + + @@ -1227,7 +1227,7 @@ - + @@ -1265,8 +1265,8 @@ - - + + @@ -1286,7 +1286,7 @@ - + @@ -1316,9 +1316,9 @@ - - - + + + CEIL @@ -1365,7 +1365,7 @@ - + EXP @@ -1497,9 +1497,9 @@ - - - + + + @@ -1598,9 +1598,9 @@ - - - + + + @@ -1637,7 +1637,7 @@ - + @@ -1672,7 +1672,7 @@ - + @@ -1707,7 +1707,7 @@ - + @@ -1730,7 +1730,7 @@ Input tensor - + Axis to reduce, in range from 0 to rank(shape1)-1 @@ -1751,7 +1751,7 @@ Input tensor - + Axis to reduce, in range from 0 to rank(shape1)-1 @@ -1772,7 +1772,7 @@ Input tensor - + Axis to reduce, in range from 0 to rank(shape1)-1 @@ -1784,9 +1784,9 @@ - - - + + + @@ -1807,7 +1807,7 @@ Input tensor - + Axis to reduce, in range from 0 to rank(shape1)-1 @@ -1819,9 +1819,9 @@ - - - + + + @@ -1842,7 +1842,7 @@ Input tensor - + Axis to reduce, in range from 0 to rank(shape1)-1 @@ -1874,7 
+1874,7 @@ Input tensor with rank from 1 to 4 - + Axis to reduce, in range from 0 to rank(shape1)-1 @@ -1886,7 +1886,7 @@ - + @@ -1909,7 +1909,7 @@ List of input tensors. All inputs must have the same rank and data type - + Axis along which concatenation is to occur, in range from 0 to rank(shape)-1 @@ -1923,9 +1923,9 @@ - - - + + + @@ -1965,9 +1965,9 @@ - - - + + + @@ -1989,7 +1989,7 @@ - + Axis in range from 0 to rank(shape) - 1 @@ -2002,9 +2002,9 @@ - - - + + + @@ -2040,9 +2040,9 @@ - - - + + + @@ -2064,7 +2064,7 @@ - + Axis to reverse, in range from 0 to rank(shape)-1 @@ -2077,9 +2077,9 @@ - - - + + + @@ -2120,9 +2120,9 @@ used. - - - + + + @@ -2157,9 +2157,9 @@ used. - - - + + + @@ -2180,7 +2180,7 @@ used. Input tensor - + List of integers of length equal to the rank of input1. Values must be valid dimensions within shape1, and may not be repeated. @@ -2194,9 +2194,9 @@ used. - - - + + + @@ -2231,9 +2231,9 @@ used. - - - + + + @@ -2270,9 +2270,9 @@ used. - - - + + + @@ -2323,10 +2323,10 @@ used. - - - - + + + + @@ -2359,63 +2359,63 @@ used. - - - - - - - + + + + + + + - + - + - - - - + + + + - + - + - - - - + + + + - + - + - + - + - + @@ -2423,15 +2423,15 @@ used. - + - + - + @@ -2439,15 +2439,15 @@ used. - + - + - + @@ -2484,12 +2484,12 @@ used. Scaling multiplier array - + Scaling shift array - if (scale32) mul_t=int32_t else mul_t=int16_t + if (scale32) mul_t=i32_t else mul_t=i16_t @@ -2500,29 +2500,31 @@ used. if (per_channel) NC=shape[rank(shape)-1] else NC=1 + + If True, treat the input values as unsigned. + + + + If True, treat the output values as unsigned. + + - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + @@ -2542,11 +2544,11 @@ used. - - - - - + + + + + @@ -2577,9 +2579,9 @@ used. - - - + + + @@ -2638,7 +2640,7 @@ used. VARIABLE - + Globally unique identifier for the declared variable tensor. @@ -2660,7 +2662,7 @@ used. VARIABLE_WRITE - + Globally unique identifier of the variable tensor that is writing to @@ -2674,7 +2676,7 @@ used. 
VARIABLE_READ - + Globally unique identifier of the variable tensor that is reading from diff --git a/tosa.xsd b/tosa.xsd index 440dbbd..8a3c307 100644 --- a/tosa.xsd +++ b/tosa.xsd @@ -33,13 +33,11 @@ - - - - - - - + + + + + -- cgit v1.2.1