Change TOSA specification to signless types

Integer inputs and outputs to TOSA operators are now defined as signless values. In most instances the operator will use signed arithmetic as indicated in previous versions of the specification, resulting in little functional change to the specification. New attributes have been added to the RESCALE operator to indicate whether the input and output values should be treated as signed or unsigned. Explicit use of static_cast, sign_extend, zero_extend and truncate is added to the pseudocode to avoid ambiguity. Change-Id: I71c67d3e5aeaabc418c768f821fce6ee3eebb65b
author: Eric Kunze <eric.kunze@arm.com> 2023-07-18 15:20:53 -0700
committer: Eric Kunze <eric.kunze@arm.com> 2023-08-17 09:32:28 -0700
commit: fb0284e2912bd5fd73bf6f476901490e04c330a2 (patch)
tree: 1784e40ad84a91e751679a4cbdf6cd33be1eefdb /chapters/introduction.adoc
parent: b5b067819e5de11153b41cf3d26da4f3f9dd23e8 (diff)
download: specification-fb0284e2912bd5fd73bf6f476901490e04c330a2.tar.gz
1 files changed, 40 insertions, 13 deletions
diff --git a/chapters/introduction.adoc b/chapters/introduction.adoc
index d6f7bf9..26fef0e 100644
--- a/chapters/introduction.adoc
+++ b/chapters/introduction.adoc
@@ -338,11 +338,11 @@ fp64_t acc_prec;       // 1<<(M+1) where M is the number of mantissa bits
 fp64_t acc_min_normal; // accumulator minimum normal greater than zero
 fp64_t two_m63 = -1.0/(fp64)((int64_t)-1<<63);         // pow(2,  -63)
 switch (acc_t) {
-    case fp32_t: acc_prec       = (fp64_t)(1<<24);     // pow(2,   24)
+    case fp32_t: acc_prec       = static_cast<fp64_t>(1<<24);     // pow(2,   24)
                  acc_min_normal = two_m63 * two_m63;   // pow(2, -126)
                  break;
-    case fp16_t: acc_prec       = (fp64_t)(1<<11);     // pow(2,   11)
-                 acc_min_normal = 1.0/(fp64_t)(1<<14); // pow(2,  -14)
+    case fp16_t: acc_prec       = static_cast<fp64_t>(1<<11);     // pow(2,   11)
+                 acc_min_normal = 1.0/static_cast<fp64_t>(1<<14); // pow(2,  -14)
                  break;
     default:     ERROR_IF(true);
 }
@@ -359,7 +359,7 @@ for_each(index in output_shape) {
         out_err = 0.0;
     } else {  // 0.0 < out_bnd < infinity
         out_bnd = max(out_bnd, acc_min_normal);
-        out_err = ((fp64_t)out_imp - out_ref)*acc_prec/out_bnd;
+        out_err = (static_cast<fp64_t>(out_imp) - out_ref) * acc_prec / out_bnd;
         REQUIRE(abs(out_err) <= ksb);
     }
     out_err_sum   += out_err;
@@ -457,11 +457,21 @@ The number formats supported by a given operator are listed in its table of supp
 | -
 |Boolean value. Size implementation defined. The TOSA reference model implements this as int8_t with 0 for false and 1 for true. All non-zero values are accepted on input as true.
 
+|i4_t
+| -
+| -
+|Signless 4-bit integer type. Will be interpreted as int4_t by all operators.
+
 |int4_t
 | -7
 | +7
 |Signed 4-bit two's-complement value. Excludes -8 to maintain a range that is symmetric about zero for weights.
 
+|i8_t
+| -
+| -
+|Signless 8-bit integer value. Will be interpreted as int8_t unless otherwise specified by an operator.
+
 |int8_t
 | -128
 | +127
@@ -470,7 +480,12 @@ The number formats supported by a given operator are listed in its table of supp
 |uint8_t
 | 0
 | 255
-|Unsigned 8-bit value.
+|Unsigned 8-bit integer value.
+
+|i16_t
+| -
+| -
+|Signless 16-bit integer type. Will be interpreted as int16_t unless otherwise specified by an operator.
 
 |int16_t
 | -32768
@@ -482,11 +497,21 @@ The number formats supported by a given operator are listed in its table of supp
 | 65535
 |Unsigned 16-bit value.
 
+|i32_t
+| -
+| -
+|Signless 32-bit integer value. Will be interpreted as int32_t by all operators.
+
 |int32_t
 | -(1<<31)
 | (1<<31)-1
 |Signed 32-bit two's-complement value.
 
+|i48_t
+| -
+| -
+|Signless 48-bit integer value. Will be interpreted as int48_t by all operators.
+
 |int48_t
 | -(1<<47)
 | (1<<47)-1
@@ -542,7 +567,9 @@ This ensures that a Base Inference profile TOSA implementation can calculate the
 
 === Integer Behavior
 
-Integer calculations must be standard two's-complement or unsigned calculations.
+TOSA integer inputs and outputs are specified by signless values with the given number of bits.
+Unless otherwise specified, these values will be interpreted as signed two's-complement.
+The pseudocode will use int*_t to indicate use as a signed value and uint*_t to indicate use as an unsigned value.
 If overflow occurs doing integer calculation, the result is unpredictable, as indicated by the REQUIRE checks in the pseudocode for the operators.
 
 Unsigned 8 and 16-bit values are only allowed in the RESCALE operation, to allow for compatibility with networks which expect unsigned 8-bit or 16-bit tensors for input and output.
@@ -598,20 +625,20 @@ int32_t apply_scale_32(int32_t value, int32_t multiplier, int8_t shift, bool_t d
         if (shift > 31 && value >= 0) round += 1<<30;
         if (shift > 31 && value < 0)  round -= 1<<30;
     }
-    int64_t result = (int64_t)value * multiplier + round;
+    int64_t result = static_cast<int64_t>(value) * multiplier + round;
     result = result >> shift;
     // result will fit a 32-bit range due to the REQUIRE on value
-    return (int32_t)result;
+    return static_cast<int32_t>(result);
 }
 
 int32_t apply_scale_16(int48_t value, int16_t multiplier, int8_t shift) {
     REQUIRE(multiplier >= 0);
     REQUIRE(2 <= shift && shift <= 62);
     int64_t round = (1 << (shift - 1));
-    int64_t result = (int64_t)value * multiplier + round;
+    int64_t result = static_cast<int64_t>(value) * multiplier + round;
     result = result >> shift;
     REQUIRE(result >= minimum<int32_t> && result <= maximum<int32_t>);
-    return (int32_t)result;
+    return static_cast<int32_t>(result);
 }
 ----
 
@@ -665,9 +692,9 @@ All table lookups are based on the following reference lookup function that take
 
 [source,c++]
 ----
-int32_t apply_lookup(int16_t *table, int32_t value)
+int32_t apply_lookup_s(int16_t *table, int32_t value)
 {
-    int16_t clipped_value = (int16_t)apply_clip<int32_t>(value, -32768, +32767);
+    int16_t clipped_value = static_cast<int16_t>(apply_clip_s<int32_t>(value, -32768, +32767));
     int32_t index = (clipped_value + 32768) >> 7;
     int32_t fraction = clipped_value & 0x7f;
     int16_t base = table[index];
@@ -688,7 +715,7 @@ void generate_lookup_table(int16_t *table, int32_t (*reference)(int32_t))
 {
     for (int i = -256; i <= 256; i++) {
         int32_t value = (*reference)(i);
-        table[i + 256] = (int16_t)apply_clip<int32_t>(value, -32768, +32767)
+        table[i + 256] = static_cast<int16_t>(apply_clip<int32_t>(value, -32768, +32767));
     }
 }
 ----
author	Eric Kunze <eric.kunze@arm.com>	2023-07-18 15:20:53 -0700
committer	Eric Kunze <eric.kunze@arm.com>	2023-08-17 09:32:28 -0700
commit	fb0284e2912bd5fd73bf6f476901490e04c330a2 (patch)
tree	1784e40ad84a91e751679a4cbdf6cd33be1eefdb /chapters/introduction.adoc
parent	b5b067819e5de11153b41cf3d26da4f3f9dd23e8 (diff)
download	specification-fb0284e2912bd5fd73bf6f476901490e04c330a2.tar.gz