Change TOSA specification to signless types

Integer inputs and outputs to TOSA operators are now defined as signless values. In most instances the operator will use signed arithmetic as indicated in previous versions of the specification resulting in little functional change to the specification. New attributes have been added to the RESCALE operator to indicate whether the input and output values should be treated as signed or unsigned. Explicit use of static_cast, sign_extend, zero_extend and truncate are added to the pseudocode to avoid ambiguity. Change-Id: I71c67d3e5aeaabc418c768f821fce6ee3eebb65b
author: Eric Kunze <eric.kunze@arm.com> 2023-07-18 15:20:53 -0700
committer: Eric Kunze <eric.kunze@arm.com> 2023-08-17 09:32:28 -0700
commit: fb0284e2912bd5fd73bf6f476901490e04c330a2 (patch)
tree: 1784e40ad84a91e751679a4cbdf6cd33be1eefdb /chapters/tensor_ops.adoc
parent: b5b067819e5de11153b41cf3d26da4f3f9dd23e8 (diff)
download: specification-fb0284e2912bd5fd73bf6f476901490e04c330a2.tar.gz
1 files changed, 73 insertions, 56 deletions
diff --git a/chapters/tensor_ops.adoc b/chapters/tensor_ops.adoc
index 6387790..b9d54c1 100644
--- a/chapters/tensor_ops.adoc
+++ b/chapters/tensor_ops.adoc
@@ -1,7 +1,7 @@
 //
 // This confidential and proprietary software may be used only as
 // authorised by a licensing agreement from ARM Limited
-// (C) COPYRIGHT 2020-2022 ARM Limited
+// (C) COPYRIGHT 2020-2023 ARM Limited
 // ALL RIGHTS RESERVED
 // The entire notice above must be reproduced on all authorised
 // copies and copies may only be made to the extent permitted
@@ -31,12 +31,15 @@ if (axis == rank(shape1)-1) {
 ERROR_IF(flatten(left_shape, right_shape) != shape);
 for_each(left_index in left_shape) {
     for_each(right_index in right_shape) {
-        in_t max_value = minimum_value<in_t>;
+        in_t max_value = minimum_s<in_t>;
         out_t max_index = 0;
         for (i = 0; i < shape[axis]; i++) {
             dim_t index = flatten(left_index, [i], right_index);
             in_t value = tensor_read<in_t>(input, shape1, index);
-            if (value > max_value) { max_value = value; max_index = i; }
+            if (apply_max_s<in_t>(value, max_value) != max_value) {
+                max_value = value;
+                max_index = i;
+            }
         }
         dim_t index = flatten(left_index, right_index);
         tensor_write<out_t>(output, shape, index, max_index);
@@ -54,8 +57,8 @@ include::{generated}/operators/AVG_POOL2D.adoc[]
 
 [source,c++]
 ----
-ERROR_IF(in_out_t != int8_t && input_zp != 0); // Zero point only for int8_t
-ERROR_IF(in_out_t != int8_t && output_zp != 0); // Zero point only for int8_t
+ERROR_IF(in_out_t != i8_t && input_zp != 0); // Zero point only for int8_t
+ERROR_IF(in_out_t != i8_t && output_zp != 0); // Zero point only for int8_t
 ERROR_IF(kernel_y < 1 || kernel_x < 1); // kernel size must be >= 1
 ERROR_IF(stride_y < 1 || stride_x < 1);
 ERROR_IF(pad_top < 0 || pad_bottom < 0 || pad_left < 0 || pad_right < 0);
@@ -79,17 +82,19 @@ for_each(0 <= n < N, 0 <= oy < OH, 0 <= ox < OW, 0 <= c < C ) {
         // average, padding does not count
         if (0 <= y < IH and 0 <= x < IW) {
             count++;
-            acc_t value = tensor_read<in_out_t>(input, [N,IH,IW,C], [n,y,x,c]);
-            value = value - input_zp;
-            acc = apply_add<acc_t>(acc, value);
+            acc_t value = sign_extend<acc_t>(tensor_read<in_out_t>(input, [N,IH,IW,C], [n,y,x,c]));
+            value = apply_sub_s<acc_t>(value, sign_extend<acc_t>(input_zp));
+            acc = apply_add_s<acc_t>(acc, value);
         }
     }
     if (is_float(in_out_t)) {
-        output_val = acc / (float)count;
+        output_val = acc / static_cast<in_out_t>(count);
     } else {
         scale_t scale = reciprocal_scale(count);
         acc = apply_scale_32(acc, scale.multiplier, scale.shift, false);
-        output_val = (in_out_t)apply_clip<acc_t>(acc + output_zp, minimum<in_out_t>, maximum<in_out_t>)
+        acc = apply_add_s<acc_t>(acc, sign_extend<acc_t>(output_zp));
+        acc = apply_clip_s<acc_t>(acc, minimum_s<in_out_t>, maximum_s<in_out_t>);
+        output_val = static_cast<in_out_t>(acc);
     }
     tensor_write<in_out_t>(output, [N,OH,OW,C], [n,oy,ox,c], output_val);
 }
@@ -103,7 +108,7 @@ include::{generated}/operators/CONV2D.adoc[]
 
 [source,c++]
 ----
-ERROR_IF(in_t != int8_t && input_zp != 0); // Zero point only for int8_t
+ERROR_IF(in_t != i8_t && input_zp != 0); // Zero point only for int8_t
 ERROR_IF(weight_t != int8_t && weight_zp != 0);
 ERROR_IF(pad_top < 0 || pad_bottom < 0 || pad_left < 0 || pad_right < 0);
 ERROR_IF(stride_y < 1 || stride_x < 1);
@@ -120,14 +125,18 @@ for_each(0 <= n < N, 0 <= oy < OH, 0 <= ox < OW; 0 <= oc < OC) {
         index_t y = iy + ky * dilation_y;
         index_t x = ix + kx * dilation_x;
         if (0 <= y < IH && 0 <= x < IW) {
-            out_t value  = tensor_read<in_t>(input, [N,IH,IW,IC], [n,y,x,ic]);
-            out_t weight = tensor_read<weight_t>(weight, [OC,KH,KW,IC], [oc,ky,kx,ic]);
-            value  = value - input_zp;
-            weight = weight - weight_zp;
-            acc = apply_add<out_t>(acc, value * weight);
+            out_t value  = static_cast<out_t>(tensor_read<in_t>(input,
+                                                                [N,IH,IW,IC],
+                                                                [n,y,x,ic]));
+            out_t weight = static_cast<out_t>(tensor_read<weight_t>(weight,
+                                                                   [OC,KH,KW,IC],
+                                                                   [oc,ky,kx,ic]));
+            value  = apply_sub_s<out_t>(value, static_cast<out_t>(input_zp));
+            weight = apply_sub_s<out_t>(weight, static_cast<out_t>(weight_zp));
+            acc = apply_add_s<out_t>(acc, apply_mul_s<out_t>(value, weight));
         }
     }
-    acc = apply_add<out_t>(acc, bias[(BC == 1) ? 0 : oc]);
+    acc = apply_add_s<out_t>(acc, bias[(BC == 1) ? 0 : oc]);
     tensor_write<out_t>(output, [N,OH,OW,OC], [n,oy,ox,oc], acc);
 }
 ----
@@ -140,8 +149,8 @@ include::{generated}/operators/CONV3D.adoc[]
 
 [source,c++]
 ----
-ERROR_IF(in_t != int8_t && input_zp != 0); // Zero point only for int8_t
-ERROR_IF(weight_t != int8_t && weight_zp != 0);
+ERROR_IF(in_t != i8_t && input_zp != 0); // Zero point only for int8_t
+ERROR_IF(weight_t != i8_t && weight_zp != 0);
 ERROR_IF(pad_d0 < 0 || pad_d1 < 0 || pad_top < 0 || pad_bottom < 0 || pad_left < 0 || pad_right < 0);
 ERROR_IF(stride_d < 1 || stride_y < 1 || stride_x < 1);
 ERROR_IF(dilation_d < 1 || dilation_y < 1 || dilation_x < 1);
@@ -160,14 +169,18 @@ for_each(0 <= n < N, 0 <= od < OD, 0 <= oy < OH, 0 <= ox < OW; 0 <= oc < OC) {
         index_t y = iy + ky * dilation_y;
         index_t x = ix + kx * dilation_x;
         if (0 <= x < IW && 0 <= y < IH && 0 <= d < ID) {
-            out_t value  = tensor_read<in_t>(input, [N,ID,IH,IW,IC], [n,d,y,x,ic]);
-            out_t weight = tensor_read<weight_t>(weight,[OC,KD,KH,KW,IC],[oc,kd,ky,kx,ic]);
-            value  = value - input_zp;
-            weight = weight - weight_zp;
-            acc = apply_add<out_t>(acc, value * weight);
+            out_t value  = static_cast<out_t>(tensor_read<in_t>(input,
+                                                                [N,ID,IH,IW,IC],
+                                                                [n,d,y,x,ic]));
+            out_t weight = static_cast<out_t>(tensor_read<weight_t>(weight,
+                                                                    [OC,KD,KH,KW,IC],
+                                                                    [oc,kd,ky,kx,ic]));
+            value  = apply_sub_s<out_t>(value, static_cast<out_t>(input_zp));
+            weight = apply_sub_s<out_t>(weight, static_cast<out_t>(weight_zp));
+            acc = apply_add_s<out_t>(acc, apply_mul_s<out_t>(value, weight));
         }
     }
-    acc = apply_add<out_t>(acc, bias[(BC == 1) ? 0 : oc]);
+    acc = apply_add_s<out_t>(acc, bias[(BC == 1) ? 0 : oc]);
     tensor_write<out_t>(output, [N,OD,OH,OW,OC], [n,od,oy,ox,oc], acc);
 }
 ----
@@ -180,8 +193,8 @@ include::{generated}/operators/DEPTHWISE_CONV2D.adoc[]
 
 [source,c++]
 ----
-ERROR_IF(in_t != int8_t && input_zp != 0); // Zero point only for int8_t
-ERROR_IF(weight_t != int8_t && weight_zp != 0);
+ERROR_IF(in_t != i8_t && input_zp != 0); // Zero point only for int8_t
+ERROR_IF(weight_t != i8_t && weight_zp != 0);
 ERROR_IF(pad_top < 0 || pad_bottom < 0 || pad_left < 0 || pad_right < 0);
 ERROR_IF(stride_y < 1 || stride_x < 1);
 ERROR_IF(dilation_y < 1 || dilation_x < 1);
@@ -197,14 +210,18 @@ for_each(0 <= n < N, 0 <= oy < OH, 0 <= ox < OW; 0 <= c < C, 0 <= m < M) {
         index_t y = iy + ky * dilation_y;
         index_t x = ix + kx * dilation_x;
         if (0 <= y < IH && 0 <= x < IW) {
-            out_t value  = tensor_read<in_t>(input, [N,IH,IW,C], [n,y,x,c]);
-            out_t weight = tensor_read<weight_t>(weight, [KH,KW,C,M], [ky,kx,c,m]);
-            value  = value - input_zp;
-            weight = weight - weight_zp;
-            acc = apply_add<out_t>(acc, value * weight);
+            out_t value  = static_cast<out_t>(tensor_read<in_t>(input,
+                                                                [N,IH,IW,C],
+                                                                [n,y,x,c]));
+            out_t weight = static_cast<out_t>(tensor_read<weight_t>(weight,
+                                                                    [KH,KW,C,M],
+                                                                    [ky,kx,c,m]));
+            value  = apply_sub_s<out_t>(value, static_cast<out_t>(input_zp));
+            weight = apply_sub_s<out_t>(weight, static_cast<out_t>(weight_zp));
+            acc = apply_add_s<out_t>(acc, apply_mul_s<out_t>(value, weight));
         }
     }
-    acc = apply_add<out_t>(acc, bias[(BC == 1) ? 0 : (c * M) + m]);
+    acc = apply_add_s<out_t>(acc, bias[(BC == 1) ? 0 : (c * M) + m]);
     tensor_write<out_t>(output, [N,OH,OW,C * M], [n,oy,ox,c * M + m], acc);
 }
 ----
@@ -262,20 +279,20 @@ include::{generated}/operators/FULLY_CONNECTED.adoc[]
 
 [source,c++]
 ----
-ERROR_IF(in_t != int8_t && input_zp != 0); // Zero point only for int8_t
-ERROR_IF(weight_t != int8_t && weight_zp != 0);
+ERROR_IF(in_t != i8_t && input_zp != 0); // Zero point only for int8_t
+ERROR_IF(weight_t != i8_t && weight_zp != 0);
 ERROR_IF(BC != OC && BC != 1);
 
 for_each(0 <= n < N, 0 <= oc < OC) {
     out_t acc = 0;
     for_each(0 <= ic < IC) {
-        out_t value  = tensor_read<in_t>(input, [N,IC], [n,ic]);
-        out_t weight = tensor_read<weight_t>(weight, [OC,IC], [oc,ic]);
-        value  = value - input_zp;
-        weight = weight - weight_zp;
-        acc = apply_add<out_t>(acc, value * weight);
+        out_t value  = static_cast<out_t>(tensor_read<in_t>(input, [N,IC], [n,ic]));
+        out_t weight = static_cast<out_t>(tensor_read<weight_t>(weight, [OC,IC], [oc,ic]));
+        value  = apply_sub_s<out_t>(value, static_cast<out_t>(input_zp));
+        weight = apply_sub_s<out_t>(weight, static_cast<out_t>(weight_zp));
+        acc = apply_add_s<out_t>(acc, apply_mul_s<out_t>(value, weight));
     }
-    acc = apply_add<out_t>(acc, bias[(BC == 1) ? 0 : oc]);
+    acc = apply_add_s<out_t>(acc, bias[(BC == 1) ? 0 : oc]);
     tensor_write<out_t>(output, [N,OC], [n,oc], acc);
 }
 ----
@@ -288,15 +305,15 @@ include::{generated}/operators/MATMUL.adoc[]
 
 [source,c++]
 ----
-ERROR_IF(in_t != int8_t && (A_zp != 0 || B_zp != 0)); // Zero point only for int8_t
+ERROR_IF(in_t != i8_t && (A_zp != 0 || B_zp != 0)); // Zero point only for int8_t
 for_each(0 <= n < N, 0 <= h < H, 0 <= w < W) {
     out_t acc = 0;
     for_each(0 <= c < C) {
-        out_t value1 = tensor_read<in_t>(A, [N,H,C], [n,h,c]);
-        out_t value2 = tensor_read<in_t>(B, [N,C,W], [n,c,w]);
-        value1 = value1 - A_zp;
-        value2 = value2 - B_zp;
-        acc = apply_add<out_t>(acc, value1 * value2);
+        out_t value1 = static_cast<out_t>(tensor_read<in_t>(A, [N,H,C], [n,h,c]));
+        out_t value2 = static_cast<out_t>(tensor_read<in_t>(B, [N,C,W], [n,c,w]));
+        value1 = apply_sub_s<out_t>(value1, static_cast<out_t>(A_zp));
+        value2 = apply_sub_s<out_t>(value2, static_cast<out_t>(B_zp));
+        acc = apply_add_s<out_t>(acc, apply_mul_s<out_t>(value1, value2));
     }
     tensor_write<out_t>(output, [N,H,W], [n,h,w], acc);
 }
@@ -329,7 +346,7 @@ for_each(0 <= n < N, 0 <= oy < H, 0 <= ox < W, 0 <= c < C ) {
         index_t x = ix + kx;
         if (y >= 0 && y < IH && x >= 0 && x < IW) {
             in_out_t value = tensor_read<in_out_t>(input, [N,IH,IW,C], [n,y,x,c]);
-            acc = apply_max(acc, value);
+            acc = apply_max_s<in_out_t>(acc, value);
         }
     }
     tensor_write<in_out_t>(output, [N,OH,OW,C], [n,oy,ox,c], acc);
@@ -376,8 +393,8 @@ include::{generated}/operators/TRANSPOSE_CONV2D.adoc[]
 
 [source,c++]
 ----
-ERROR_IF(in_t != int8_t  && input_zp != 0); // Zero point only allowed for int8_t
-ERROR_IF(weight_t != int8_t && weight_zp != 0);
+ERROR_IF(in_t != i8_t  && input_zp != 0); // Zero point only allowed for int8_t
+ERROR_IF(weight_t != i8_t && weight_zp != 0);
 ERROR_IF(out_pad_top <= -KH || out_pad_bottom <= -KH);
 ERROR_IF(out_pad_left <= -KW || out_pad_right <= -KW);
 ERROR_IF(stride_y < 1 || stride_x < 1);
@@ -393,12 +410,12 @@ for_each(0 <= n < N, 0 <= iy < IH, 0 <= ix < IW, 0 <= oc < OC,
     index_t oy = iy * stride_y + out_pad_top + ky;
     index_t ox = ix * stride_x + out_pad_left + kx;
     if (oy >= 0 && oy < OH && ox >= 0 && ox < OW) {
-        out_t acc = tensor_read<out_t>(output, [N,OH,OW,OC], [n,oy,ox,oc]);
-        out_t value = tensor_read<in_t>(input, [N,IH,IW,IC], [n,iy,ix,ic]);
-        out_t weight = tensor_read<weight_t>(weight, [OC,KH,KW,IC], [oc,ky,kx,ic]);
-        value = value - input_zp;
-        weight = weight - weight_zp;
-        acc = apply_add<out_t>(acc, value * weight);
+        out_t acc = static_cast<out_t>(tensor_read<out_t>(output, [N,OH,OW,OC], [n,oy,ox,oc]));
+        out_t value = static_cast<out_t>(tensor_read<in_t>(input, [N,IH,IW,IC], [n,iy,ix,ic]));
+        out_t weight = static_cast<out_t>(tensor_read<weight_t>(weight, [OC,KH,KW,IC], [oc,ky,kx,ic]));
+        value = apply_sub_s<out_t>(value, static_cast<out_t>(input_zp));
+        weight = apply_sub_s<out_t>(weight, static_cast<out_t>(weight_zp));
+        acc = apply_add_s<out_t>(acc, apply_mul_s<out_t>(value, weight));
         tensor_write<out_t>(output, [N,OH,OW,OC], [n,oy,ox,oc], acc);
     }
 }
author	Eric Kunze <eric.kunze@arm.com>	2023-07-18 15:20:53 -0700
committer	Eric Kunze <eric.kunze@arm.com>	2023-08-17 09:32:28 -0700
commit	fb0284e2912bd5fd73bf6f476901490e04c330a2 (patch)
tree	1784e40ad84a91e751679a4cbdf6cd33be1eefdb /chapters/tensor_ops.adoc
parent	b5b067819e5de11153b41cf3d26da4f3f9dd23e8 (diff)
download	specification-fb0284e2912bd5fd73bf6f476901490e04c330a2.tar.gz