From 8e4a9d33f0527107fda724fc0f7b6b7c1f42bf79 Mon Sep 17 00:00:00 2001 From: Eric Kunze Date: Mon, 8 Mar 2021 16:17:26 -0800 Subject: Adjust pseudocode types to account for zero point When reading tensor values with zero point, the returned value has one more bit than the original to account for zero point. Update cases of apply_clip to properly represent the types involved. Change-Id: I60c17b1b244c34b4f04f042807936ae0f282ce93 --- chapters/activation_funcs.adoc | 16 ++++++++-------- chapters/data_layout.adoc | 14 +++++++------- chapters/ewise_unary.adoc | 18 +++++++++--------- chapters/introduction.adoc | 38 ++++++++++++++++++++++---------------- chapters/tensor_ops.adoc | 28 ++++++++++++++-------------- chapters/type_conversion.adoc | 4 ++-- 6 files changed, 62 insertions(+), 56 deletions(-) diff --git a/chapters/activation_funcs.adoc b/chapters/activation_funcs.adoc index 5af849d..7a4a7b6 100644 --- a/chapters/activation_funcs.adoc +++ b/chapters/activation_funcs.adoc @@ -27,8 +27,8 @@ Clamp to an arbitrary minimum and maximum value. Note that the maximum and minim *Operation Function:* .... for_each(index in shape) { - value = tensor_read(input, shape, index); - acc = apply_clip(value, min_val, max_val); + acc_t value = tensor_read(input, shape, index); + acc = (in_t)apply_clip(value, min_val, max_val); tensor_write(output, shape, index, acc); } .... @@ -36,11 +36,11 @@ for_each(index in shape) { *Supported Data Types:* |=== -|Profile|Mode|in_t +|Profile|Mode|in_t|acc_t -|Any|signed 8|int8_t -|Any|signed 16|int16_t -|MI, MT|floating-point|float_t +|Any|signed 8|int8_t|int16_t +|Any|signed 16|int16_t|int16_t +|MI, MT|floating-point|float_t|float_t |=== ==== RELUN @@ -63,8 +63,8 @@ ReLU with a scalar maximum value. 
---- for_each(index in shape) { in_t value = tensor_read(input, shape, index); - acc = apply_clip(value, 0, max_val); - tensor_write(output, shape, index, acc); + value = apply_clip(value, 0, max_val); + tensor_write(output, shape, index, value); } ---- diff --git a/chapters/data_layout.adoc b/chapters/data_layout.adoc index 67484cb..b5b5112 100644 --- a/chapters/data_layout.adoc +++ b/chapters/data_layout.adoc @@ -86,7 +86,7 @@ for_each(index in shape) { for(i = 0; i < rank(shape); i++) { index1[i] = index1[i] - padding[i,0]; } - in_t value = tensor_read(input1, shape1, index1, input1_zp, padding); + acc_t value = tensor_read(input1, shape1, index1, input1_zp, padding); tensor_write(output, shape, index, value + input1_zp); } ---- @@ -94,13 +94,13 @@ for_each(index in shape) { *Supported Data Types:* |=== -|Profile|Mode|in_t +|Profile|Mode|in_t|acc_t -|Any|Boolean|bool_t -|Any|signed 8|int8_t -|Any|signed 16|int16_t -|Any|signed 32|int32_t -|MI, MT|floating-point|float_t +|Any|Boolean|bool_t|bool_t +|Any|signed 8|int8_t|int16_t +|Any|signed 16|int16_t|int16_t +|Any|signed 32|int32_t|int32_t +|MI, MT|floating-point|float_t|float_t |=== ==== RESHAPE diff --git a/chapters/ewise_unary.adoc b/chapters/ewise_unary.adoc index d852fa4..3784274 100644 --- a/chapters/ewise_unary.adoc +++ b/chapters/ewise_unary.adoc @@ -262,22 +262,22 @@ Elementwise negation operation assert(in_t == int8_t || input1_zp == 0) // Zero point only for int8_t assert(in_t == int8_t || output_zp == 0) // Zero point only for int8_t for_each(index in shape) { - in_t value1 = tensor_read(input1, shape, index, input1_zp); - in_t acc = apply_sub(0, value1); - acc = apply_clip(acc + output_zp, minimum, maximum); - tensor_write(output, shape, index, acc); + acc_t acc = tensor_read(input1, shape, index, input1_zp); + acc = apply_sub(0, acc); + in_t value = (in_t)apply_clip(acc + output_zp, minimum, maximum); + tensor_write(output, shape, index, value); } ---- *Supported Data Types:* |=== 
-|Profile|Mode|in_t +|Profile|Mode|in_t|acc_t -|Any|signed 8|int8_t -|Any|signed 16|int16_t -|Any|signed 32|int32_t -|MI, MT|floating-point|float_t +|Any|signed 8|int8_t|int32_t +|Any|signed 16|int16_t|int32_t +|Any|signed 32|int32_t|int32_t +|MI, MT|floating-point|float_t|float_t |=== ==== RECIPROCAL diff --git a/chapters/introduction.adoc b/chapters/introduction.adoc index 3257ab0..7039e27 100644 --- a/chapters/introduction.adoc +++ b/chapters/introduction.adoc @@ -197,14 +197,20 @@ The padding array represents the before and after pair for each dimension. .... assert((pad == NULL) || size(pad) == 2 * size(shape)); out_t tensor_read(in_t *address, dim_t shape, dim_t index, in_t zero_point=0, dim_t pad=NULL) { - assert(in_t == int8_t || zero_point == 0) - unsigned offset = 0; - for (i = 0; i < rank(shape); i++) - if (index[i] < 0) { assert(pad && pad[2 * i] + index[i] >= 0); return 0; } - if (index[i] >= shape[i]) { assert(pad && index[i] < shape[i] + pad[2 * i + 1]); return 0; } - offset = offset * shape[i] + index[i] - } - return address[offset] - zero_point; + assert(in_t == int8_t || zero_point == 0) + unsigned offset = 0; + for (i = 0; i < rank(shape); i++) { + if (index[i] < 0) { + assert(pad && pad[2 * i] + index[i] >= 0); + return 0; + } + if (index[i] >= shape[i]) { + assert(pad && index[i] < shape[i] + pad[2 * i + 1]); + return 0; + } + offset = offset * shape[i] + index[i]; + } + return address[offset] - zero_point; } .... @@ -212,12 +218,12 @@ out_t tensor_read(in_t *address, dim_t shape, dim_t index, in_t zero_point .... tensor_write(<type> *address, dim_t shape, dim_t index, <type> value) { - unsigned offset = 0; - for (i = 0; i < rank(shape); i++) - assert (index[i] >= 0 && index[i] < shape[i]); - offset = offset * shape[i] + index[i]; - } - address[offset] = value; + unsigned offset = 0; + for (i = 0; i < rank(shape); i++) { + assert (index[i] >= 0 && index[i] < shape[i]); + offset = offset * shape[i] + index[i]; + } + address[offset] = value; } .... 
@@ -346,7 +352,7 @@ All table lookups are based on the following reference lookup function that take .... int32_t apply_lookup(int16_t *table, int32_t value) { - int16_t clipped_value = apply_clip(value, -32768, +32767); + int16_t clipped_value = (int16_t)apply_clip(value, -32768, +32767); int32_t index = (clipped_value + 32768) >> 7; int32_t fraction = clipped_value & 0x7f; int16_t base = table[index]; @@ -364,7 +370,7 @@ void generate_lookup_table(int16_t *table, int32_t (*reference)(int32_t)) { for (int i = -256; i <= 256; i++) { int32_t value = (*reference)(i); - table[i + 256] = apply_clip(value, -32768, +32767) + table[i + 256] = (int16_t)apply_clip(value, -32768, +32767) } } .... diff --git a/chapters/tensor_ops.adoc b/chapters/tensor_ops.adoc index 341f51d..b006c71 100644 --- a/chapters/tensor_ops.adoc +++ b/chapters/tensor_ops.adoc @@ -99,7 +99,7 @@ for_each(0 <= n < N, 0 <= oy < H, 0 <= ox < W, 0 <= c < C ) { for_each(0 <= ky < kernel_y, 0 <= kx < kernel_x) { y = iy + ky; x = ix + kx; - in_t value = tensor_read(input, [N,IH,IW,IC], [n,y,x,c], input_zp, pad); + acc_t value = tensor_read(input, [N,IH,IW,IC], [n,y,x,c], input_zp, pad); acc = apply_add(acc, value); if (0 <= y < IH and 0 <= x < IW) count++ } @@ -108,7 +108,7 @@ for_each(0 <= n < N, 0 <= oy < H, 0 <= ox < W, 0 <= c < C ) { } else { scale_t scale = reciprocal_scale(count); acc = apply_scale_32(acc, scale.multiplier, scale.shift, false); - output_val = apply_clip(acc + output_zp, minimum, maximum) + output_val = (in_t)apply_clip(acc + output_zp, minimum, maximum) } tensor_write(output, [N,H,W,OC], [n,oy,ox,oc], output_val); } @@ -164,8 +164,8 @@ for_each(0 <= n < N, 0 <= oy < H, 0 <= ox < W; 0 <= oc < OC) { for_each(0 <= ky < KH, 0 <= kx < KW, 0 <= ic < IC) { y = iy + ky * dilation_y; x = ix + kx * dilation_x; - in_t value = tensor_read(input, [N,IH,IW,IC], [n,y,x,ic], input_zp, pad); - weight_t weight = tensor_read(weight, [OC,KH,KW,IC], [oc,ky,kx,ic], weight_zp); + acc_t value = 
tensor_read(input, [N,IH,IW,IC], [n,y,x,ic], input_zp, pad); + acc_t weight = tensor_read(weight, [OC,KH,KW,IC], [oc,ky,kx,ic], weight_zp); acc = apply_add(acc, value * weight); } acc = apply_add(acc, bias[oc]); @@ -227,8 +227,8 @@ for_each(0 <= n < N, 0 <= od < D, 0 <= oy < H, 0 <= ox < W; 0 <= oc < OC) { d = id + kd * dilation_d; y = iy + ky * dilation_y; x = ix + kx * dilation_x; - in_t value = tensor_read(input, [N,ID,IH,IW,IC], [n,d,y,x,ic], input_zp, pad); - weight_t weight = tensor_read(weight,[OC,KD,KH,KW,IC],[oc,kd,ky,kx,ic], weight_zp); + acc_t value = tensor_read(input, [N,ID,IH,IW,IC], [n,d,y,x,ic], input_zp, pad); + acc_t weight = tensor_read(weight,[OC,KD,KH,KW,IC],[oc,kd,ky,kx,ic], weight_zp); acc = apply_add(acc, value * weight); } acc = apply_add(acc, bias[oc]); @@ -289,8 +289,8 @@ for_each(0 <= n < N, 0 <= oy < H, 0 <= ox < W; 0 <= c < C, 0 <= m < M) { - in_t value = tensor_read(input, [N,H,W,C], [n,y,x,c], input_zp, pad); - weight_t weight = tensor_read(weight, [KH,KW,C,M], [ky,kx,c,m], weight_zp); + acc_t value = tensor_read(input, [N,H,W,C], [n,y,x,c], input_zp, pad); + acc_t weight = tensor_read(weight, [KH,KW,C,M], [ky,kx,c,m], weight_zp); acc = apply_add(acc, value * weight); } acc = apply_add(acc, bias[(c * M) + m]); @@ -342,8 +342,8 @@ assert(weight_t == int8_t || weight_zp == 0); for_each(0 <= n < N, 0 <= oc < OC) { acc_t acc = 0; for_each(0 <= ic < IC) { - in_t value = tensor_read(input, [N,IC], [n,ic], input_zp); - weight_t weight = tensor_read(weight, [OC,IC], [oc,ic], weight_zp); + acc_t value = tensor_read(input, [N,IC], [n,ic], input_zp); + acc_t weight = tensor_read(weight, [OC,IC], [oc,ic], weight_zp); acc = apply_add(acc, value * weight); } acc = apply_add(acc, bias[oc]); @@ -392,8 +392,8 @@ assert(in_t == int8_t || (A_zp == 0 && B_zp == 0)); // Zero point only for int8_ for_each(0 <= n < N, 0 <= h < H, 0 <= w < W) { acc_t acc = 0; for_each(0 <= c < C) { - in_t value1 = tensor_read(A, [N,H,C], [n,h,c], A_zp); - in_t value2 = tensor_read(B, [N,C,W], [n,c,w], B_zp); + acc_t value1 = tensor_read(A, [N,H,C], 
[n,h,c], A_zp); + acc_t value2 = tensor_read(B, [N,C,W], [n,c,w], B_zp); acc = apply_add(acc, value1 * value2); } tensor_write(output, [N,H,W], [n,h,w], acc); @@ -500,8 +500,8 @@ for_each(0 <= n < N, 0 <= iy < IH, 0 <= ix < IW, 0 <= oc < OC, ox = ix * stride_x - out_pad_left + kx; if (oy >= 0 && oy < OH && ox >= 0 && ox < OW) { acc_t acc = tensor_read(output, [N,OH,OW,OC], [n,oy,ox,oc]); - in_t value = tensor_read(input, [N,IH,IW,IC], [n,iy,ix,ic], input_zp); - weight_t weight = tensor_read(weight, [OC,KH,KW,IC], [oc,ky,kx,ic], weight_zp); + acc_t value = tensor_read(input, [N,IH,IW,IC], [n,iy,ix,ic], input_zp); + acc_t weight = tensor_read(weight, [OC,KH,KW,IC], [oc,ky,kx,ic], weight_zp); acc = apply_add(acc, value * weight); tensor_write(output, [N,OH,OW,OC], [n,oy,ox,oc], acc); } diff --git a/chapters/type_conversion.adoc b/chapters/type_conversion.adoc index 8f9e255..6701297 100644 --- a/chapters/type_conversion.adoc +++ b/chapters/type_conversion.adoc @@ -106,12 +106,12 @@ for_each(index in shape) { assert(in_t == int8_t || in_t == uint8_t || input_zp == 0); assert(out_t == int8_t || out_t == uint8_t || output_zp == 0); assert((scale32 && in_t != int48_t_t) || (!scale32 && !double_round)); - int48_t_t value = tensor_read(input, shape, index, input_zp); + int48_t value = tensor_read(input, shape, index, input_zp); int c = (per_channel) ? index[dims-1] : 0; int32_t result = (scale32) ? apply_scale_32(value, multiplier[c], shift[c], double_round) : apply_scale_16(value, multiplier[c], shift[c]); - result = apply_clip(result + output_zp, minimum, maximum); + result = (out_t)apply_clip(result + output_zp, minimum, maximum); tensor_write(output, shape, index, result); } .... -- cgit v1.2.1