From 9424cc4d4c5002c1da441f20052342b6b1b6ac7c Mon Sep 17 00:00:00 2001
From: Eric Kunze <eric.kunze@arm.com>
Date: Wed, 4 Nov 2020 13:49:18 -0800
Subject: Fix output argument type for convolutions

The output argument type was out_t, which is not in the data
types table. Switch to acc_t, the accumulator type, which
is the output type for the convolutions.

Also add explicit types to variables in the pseudocode to
clarify what type each variable is, and rename outpad to
out_pad in TRANSPOSE_CONV2D.

Signed-off-by: Eric Kunze <eric.kunze@arm.com>
Change-Id: I210220199c2d39b16938094022339286df040545
---
 chapters/tensor_ops.adoc | 60 ++++++++++++++++++++++++------------------------
 1 file changed, 30 insertions(+), 30 deletions(-)
diff --git a/chapters/tensor_ops.adoc b/chapters/tensor_ops.adoc
index 9bb6496..dda04db 100644
--- a/chapters/tensor_ops.adoc
+++ b/chapters/tensor_ops.adoc
@@ -34,13 +34,13 @@ None
 assert(axis >= 0 && axis < k && k <=4)
 left_shape = input_shape[0:axis-1]
 right_shape = input_shape[axis+1:k-1]
-assert( concat(left_shape,    right_shape) == output_shape )
+assert( concat(left_shape, right_shape) == output_shape )
 for_each ( left_index in left_shape, right_index in right_shape )
     in_t max_value = minimum_value<in_t>
     int32 max_index = 0;
     for (i=0; i<shape[axis]; i++) {
         index = concat(left_index, [i], right_index)
-        value = tensor_read<in_t>(input, input_shape, index)
+        in_t value = tensor_read<in_t>(input, input_shape, index)
         if (value > max_value) { max_value = value; max_index=i; }
     }
     index = concat(left_index, right_index)
@@ -98,14 +98,14 @@ for_each ( 0<=n<N, 0<=oy<H, 0<=ox<W, 0<=c<C ) {
     for_each ( 0<=ky<kernel_y, 0<=kx<kernel_x) {
         y = iy + ky
         x = ix + kx
-        value  = tensor_read<in_t>(input, [N,IH,IW,IC], [n,y,x,c], input_zp, pad)
-        acc = apply_add<32>(acc, value)
+        in_t value  = tensor_read<in_t>(input, [N,IH,IW,IC], [n,y,x,c], input_zp, pad)
+        acc = apply_add<acc_t>(acc, value)
         if (0<=y<IH and 0<=x<IW) count++
     }
     if (is_float(out_t)) {
       value = value / (float)count;
     } else {
-      scale = reciprocal_scale(count)
+      scale_t scale = reciprocal_scale(count)
       acc = apply_scale_32(acc, scale.multiplier, scale.shift, false)
       acc = apply_clip(acc + output_zp, output_min, output_max)
     }
@@ -137,7 +137,7 @@ Performs a 2D convolution over the given tensor input, using the weight tensor.
 |Attribute|int*|pad|[4]|[pad_top, pad_bottom, pad_left, pad_right]
 |Attribute|int*|stride|[2]|[stride_y, stride_x]
 |Attribute|int*|dilation|[2]|[dilation_y, dilation_x]
-|Output|out_t*|output|[N,H,W,OC]|Output tensor
+|Output|acc_t*|output|[N,H,W,OC]|Output tensor
 |===
 
 *Quantization Parameters:*
@@ -163,8 +163,8 @@ for_each (0<=n<N, 0<=oy<H, 0<=ox<W; 0<=oc<OC) {
     for_each (0<=ky<KH, 0<=kx<KW, 0<=ic<IC) {
         y = iy + ky * dilation_y
         x = ix + kx * dilation_x
-        value  = tensor_read<in_t>(input, [N,IH,IW,IC], [n,y,x,ic], input_zp, pad)
-        weight = tensor_read<weight_t>(weight, [OC,KH,KW,IC], [oc,ky,kx,ic], weight_zp)
+        in_t value  = tensor_read<in_t>(input, [N,IH,IW,IC], [n,y,x,ic], input_zp, pad)
+        weight_t weight = tensor_read<weight_t>(weight, [OC,KH,KW,IC], [oc,ky,kx,ic], weight_zp)
         acc = apply_add<acc_t>(acc, value * weight)
     }
     acc = apply_add<acc_t>(acc, bias[oc])
@@ -198,7 +198,7 @@ Performs a 3D convolution over the given input tensor.
 |Attribute|int*|pad|[6]|[pad_d0, pad_d1, pad_top, pad_bottom, pad_left, pad_right]
 |Attribute|int*|stride|[3]|[stride_d, stride_y, stride_x]
 |Attribute|int*|dilation|[3]|[dilation_d, dilation_y, dilation_x]
-|Output|out_t*|output|[N,D,H,W,OC]|Output tensor
+|Output|acc_t*|output|[N,D,H,W,OC]|Output tensor
 |===
 
 *Quantization Parameters:*
@@ -226,8 +226,8 @@ for_each (0<=n<N, 0<=od<D, 0<=oy<H, 0<=ox<W; 0<=oc<OC) {
         d = id + kd * dilation_d
         y = iy + ky * dilation_y
         x = ix + kx * dilation_x
-        value  = tensor_read<in_t>(input, [N,ID,IH,IW,IC], [n,d,y,x,ic], input_zp, pad)
-        weight = tensor_read<weight_t>(weight,[OC,KD,KH,KW,IC],[oc,kd,ky,kx,ic], weight_zp)
+        in_t value  = tensor_read<in_t>(input, [N,ID,IH,IW,IC], [n,d,y,x,ic], input_zp, pad)
+        weight_t weight = tensor_read<weight_t>(weight,[OC,KD,KH,KW,IC],[oc,kd,ky,kx,ic], weight_zp)
         acc = apply_add<acc_t>(acc, value * weight)
     }
     acc = apply_add<acc_t>(acc, bias[oc])
@@ -242,7 +242,7 @@ for_each (0<=n<N, 0<=od<D, 0<=oy<H, 0<=ox<W; 0<=oc<OC) {
 
 |Any|signed 8x8|aint8|int8,aint8|int32
 |Any|signed 8x4|aint8|int4|int32
-|Any|signed 16x8 |int16|int8|int 48
+|Any|signed 16x8|int16|int8|int48
 |MI, MT|float|float|float|float
 |===
 
@@ -262,7 +262,7 @@ Performs 2D convolutions separately over each channel of the given tensor input,
 |Attribute|int*|pad|[4]|[pad_top, pad_bottom, pad_left, pad_right]
 |Attribute|int*|stride|[2]|[stride_y, stride_x]
 |Attribute|int*|dilation|[2]|[dilation_y, dilation_x]
-|Output|out_t*|output|[N,H,W,C*M]|Output tensor
+|Output|acc_t*|output|[N,H,W,C*M]|Output tensor
 |===
 
 *Quantization Parameters:*
@@ -288,8 +288,8 @@ for_each (0 <= n<N, 0 <= oy < H, 0 <= ox < W; 0 <= c < (C * M), 0 <= m < M) {
     for_each (0<=ky<KH, 0<=kx<KW) {
         y = iy + ky * dilation_y
         x = ix + kx * dilation_x
-        value  = tensor_read<in_t>(input, [N,H,W,C], [n,y,x,c], input_zp, pad)
-        weight = tensor_read<weight_t>(weight, [KH,KW,C,M], [ky,kx,c,m], weight_zp)
+        in_t value  = tensor_read<in_t>(input, [N,H,W,C], [n,y,x,c], input_zp, pad)
+        weight_t weight = tensor_read<weight_t>(weight, [KH,KW,C,M], [ky,kx,c,m], weight_zp)
         acc = apply_add<acc_t>(acc, value * weight)
     }
     acc = apply_add<acc_t>(acc, bias[(c*M) + m])
@@ -320,7 +320,7 @@ Performs a fully connected network.
 |Input|in_t*|input|[N,IC]|Input tensor
 |Attribute|weight_t*|weight|[OC,IC]|Weights
 |Attribute|acc_t*|bias|[OC]|Per output channel bias data.
-|Output|out_t*|output|[N,OC]|Output tensor
+|Output|acc_t*|output|[N,OC]|Output tensor
 |===
 
 *Quantization Parameters:*
@@ -341,8 +341,8 @@ assert(weight_t == aint8_t || weight_zp == 0)
 for_each (0<=n<N, 0<=oc<OC) {
     acc_t acc = 0
     for_each (0<=ic<IC) {
-        value  = tensor_read<in_t>(input, [N,IC], [n,ic], input_zp)
-        weight = tensor_read<weight_t>(weight, [OC,IC], [oc,ic], weight_zp)
+        in_t value  = tensor_read<in_t>(input, [N,IC], [n,ic], input_zp)
+        weight_t weight = tensor_read<weight_t>(weight, [OC,IC], [oc,ic], weight_zp)
         acc = apply_add<acc_t>(acc, value * weight)
     }
     acc = apply_add<acc_t>(acc, bias[oc])
@@ -371,7 +371,7 @@ Performs a two dimensional matrix multiplication. This allows both inputs to be
 
 |Input|in_t*|A|[M,K]|Input tensor A
 |Input|in_t*|B|[K,N]|Input tensor B
-|Output|out_t*|C|[M,N]|Output tensor C
+|Output|acc_t*|C|[M,N]|Output tensor C
 |===
 
 *Quantization Parameters:*
@@ -391,8 +391,8 @@ assert(in_t==aint8_t || (A_zp==0 && B_zp==0)) // Zero point only for asymmetric
 for_each (0<=m<M, 0<=n<N) {
     acc_t acc = 0
     for_each (0<=k<K) {
-        value1 = tensor_read<in_t>(A, [M,K], [m,k], A_zp)
-        value2 = tensor_read<in_t>(B, [K,N], [k,n], B_zp)
+        in_t value1 = tensor_read<in_t>(A, [M,K], [m,k], A_zp)
+        in_t value2 = tensor_read<in_t>(B, [K,N], [k,n], B_zp)
         acc = apply_add<acc_t>(acc, value1 * value2)
     }
     tensor_write<acc_t>(C, [M,N], [m,n], acc)
@@ -434,13 +434,13 @@ None
 ----
 pad=concat([0,0],pad,[0,0])
 for_each ( 0<=n<N, 0<=oy<H, 0<=ox<W, 0<=c<C ) {
-    int32_t acc = minimum_value<in_t>;
+    in_t acc = minimum_value<in_t>;
     iy = oy * stride_y - pad_top
     ix = ox * stride_x - pad_left
     for_each ( 0<=ky<kernel_y, 0<=kx<kernel_x ) {
         y = iy + ky
         x = ix + kx
-        value  = tensor_read<in_t>(input, [N,IH,IW,IC], [n,y,x,c], pad)
+        in_t value  = tensor_read<in_t>(input, [N,IH,IW,IC], [n,y,x,c], pad)
         acc = apply_max(acc, value)
     }
     tensor_write<out_t>(output, [N,H,W,OC], [n,oy,ox,oc], acc)
@@ -469,11 +469,11 @@ Performs a 2D transposed convolution over the given tensor input, using the weig
 |Input|in_t*|input|[N,IH,IW,IC]|Input tensor
 |Input (MT profile) Attribute (BI/MI profiles)|weight_t*|weight|[OC,KH,KW,IC]|Weight kernel size KH x KW
 |Input (MT profile) Attribute (BI/MI profiles)|acc_t*|bias|[OC]|Per output channel bias data.
-|Attribute|int*|outpad|[2]|[outpad_top, outpad_left]
+|Attribute|int*|out_pad|[2]|[out_pad_top, out_pad_left]
 |Attribute|int*|stride|[2]|[stride_y, stride_x]
 |Attribute|int*|dilation|[2]|[dilation_y, dilation_x]
 |Attribute|int*|out_shape|[4]|[N,OH,OW,OC]
-|Output|out_t*|output|[N,OH,OW,OC]|Output tensor
+|Output|acc_t*|output|[N,OH,OW,OC]|Output tensor
 |===
 
 *Quantization Parameters:*
@@ -495,12 +495,12 @@ for_each (index in out_shape) {
     tensor_write<acc_t>(output, [N,OH,OW,OC], index, bias[index[3]])
 }
 for_each (0<=n<N, 0<=iy<IH, 0<=ix<IW, 0<=oc<OC, 0<=ic<IC, 0<=ky<KH,  0<=kx<KW) {
-    oy = iy * stride_y - outpad_top  + ky
-    ox = ix * stride_x - outpad_left + kx
+    oy = iy * stride_y - out_pad_top  + ky
+    ox = ix * stride_x - out_pad_left + kx
     if (oy>=0 && oy<OH && ox>=0 && ox<OW) {
-        acc = tensor_read<acc_t>(output, [N,OH,OW,OC], [n,oy,ox,oc])
-        value = tensor_read<in_t>(input, [N,IH,IW,IC], [n,iy,ix,ic], input_zp)
-        weight = tensor_read<weight_t>(weight, [OC,KH,KW,IC], [oc,ky,kx,ic], weight_zp)
+        acc_t acc = tensor_read<acc_t>(output, [N,OH,OW,OC], [n,oy,ox,oc])
+        in_t value = tensor_read<in_t>(input, [N,IH,IW,IC], [n,iy,ix,ic], input_zp)
+        weight_t weight = tensor_read<weight_t>(weight, [OC,KH,KW,IC], [oc,ky,kx,ic], weight_zp)
         acc = apply_add<acc_t>(acc, value * weight)
         tensor_write<acc_t>(output, [N,OH,OW,OC], [n,oy,ox,oc], acc)
     }
-- 
cgit v1.2.1