From 8f57f9e092836a584d7b007b926a31fe2bc80b8a Mon Sep 17 00:00:00 2001
From: Eric Kunze <eric.kunze@arm.com>
Date: Tue, 17 Aug 2021 15:20:06 -0700
Subject: Fix pooling argument tables

Make the input tensor shape dimensions correct for the pseudocode.
Clarify that the count used for average pooling only includes
valid elements.
Remove stale reference to PLACEHOLDER nodes.

Change-Id: Ia0b9f0aa404008c6a36671da12188cb0999712d4
Signed-off-by: Eric Kunze <eric.kunze@arm.com>
---
 chapters/ewise_binary.adoc |  9 +++++++--
 chapters/operators.adoc    |  2 --
 chapters/tensor_ops.adoc   | 20 ++++++++++++--------
 3 files changed, 19 insertions(+), 12 deletions(-)

diff --git a/chapters/ewise_binary.adoc b/chapters/ewise_binary.adoc
index 2b1eadd..1a54a99 100644
--- a/chapters/ewise_binary.adoc
+++ b/chapters/ewise_binary.adoc
@@ -58,7 +58,7 @@ Axis of size 1 will be broadcast, as necessary. Rank of input tensors must match
 
 |Input|in_t*|input1|shape1|Input tensor
 |Input|in_t*|input2|shape2|Input tensor with the same rank as input1
-|Input|bool_t |round |- | If true then the shift is rounded
+|Input|bool_t|round|-|If true then the shift is rounded
 |Output|in_t*|output|shape|Output tensor with broadcast shape if necessary
 |===
 
@@ -71,7 +71,12 @@ for_each(index in shape) {
     index2 = apply_broadcast(shape, shape2, index);
     in_t value1 = tensor_read<in_t>(input1, shape1, index1);
     in_t value2 = tensor_read<in_t>(input2, shape2, index2);
-    REQUIRE(0 <= value2 && value2 <= 31);
+
+    // Ensure that shift amount is appropriate for the data type
+    REQUIRE((in_t == int32_t && 0 <= value2 && value2 <= 31) ||
+            (in_t == int16_t && 0 <= value2 && value2 <= 15) ||
+            (in_t == int8_t && 0 <= value2 && value2 <= 7));
+
     in_t acc = value1 >> value2;
     if (round == true && value2 > 0 && (value1 >> (value2 - 1)) & 1 != 0) {
         acc = acc + 1;
diff --git a/chapters/operators.adoc b/chapters/operators.adoc
index 264063a..bec93c1 100644
--- a/chapters/operators.adoc
+++ b/chapters/operators.adoc
@@ -58,8 +58,6 @@ tosa_execute_graph(tosa_graph_t graph, tosa_list_t input_list, tosa_list_t outpu
 }
 ----
 
-Note that within the graph, each input operand is instantiated as a PLACEHOLDER operator.
-
 include::tensor_ops.adoc[]
 
 include::activation_funcs.adoc[]
diff --git a/chapters/tensor_ops.adoc b/chapters/tensor_ops.adoc
index 9a1c035..7334f67 100644
--- a/chapters/tensor_ops.adoc
+++ b/chapters/tensor_ops.adoc
@@ -77,7 +77,7 @@ This performs an average pooling over the given input tensor. A sliding window o
 |===
 |Argument|Type|Name|Shape|Description
 
-|Input|in_t*|input|[N,H,W,C]|Input tensor 4D
+|Input|in_t*|input|[N,IH,IW,C]|Input tensor 4D
 |Attribute|int*|kernel|[2]|[kernel_y, kernel_x]
 |Attribute|int*|stride|[2]|[stride_y, stride_x]
 |Attribute|int*|pad|[4]|[pad_top, pad_bottom, pad_left, pad_right]
@@ -109,9 +109,13 @@ for_each(0 <= n < N, 0 <= oy < H, 0 <= ox < W, 0 <= c < C ) {
     for_each(0 <= ky < kernel_y, 0 <= kx < kernel_x) {
         y = iy + ky;
         x = ix + kx;
-        acc_t value = tensor_read<in_t>(input, [N,IH,IW,IC], [n,y,x,c], input_zp, pad);
+        acc_t value = tensor_read<in_t>(input, [N,IH,IW,C], [n,y,x,c], input_zp, pad);
         acc = apply_add<acc_t>(acc, value);
-        if (0 <= y < IH and 0 <= x < IW) count++
+        // Only values from the input tensor are used to calculate the
+        // average; padding elements are not counted.
+        if (0 <= y < IH and 0 <= x < IW) {
+            count++;
+        }
     }
     if (is_float(out_t)) {
         output_val = acc / (float)count;
@@ -120,7 +124,7 @@ for_each(0 <= n < N, 0 <= oy < H, 0 <= ox < W, 0 <= c < C ) {
         acc = apply_scale_32(acc, scale.multiplier, scale.shift, false);
         output_val = (in_t)apply_clip<acc_t>(acc + output_zp, minimum<in_t>, maximum<in_t>)
     }
-    tensor_write<in_t>(output, [N,H,W,OC], [n,oy,ox,oc], output_val);
+    tensor_write<in_t>(output, [N,H,W,C], [n,oy,ox,c], output_val);
 }
 ----
 
@@ -428,7 +432,7 @@ This performs a max pooling over the given input tensor. A sliding window of siz
 |===
 |Argument|Type|Name|Shape|Description
 
-|Input|in_t*|input|[N,H,W,C]|Input tensor 4D
+|Input|in_t*|input|[N,IH,IW,C]|Input tensor 4D
 |Attribute|int*|kernel|[2]|[kernel_y, kernel_x]
 |Attribute|int*|stride|[2]|[stride_y, stride_x]
 |Attribute|int*|pad|[4]|[pad_top, pad_bottom, pad_left, pad_right]
@@ -448,13 +452,13 @@ for_each(0 <= n < N, 0 <= oy < H, 0 <= ox < W, 0 <= c < C ) {
     in_t acc = minimum_value<in_t>;
     iy = oy * stride_y - pad_top;
     ix = ox * stride_x - pad_left;
-    for_each( 0<=ky<kernel_y, 0<=kx<kernel_x ) {
+    for_each( 0 <= ky < kernel_y, 0 <= kx < kernel_x ) {
         y = iy + ky;
         x = ix + kx;
-        in_t value  = tensor_read<in_t>(input, [N,IH,IW,IC], [n,y,x,c], pad);
+        in_t value = tensor_read<in_t>(input, [N,IH,IW,C], [n,y,x,c], pad);
         acc = apply_max(acc, value);
     }
-    tensor_write<in_t>(output, [N,H,W,OC], [n,oy,ox,oc], acc);
+    tensor_write<in_t>(output, [N,H,W,C], [n,oy,ox,c], acc);
 }
 ----
 
-- 
cgit v1.2.1