3 files changed, 19 insertions, 12 deletions
diff --git a/chapters/ewise_binary.adoc b/chapters/ewise_binary.adoc
index 2b1eadd..1a54a99 100644
--- a/chapters/ewise_binary.adoc
+++ b/chapters/ewise_binary.adoc
@@ -58,7 +58,7 @@ Axis of size 1 will be broadcast, as necessary. Rank of input tensors must match
 
 |Input|in_t*|input1|shape1|Input tensor
 |Input|in_t*|input2|shape2|Input tensor with the same rank as input1
-|Input|bool_t |round |- | If true then the shift is rounded
+|Input|bool_t|round|-|If true then the shift is rounded
 |Output|in_t*|output|shape|Output tensor with broadcast shape if necessary
 |===
 
@@ -71,7 +71,12 @@ for_each(index in shape) {
     index2 = apply_broadcast(shape, shape2, index);
     in_t value1 = tensor_read<in_t>(input1, shape1, index1);
     in_t value2 = tensor_read<in_t>(input2, shape2, index2);
-    REQUIRE(0 <= value2 && value2 <= 31);
+
+    // Ensure that the shift amount is within range for the data type
+    REQUIRE((in_t == int32_t && 0 <= value2 && value2 <= 31) ||
+            (in_t == int16_t && 0 <= value2 && value2 <= 15) ||
+            (in_t == int8_t && 0 <= value2 && value2 <= 7));
+
     in_t acc = value1 >> value2;
     if (round == true && value2 > 0 && (value1 >> (value2 - 1)) & 1 != 0) {
         acc = acc + 1;
diff --git a/chapters/operators.adoc b/chapters/operators.adoc
index 264063a..bec93c1 100644
--- a/chapters/operators.adoc
+++ b/chapters/operators.adoc
@@ -58,8 +58,6 @@ tosa_execute_graph(tosa_graph_t graph, tosa_list_t input_list, tosa_list_t outpu
 }
 ----
 
-Note that within the graph, each input operand is instantiated as a PLACEHOLDER operator.
-
 include::tensor_ops.adoc[]
 
 include::activation_funcs.adoc[]
diff --git a/chapters/tensor_ops.adoc b/chapters/tensor_ops.adoc
index 9a1c035..7334f67 100644
--- a/chapters/tensor_ops.adoc
+++ b/chapters/tensor_ops.adoc
@@ -77,7 +77,7 @@ This performs an average pooling over the given input tensor. A sliding window o
 |===
 |Argument|Type|Name|Shape|Description
 
-|Input|in_t*|input|[N,H,W,C]|Input tensor 4D
+|Input|in_t*|input|[N,IH,IW,C]|Input tensor 4D
 |Attribute|int*|kernel|[2]|[kernel_y, kernel_x]
 |Attribute|int*|stride|[2]|[stride_y, stride_x]
 |Attribute|int*|pad|[4]|[pad_top, pad_bottom, pad_left, pad_right]
@@ -109,9 +109,13 @@ for_each(0 <= n < N, 0 <= oy < H, 0 <= ox < W, 0 <= c < C ) {
     for_each(0 <= ky < kernel_y, 0 <= kx < kernel_x) {
         y = iy + ky;
         x = ix + kx;
-        acc_t value = tensor_read<in_t>(input, [N,IH,IW,IC], [n,y,x,c], input_zp, pad);
+        acc_t value = tensor_read<in_t>(input, [N,IH,IW,C], [n,y,x,c], input_zp, pad);
         acc = apply_add<acc_t>(acc, value);
-        if (0 <= y < IH and 0 <= x < IW) count++
+        // Only values read from the input tensor contribute to the
+        // average; padding positions are not counted
+        if (0 <= y < IH and 0 <= x < IW) {
+            count++;
+        }
     }
     if (is_float(out_t)) {
         output_val = acc / (float)count;
@@ -120,7 +124,7 @@ for_each(0 <= n < N, 0 <= oy < H, 0 <= ox < W, 0 <= c < C ) {
         acc = apply_scale_32(acc, scale.multiplier, scale.shift, false);
         output_val = (in_t)apply_clip<acc_t>(acc + output_zp, minimum<in_t>, maximum<in_t>)
     }
-    tensor_write<in_t>(output, [N,H,W,OC], [n,oy,ox,oc], output_val);
+    tensor_write<in_t>(output, [N,H,W,C], [n,oy,ox,c], output_val);
 }
 ----
 
@@ -428,7 +432,7 @@ This performs a max pooling over the given input tensor. A sliding window of siz
 |===
 |Argument|Type|Name|Shape|Description
 
-|Input|in_t*|input|[N,H,W,C]|Input tensor 4D
+|Input|in_t*|input|[N,IH,IW,C]|Input tensor 4D
 |Attribute|int*|kernel|[2]|[kernel_y, kernel_x]
 |Attribute|int*|stride|[2]|[stride_y, stride_x]
 |Attribute|int*|pad|[4]|[pad_top, pad_bottom, pad_left, pad_right]
@@ -448,13 +452,13 @@ for_each(0 <= n < N, 0 <= oy < H, 0 <= ox < W, 0 <= c < C ) {
     in_t acc = minimum_value<in_t>;
     iy = oy * stride_y - pad_top;
     ix = ox * stride_x - pad_left;
-    for_each( 0<=ky<kernel_y, 0<=kx<kernel_x ) {
+    for_each( 0 <= ky < kernel_y, 0 <= kx < kernel_x ) {
         y = iy + ky;
         x = ix + kx;
-        in_t value  = tensor_read<in_t>(input, [N,IH,IW,IC], [n,y,x,c], pad);
+        in_t value = tensor_read<in_t>(input, [N,IH,IW,C], [n,y,x,c], pad);
         acc = apply_max(acc, value);
     }
-    tensor_write<in_t>(output, [N,H,W,OC], [n,oy,ox,oc], acc);
+    tensor_write<in_t>(output, [N,H,W,C], [n,oy,ox,c], acc);
 }
 ----