author    Eric Kunze <eric.kunze@arm.com>    2021-09-16 14:51:26 -0700
committer Eric Kunze <eric.kunze@arm.com>    2021-09-29 08:27:00 -0700
commit    c949f8a3a554728ccb6ce0ee0992fde382160cda (patch)
tree      c7a825ad9000d5a8bd10cf95d89d4d74b30a9cf8
parent    d921624f8c2918b534575e3031af83f24c2a2ea0 (diff)
download  specification-c949f8a3a554728ccb6ce0ee0992fde382160cda.tar.gz
Allow PAD operator to pad with non-zero
PAD now takes an additional attribute containing the padding value. This will
generally be zero, but other values are allowed.

tensor_read now requires the coordinates to be within the given tensor, with
unpredictable behavior occurring if an access outside of the tensor occurs.
Callers of tensor_read are expected to check the coordinates and take the
appropriate action. The primary impact of this is to move the responsibility
for padding into each operator. In practice this is not expected to be a
functional change, but a cleanup that makes the behavior clearer.

Signed-off-by: Eric Kunze <eric.kunze@arm.com>
Change-Id: I4f21ca9a13d82d422bbd66c400f23aa9a0bd2aa0
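As a minimal sketch of the caller-side pattern described above (a hypothetical helper for illustration, not spec pseudocode), an operator now checks the coordinates itself and substitutes its own padding value instead of relying on tensor_read:

[source,c++]
----
#include <vector>

// Hypothetical helper illustrating the new caller responsibility: the
// operator decides what happens outside the input bounds; tensor_read
// itself no longer handles padding.
template <typename in_t>
in_t read_or_pad(const std::vector<in_t>& data, const std::vector<int>& shape,
                 const std::vector<int>& index, in_t pad_const) {
    int offset = 0;
    for (size_t i = 0; i < shape.size(); i++) {
        if (index[i] < 0 || index[i] >= shape[i]) {
            return pad_const;   // out of bounds: caller-chosen padding value
        }
        offset = offset * shape[i] + index[i];   // row-major flattening
    }
    return data[offset];
}
----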
-rw-r--r--  chapters/data_layout.adoc   | 16
-rw-r--r--  chapters/introduction.adoc  | 27
-rw-r--r--  chapters/tensor_ops.adoc    | 32
3 files changed, 44 insertions(+), 31 deletions(-)
diff --git a/chapters/data_layout.adoc b/chapters/data_layout.adoc
index 834030c..4368474 100644
--- a/chapters/data_layout.adoc
+++ b/chapters/data_layout.adoc
@@ -57,7 +57,8 @@ for_each(index1 in shape) {
==== PAD
-Zero-pads a tensor along borders of each dimension.
+Pads a tensor along the borders of each dimension with a supplied value.
+Returns a new tensor with the padding included.
*Arguments:*
@@ -66,6 +67,7 @@ Zero-pads a tensor along borders of each dimension.
|Input|in_t*|input1|shape1|Input tensor
|Attribute|int|padding|[rank(input1),2]|Amount of padding to be done
+|Attribute|in_t|pad_const|-|Constant value to be used as padding
|Output|in_t*|output|shape|Output tensor of same type as the input tensor
|===
@@ -82,16 +84,20 @@ Zero-pads a tensor along borders of each dimension.
[source,c++]
----
ERROR_IF(in_t != int8_t && input1_zp != 0); // Zero point only allowed for int8_t
-// Pad values must be >= 0.
-for_each(value in padding) {
- ERROR_IF(value < 0);
+// Padding sizes must be >= 0.
+for_each(pad_size in padding) {
+ ERROR_IF(pad_size < 0);
}
for_each(index in shape) {
index1 = index;
+ bool_t is_pad = false;
for(i = 0; i < rank(shape); i++) {
index1[i] = index1[i] - padding[i,0];
+ if (index1[i] < 0 || index1[i] >= shape1[i]) {
+ is_pad = true;
+ }
}
- acc_t value = tensor_read<in_t>(input1, shape1, index1, input1_zp, padding);
+ acc_t value = is_pad ? pad_const : tensor_read<in_t>(input1, shape1, index1, input1_zp);
tensor_write<in_t>(output, shape, index, value + input1_zp);
}
----
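As a worked illustration of the PAD pseudocode above (an editorial sketch for a rank-1 tensor, zero points omitted), padding [1, 2, 3] with two elements before and one after using a non-zero pad_const of 9:

[source,c++]
----
#include <cstdio>
#include <vector>

int main() {
    std::vector<int> input = {1, 2, 3};   // shape1 = [3]
    int padding[1][2] = {{2, 1}};         // two elements before, one after
    int pad_const = 9;                    // non-zero padding value
    int out_len = padding[0][0] + (int)input.size() + padding[0][1];

    std::vector<int> output(out_len);
    for (int index = 0; index < out_len; index++) {
        int index1 = index - padding[0][0];            // map back into input1
        bool is_pad = index1 < 0 || index1 >= (int)input.size();
        output[index] = is_pad ? pad_const : input[index1];
    }
    for (int v : output) printf("%d ", v);             // prints: 9 9 1 2 3 9
    return 0;
}
----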
diff --git a/chapters/introduction.adoc b/chapters/introduction.adoc
index acf3b69..93276f1 100644
--- a/chapters/introduction.adoc
+++ b/chapters/introduction.adoc
@@ -249,14 +249,17 @@ The tensor shape in each dimension must be greater than or equal to 1.
The following pseudocode represents the operations that will happen to data elements as they are read in to be processed, or have their results written out.
*Functionality of tensor read*
-If in_t is 8-bit then out_t=int16_t. Otherwise out_t is set to the same as in_t.
-If padding is specified, the size of the padding array should be 2 times the size of the shape.
-The padding array represents the before and after pair for each dimension.
+
+tensor_read reads a single data value out of the given tensor.
+The shape argument contains the shape of the tensor.
+index is the coordinates within the tensor of the value to be read.
+zero_point is the zero point value to be subtracted for int8 values.
+If in_t is 8-bit then out_t=int16_t to account for the zero_point subtraction.
+Otherwise out_t is the same as in_t.
[source,c++]
----
-out_t tensor_read<in_t>(in_t *address, dim_t shape, dim_t index, in_t zero_point=0, dim_t pad=NULL) {
- ERROR_IF((pad != NULL) && size(pad) != 2 * size(shape));
+out_t tensor_read<in_t>(in_t *address, dim_t shape, dim_t index, in_t zero_point=0) {
ERROR_IF(in_t != int8_t && zero_point != 0);
// Ensure this is a proper tensor with each dimension having size >= 1
for_each(dimension_size in shape) {
@@ -264,14 +267,7 @@ out_t tensor_read<in_t>(in_t *address, dim_t shape, dim_t index, in_t zero_point
}
unsigned offset = 0;
for (i = 0; i < rank(shape); i++) {
- if (index[i] < 0) {
- REQUIRE(pad && pad[2 * i] + index[i] >= 0);
- return 0;
- }
- if (index[i] >= shape[i]) {
- REQUIRE(pad && index[i] < shape[i] + pad[2 * i + 1]);
- return 0;
- }
+ REQUIRE(index[i] >= 0 && index[i] < shape[i]);
offset = offset * shape[i] + index[i];
}
return address[offset] - zero_point;
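The address arithmetic above can be checked with a small standalone sketch (editorial, not spec text); assert stands in for REQUIRE, and an out-of-range coordinate is now a caller error rather than a padded read:

[source,c++]
----
#include <cassert>
#include <cstdio>
#include <vector>

// Row-major offset computation matching the tensor_read pseudocode.
int flat_offset(const std::vector<int>& shape, const std::vector<int>& index) {
    int offset = 0;
    for (size_t i = 0; i < shape.size(); i++) {
        assert(index[i] >= 0 && index[i] < shape[i]);  // in-bounds requirement
        offset = offset * shape[i] + index[i];
    }
    return offset;
}

int main() {
    // shape [2,3]: element [1,2] sits at offset 1*3 + 2 = 5
    printf("%d\n", flat_offset({2, 3}, {1, 2}));
    return 0;
}
----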
@@ -280,6 +276,11 @@ out_t tensor_read<in_t>(in_t *address, dim_t shape, dim_t index, in_t zero_point
*Functionality of tensor write*
+tensor_write writes a single data value into the given tensor.
+The shape argument contains the shape of the tensor.
+index is the coordinates within the tensor of the value to be written.
+value is the value to be written at the given coordinates.
+
[source,c++]
----
tensor_write<type>(<type> *address, dim_t shape, dim_t index, <type> value) {
diff --git a/chapters/tensor_ops.adoc b/chapters/tensor_ops.adoc
index ff5f25a..16b0341 100644
--- a/chapters/tensor_ops.adoc
+++ b/chapters/tensor_ops.adoc
@@ -102,7 +102,7 @@ ERROR_IF(in_t != int8_t && input_zp != 0); // Zero point only for int8_t
ERROR_IF(in_t != int8_t && output_zp != 0); // Zero point only for int8_t
ERROR_IF(kernel_y < 1 || kernel_x < 1); // kernel size must be >= 1
ERROR_IF(stride_y < 1 || stride_x < 1);
-ERROR_IF(pad_top < 0 || pad_buttom < 0 || pad_left < 0 || pad_right < 0);
+ERROR_IF(pad_top < 0 || pad_bottom < 0 || pad_left < 0 || pad_right < 0);
// Padding must be less than kernel size to avoid
// a divide-by-zero.
ERROR_IF(pad_right >= kernel_x || pad_left >= kernel_x);
@@ -118,12 +118,12 @@ for_each(0 <= n < N, 0 <= oy < H, 0 <= ox < W, 0 <= c < C ) {
for_each(0 <= ky < kernel_y, 0 <= kx < kernel_x) {
y = iy + ky;
x = ix + kx;
- acc_t value = tensor_read<in_t>(input, [N,IH,IW,C], [n,y,x,c], input_zp, pad);
- acc = apply_add<acc_t>(acc, value);
// Only values from the input tensor are used to calculate the
// average, padding does not count
if (0 <= y < IH and 0 <= x < IW) {
count++;
+ acc_t value = tensor_read<in_t>(input, [N,IH,IW,C], [n,y,x,c], input_zp);
+ acc = apply_add<acc_t>(acc, value);
}
}
if (is_float(out_t)) {
@@ -190,9 +190,11 @@ for_each(0 <= n < N, 0 <= oy < H, 0 <= ox < W; 0 <= oc < OC) {
for_each(0 <= ky < KH, 0 <= kx < KW, 0 <= ic < IC) {
y = iy + ky * dilation_y;
x = ix + kx * dilation_x;
- acc_t value = tensor_read<in_t>(input, [N,IH,IW,IC], [n,y,x,ic], input_zp, pad);
- acc_t weight = tensor_read<weight_t>(weight, [OC,KH,KW,IC], [oc,ky,kx,ic], weight_zp);
- acc = apply_add<acc_t>(acc, value * weight);
+ if (0 <= y < IH && 0 <= x < IW) {
+ acc_t value = tensor_read<in_t>(input, [N,IH,IW,IC], [n,y,x,ic], input_zp);
+ acc_t weight = tensor_read<weight_t>(weight, [OC,KH,KW,IC], [oc,ky,kx,ic], weight_zp);
+ acc = apply_add<acc_t>(acc, value * weight);
+ }
}
acc = apply_add<acc_t>(acc, bias[oc]);
tensor_write<acc_t>(output, [N,H,W,OC], [n,oy,ox,oc], acc);
@@ -256,9 +258,11 @@ for_each(0 <= n < N, 0 <= od < D, 0 <= oy < H, 0 <= ox < W; 0 <= oc < OC) {
d = id + kd * dilation_d;
y = iy + ky * dilation_y;
x = ix + kx * dilation_x;
- acc_t value = tensor_read<in_t>(input, [N,ID,IH,IW,IC], [n,d,y,x,ic], input_zp, pad);
- acc_t weight = tensor_read<weight_t>(weight,[OC,KD,KH,KW,IC],[oc,kd,ky,kx,ic], weight_zp);
- acc = apply_add<acc_t>(acc, value * weight);
+ if (0 <= d < ID && 0 <= y < IH && 0 <= x < IW) {
+ acc_t value = tensor_read<in_t>(input, [N,ID,IH,IW,IC], [n,d,y,x,ic], input_zp);
+ acc_t weight = tensor_read<weight_t>(weight,[OC,KD,KH,KW,IC],[oc,kd,ky,kx,ic], weight_zp);
+ acc = apply_add<acc_t>(acc, value * weight);
+ }
}
acc = apply_add<acc_t>(acc, bias[oc]);
tensor_write<acc_t>(output, [N,D,H,W,OC], [n,od,oy,ox,oc], acc);
@@ -321,9 +325,11 @@ for_each(0 <= n<N, 0 <= oy < H, 0 <= ox < W; 0 <= c < (C * M), 0 <= m < M) {
for_each(0 <= ky < KH, 0 <= kx < KW) {
y = iy + ky * dilation_y;
x = ix + kx * dilation_x;
- acc_t value = tensor_read<in_t>(input, [N,H,W,C], [n,y,x,c], input_zp, pad);
- acc_t weight = tensor_read<weight_t>(weight, [KH,KW,C,M], [ky,kx,c,m], weight_zp);
- acc = apply_add<acc_t>(acc, value * weight);
+ if (0 <= y < IH && 0 <= x < IW) {
+ acc_t value = tensor_read<in_t>(input, [N,H,W,C], [n,y,x,c], input_zp);
+ acc_t weight = tensor_read<weight_t>(weight, [KH,KW,C,M], [ky,kx,c,m], weight_zp);
+ acc = apply_add<acc_t>(acc, value * weight);
+ }
}
acc = apply_add<acc_t>(acc, bias[(c * M) + m]);
tensor_write<acc_t>(output, [N,H,W,C * M], [n,oy,ox,c * M + m], acc);
@@ -467,7 +473,7 @@ None
----
ERROR_IF(kernel_y < 1 || kernel_x < 1); // kernel size must be >= 1
ERROR_IF(stride_y < 1 || stride_x < 1);
-ERROR_IF(pad_top < 0 || pad_buttom < 0 || pad_left < 0 || pad_right < 0);
+ERROR_IF(pad_top < 0 || pad_bottom < 0 || pad_left < 0 || pad_right < 0);
// Padding must be less than kernel size, otherwise no
// input values will be used.
ERROR_IF(pad_right >= kernel_x || pad_left >= kernel_x);
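The padding-versus-kernel-size checks above (in both AVG_POOL and MAX_POOL) can be illustrated with a short sketch (editorial, assuming a stride of 1): for the left-most window only kernel_x - pad_left input columns are in bounds, so pad_left >= kernel_x would leave no input values and, for AVG_POOL, a zero divisor.

[source,c++]
----
#include <cstdio>

int main() {
    const int IW = 8, kernel_x = 3;
    for (int pad_left = 0; pad_left <= kernel_x; pad_left++) {
        int ix = -pad_left;                 // input x of the left-most window
        int count = 0;                      // in-bounds columns in that window
        for (int kx = 0; kx < kernel_x; kx++) {
            int x = ix + kx;
            if (0 <= x && x < IW) count++;
        }
        // count reaches 0 once pad_left == kernel_x
        printf("pad_left=%d count=%d\n", pad_left, count);
    }
    return 0;
}
----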