author     Dominic Symes <dominic.symes@arm.com>   2023-05-09 10:14:49 +0100
committer  Dominic Symes <dominic.symes@arm.com>   2023-08-16 11:50:24 +0100
commit     830b43b1d1bd82edd57dee1f5cac12e2b5cf0e04
tree       971d15dc6ae00118d7be2df9a50dd7445d8428d9 /chapters/ewise_binary.adoc
parent     e1f517c541a61d18defc671028b24824c1eadd57
Add DIM operator and operations on shape_t values
Shape inference derives the shape of tensors in
the graph from input shapes. Operations such as RESHAPE
may need calculations to derive the new tensor shape.
This patch:
- Adds a DIM operator to get the size of a tensor along
  a given axis as a rank 0 tensor of type shape_t
- Allows RESHAPE to take a 1D shape tensor as input for
the new shape
- Allows RESIZE, TILE, and PAD to take input sizes based
  on shape tensors
- Allows ADD, SUB, MUL, INTDIV to operate on rank 0
shape_t tensors
- Allows CONCAT to concatenate rank 0 shape_t tensors into
  a rank 1 shape_t tensor
- Adds CONST support for shape_t tensors
In this version of the specification, shape tensors must
be resolvable to constants at backend compile time.
Signed-off-by: Dominic Symes <dominic.symes@arm.com>
Change-Id: I484bd44452453b5e05d0d8a82689564587b224e4
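
For context, a minimal sketch (not part of the patch, and not the spec's pseudocode) of the kind of shape computation this commit enables. It models shape_t as a 64-bit signed integer and folds DIM, MUL, and CONCAT into the constant new shape for a RESHAPE; the names dims_t and dim are hypothetical helpers introduced only for this illustration.

[source,c++]
----
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <vector>

using shape_t = int64_t;              // assumption: rank 0 shape element modelled as i64
using dims_t  = std::vector<shape_t>; // assumption: a 1D shape tensor as a vector

// Hypothetical model of DIM: the size of `shape` along `axis`,
// returned as a rank 0 shape_t value.
shape_t dim(const dims_t& shape, std::size_t axis) {
    assert(axis < shape.size());
    return shape[axis];
}

int main() {
    dims_t input_shape = {2, 3, 4};   // shape of some [2,3,4] input tensor
    // Collapse the last two axes: new_shape = [DIM(x,0), DIM(x,1) * DIM(x,2)].
    shape_t d0  = dim(input_shape, 0);
    shape_t d12 = dim(input_shape, 1) * dim(input_shape, 2); // MUL on shape_t
    dims_t new_shape = {d0, d12};     // CONCAT of rank 0 values into a 1D shape
    // A backend can resolve this to the constant [2, 12] at compile time
    // and feed it to RESHAPE as the new shape.
    assert(new_shape == (dims_t{2, 12}));
    return 0;
}
----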
Diffstat (limited to 'chapters/ewise_binary.adoc')
-rw-r--r--  chapters/ewise_binary.adoc  123
1 file changed, 78 insertions(+), 45 deletions(-)
diff --git a/chapters/ewise_binary.adoc b/chapters/ewise_binary.adoc
index 864cf5b..35e454a 100644
--- a/chapters/ewise_binary.adoc
+++ b/chapters/ewise_binary.adoc
@@ -18,14 +18,22 @@ include::{generated}/operators/ADD.adoc[]
 
 [source,c++]
 ----
-ERROR_IF(shape != broadcast_shape(shape1, shape2));
-for_each(index in shape) {
-    dim_t index1 = apply_broadcast(shape, shape1, index);
-    dim_t index2 = apply_broadcast(shape, shape2, index);
-    in_out_t value1 = tensor_read<in_out_t>(input1, shape1, index1);
-    in_out_t value2 = tensor_read<in_out_t>(input2, shape2, index2);
-    in_out_t result = apply_add<in_out_t>(value1, value2);
-    tensor_write<in_out_t>(output, shape, index, result);
+if (in_out_t == shape_t) {
+    ERROR_IF(rank(shape) != 0 || rank(shape1) != 0 || rank(shape2) != 0);
+    shape_t value1 = tensor_read<shape_t>(input1, [], []);
+    shape_t value2 = tensor_read<shape_t>(input2, [], []);
+    shape_t result = apply_add<shape_t>(value1, value2);
+    tensor_write<shape_t>(output, [], [], result);
+} else {
+    ERROR_IF(shape != broadcast_shape(shape1, shape2));
+    for_each(index in shape) {
+        dim_t index1 = apply_broadcast(shape, shape1, index);
+        dim_t index2 = apply_broadcast(shape, shape2, index);
+        in_out_t value1 = tensor_read<in_out_t>(input1, shape1, index1);
+        in_out_t value2 = tensor_read<in_out_t>(input2, shape2, index2);
+        in_out_t result = apply_add<in_out_t>(value1, value2);
+        tensor_write<in_out_t>(output, shape, index, result);
+    }
 }
 ----
 
@@ -131,18 +139,27 @@ include::{generated}/operators/INTDIV.adoc[]
 
 [source,c++]
 ----
-ERROR_IF(shape != broadcast_shape(shape1, shape2));
-for_each(index in shape) {
-    dim_t index1 = apply_broadcast(shape, shape1, index);
-    dim_t index2 = apply_broadcast(shape, shape2, index);
-    in_out_t value1 = tensor_read<in_out_t>(input1, shape1, index1);
-    in_out_t value2 = tensor_read<in_out_t>(input2, shape2, index2);
+if (in_out_t == shape_t) {
+    ERROR_IF(rank(shape) != 0 || rank(shape1) != 0 || rank(shape2) != 0);
+    shape_t value1 = tensor_read<shape_t>(input1, [], []);
+    shape_t value2 = tensor_read<shape_t>(input2, [], []);
     REQUIRE(value2 != 0);
-    // This catches the case where we divide minimum<in_out_t> by -1
-    // which is not representable in two's complement
-    REQUIRE((int64_t)value1 / value2 <= maximum<in_out_t>);
-    in_out_t result = value1 / value2;
-    tensor_write<in_out_t>(output, shape, index, result);
+    shape_t result = value1 / value2;
+    tensor_write<shape_t>(output, [], [], result);
+} else {
+    ERROR_IF(shape != broadcast_shape(shape1, shape2));
+    for_each(index in shape) {
+        dim_t index1 = apply_broadcast(shape, shape1, index);
+        dim_t index2 = apply_broadcast(shape, shape2, index);
+        in_out_t value1 = tensor_read<in_out_t>(input1, shape1, index1);
+        in_out_t value2 = tensor_read<in_out_t>(input2, shape2, index2);
+        REQUIRE(value2 != 0);
+        // This catches the case where we divide minimum<in_out_t> by -1
+        // which is not representable in two's complement
+        REQUIRE((int64_t)value1 / value2 <= maximum<in_out_t>);
+        in_out_t result = value1 / value2;
+        tensor_write<in_out_t>(output, shape, index, result);
+    }
 }
 ----
 
@@ -297,25 +314,33 @@ include::{generated}/operators/MUL.adoc[]
 
 [source,c++]
 ----
-REQUIRE(0 <= shift && shift <= 63);
-REQUIRE(in_t == int32_t || shift == 0);
-ERROR_IF(shape != broadcast_shape(shape1, shape2));
-for_each(index in shape) {
-    dim_t index1 = apply_broadcast(shape, shape1, index);
-    dim_t index2 = apply_broadcast(shape, shape2, index);
-    in_t value1 = tensor_read<in_t>(input1, shape1, index1);
-    in_t value2 = tensor_read<in_t>(input2, shape2, index2);
-    out_t result;
-    if (in_t == int32_t && shift > 0) {
-        int64_t product = (int64_t)value1 * (int64_t)value2;
-        int64_t round = (int64_t)1 << (shift-1);
-        product = (product + round) >> shift;
-        REQUIRE(product >= minimum<int32_t> && product <= maximum<int32_t>)
-        result = product;
-    } else {
-        result = value1 * value2; // low 32-bits of result for int32_t
+if (in_out_t == shape_t) {
+    ERROR_IF(rank(shape) != 0 || rank(shape1) != 0 || rank(shape2) != 0);
+    shape_t value1 = tensor_read<shape_t>(input1, [], []);
+    shape_t value2 = tensor_read<shape_t>(input2, [], []);
+    shape_t result = value1 * value2;
+    tensor_write<shape_t>(output, [], [], result);
+} else {
+    REQUIRE(0 <= shift && shift <= 63);
+    REQUIRE(in_t == int32_t || shift == 0);
+    ERROR_IF(shape != broadcast_shape(shape1, shape2));
+    for_each(index in shape) {
+        dim_t index1 = apply_broadcast(shape, shape1, index);
+        dim_t index2 = apply_broadcast(shape, shape2, index);
+        in_t value1 = tensor_read<in_t>(input1, shape1, index1);
+        in_t value2 = tensor_read<in_t>(input2, shape2, index2);
+        out_t result;
+        if (in_t == int32_t && shift > 0) {
+            int64_t product = (int64_t)value1 * (int64_t)value2;
+            int64_t round = (int64_t)1 << (shift-1);
+            product = (product + round) >> shift;
+            REQUIRE(product >= minimum<int32_t> && product <= maximum<int32_t>)
+            result = product;
+        } else {
+            result = value1 * value2; // low 32-bits of result for int32_t
+        }
+        tensor_write<out_t>(output, shape, index, result);
     }
-    tensor_write<out_t>(output, shape, index, result);
 }
 ----
 
@@ -348,14 +373,22 @@ include::{generated}/operators/SUB.adoc[]
 
 [source,c++]
 ----
-ERROR_IF(shape != broadcast_shape(shape1, shape2));
-for_each(index in shape) {
-    dim_t index1 = apply_broadcast(shape, shape1, index);
-    dim_t index2 = apply_broadcast(shape, shape2, index);
-    in_out_t value1 = tensor_read<in_out_t>(input1, shape1, index1);
-    in_out_t value2 = tensor_read<in_out_t>(input2, shape2, index2);
-    in_out_t result = apply_sub<in_out_t>(value1, value2);
-    tensor_write<in_out_t>(output, shape, index, result);
+if (in_out_t == shape_t) {
+    ERROR_IF(rank(shape) != 0 || rank(shape1) != 0 || rank(shape2) != 0);
+    shape_t value1 = tensor_read<shape_t>(input1, [], []);
+    shape_t value2 = tensor_read<shape_t>(input2, [], []);
+    shape_t result = apply_sub<shape_t>(value1, value2);
+    tensor_write<shape_t>(output, [], [], result);
+} else {
+    ERROR_IF(shape != broadcast_shape(shape1, shape2));
+    for_each(index in shape) {
+        dim_t index1 = apply_broadcast(shape, shape1, index);
+        dim_t index2 = apply_broadcast(shape, shape2, index);
+        in_out_t value1 = tensor_read<in_out_t>(input1, shape1, index1);
+        in_out_t value2 = tensor_read<in_out_t>(input2, shape2, index2);
+        in_out_t result = apply_sub<in_out_t>(value1, value2);
+        tensor_write<in_out_t>(output, shape, index, result);
+    }
 }
 ----
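
The shape_t branches added above all share one pattern: read two rank 0 values, apply the scalar operation, write a rank 0 result. Note that the shape_t branch of INTDIV keeps the value2 != 0 check but drops the int32_t overflow REQUIRE. A minimal executable model of that pattern follows; it assumes shape_t can be modelled as a 64-bit signed integer and REQUIRE as an assertion, neither of which is stated by the patch itself.

[source,c++]
----
#include <cassert>
#include <cstdint>

using shape_t = int64_t;            // assumption: shape_t modelled as i64
#define REQUIRE(cond) assert(cond)  // assumption: REQUIRE aborts on failure

// Rank 0 shape_t arithmetic, mirroring the shape_t branches of
// ADD, SUB, MUL, and INTDIV above.
shape_t shape_add(shape_t a, shape_t b) { return a + b; }
shape_t shape_sub(shape_t a, shape_t b) { return a - b; }
shape_t shape_mul(shape_t a, shape_t b) { return a * b; }

shape_t shape_intdiv(shape_t a, shape_t b) {
    REQUIRE(b != 0);                // the one check the shape_t branch keeps
    return a / b;                   // C++ integer division truncates toward zero
}

int main() {
    assert(shape_add(6, 4) == 10);
    assert(shape_sub(6, 4) == 2);
    assert(shape_mul(6, 4) == 24);
    assert(shape_intdiv(7, 2) == 3); // truncating division
    return 0;
}
----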