about | summary | refs | log | tree | commit | diff
path: root/chapters/ewise_binary.adoc
diff options
context:
space:
mode:
Diffstat (limited to 'chapters/ewise_binary.adoc')
-rw-r--r-- chapters/ewise_binary.adoc | 123
1 files changed, 78 insertions, 45 deletions
diff --git a/chapters/ewise_binary.adoc b/chapters/ewise_binary.adoc
index 864cf5b..35e454a 100644
--- a/chapters/ewise_binary.adoc
+++ b/chapters/ewise_binary.adoc
@@ -18,14 +18,22 @@ include::{generated}/operators/ADD.adoc[]
[source,c++]
----
-ERROR_IF(shape != broadcast_shape(shape1, shape2));
-for_each(index in shape) {
- dim_t index1 = apply_broadcast(shape, shape1, index);
- dim_t index2 = apply_broadcast(shape, shape2, index);
- in_out_t value1 = tensor_read<in_out_t>(input1, shape1, index1);
- in_out_t value2 = tensor_read<in_out_t>(input2, shape2, index2);
- in_out_t result = apply_add<in_out_t>(value1, value2);
- tensor_write<in_out_t>(output, shape, index, result);
+if (in_out_t == shape_t) {
+ ERROR_IF(rank(shape) != 0 || rank(shape1) != 0 || rank(shape2) != 0);
+ shape_t value1 = tensor_read<shape_t>(input1, [], []);
+ shape_t value2 = tensor_read<shape_t>(input2, [], []);
+ shape_t result = apply_add<shape_t>(value1, value2);
+ tensor_write<shape_t>(output, [], [], result);
+} else {
+ ERROR_IF(shape != broadcast_shape(shape1, shape2));
+ for_each(index in shape) {
+ dim_t index1 = apply_broadcast(shape, shape1, index);
+ dim_t index2 = apply_broadcast(shape, shape2, index);
+ in_out_t value1 = tensor_read<in_out_t>(input1, shape1, index1);
+ in_out_t value2 = tensor_read<in_out_t>(input2, shape2, index2);
+ in_out_t result = apply_add<in_out_t>(value1, value2);
+ tensor_write<in_out_t>(output, shape, index, result);
+ }
}
----
@@ -131,18 +139,27 @@ include::{generated}/operators/INTDIV.adoc[]
[source,c++]
----
-ERROR_IF(shape != broadcast_shape(shape1, shape2));
-for_each(index in shape) {
- dim_t index1 = apply_broadcast(shape, shape1, index);
- dim_t index2 = apply_broadcast(shape, shape2, index);
- in_out_t value1 = tensor_read<in_out_t>(input1, shape1, index1);
- in_out_t value2 = tensor_read<in_out_t>(input2, shape2, index2);
+if (in_out_t == shape_t) {
+ ERROR_IF(rank(shape) != 0 || rank(shape1) != 0 || rank(shape2) != 0);
+ shape_t value1 = tensor_read<shape_t>(input1, [], []);
+ shape_t value2 = tensor_read<shape_t>(input2, [], []);
REQUIRE(value2 != 0);
- // This catches the case where we divide minimum<in_out_t> by -1
- // which is not representable in two's complement
- REQUIRE((int64_t)value1 / value2 <= maximum<in_out_t>);
- in_out_t result = value1 / value2;
- tensor_write<in_out_t>(output, shape, index, result);
+ shape_t result = value1 / value2;
+ tensor_write<shape_t>(output, [], [], result);
+} else {
+ ERROR_IF(shape != broadcast_shape(shape1, shape2));
+ for_each(index in shape) {
+ dim_t index1 = apply_broadcast(shape, shape1, index);
+ dim_t index2 = apply_broadcast(shape, shape2, index);
+ in_out_t value1 = tensor_read<in_out_t>(input1, shape1, index1);
+ in_out_t value2 = tensor_read<in_out_t>(input2, shape2, index2);
+ REQUIRE(value2 != 0);
+ // This catches the case where we divide minimum<in_out_t> by -1
+ // which is not representable in two's complement
+ REQUIRE((int64_t)value1 / value2 <= maximum<in_out_t>);
+ in_out_t result = value1 / value2;
+ tensor_write<in_out_t>(output, shape, index, result);
+ }
}
----
@@ -297,25 +314,33 @@ include::{generated}/operators/MUL.adoc[]
[source,c++]
----
-REQUIRE(0 <= shift && shift <= 63);
-REQUIRE(in_t == int32_t || shift == 0);
-ERROR_IF(shape != broadcast_shape(shape1, shape2));
-for_each(index in shape) {
- dim_t index1 = apply_broadcast(shape, shape1, index);
- dim_t index2 = apply_broadcast(shape, shape2, index);
- in_t value1 = tensor_read<in_t>(input1, shape1, index1);
- in_t value2 = tensor_read<in_t>(input2, shape2, index2);
- out_t result;
- if (in_t == int32_t && shift > 0) {
- int64_t product = (int64_t)value1 * (int64_t)value2;
- int64_t round = (int64_t)1 << (shift-1);
- product = (product + round) >> shift;
- REQUIRE(product >= minimum<int32_t> && product <= maximum<int32_t>)
- result = product;
- } else {
- result = value1 * value2; // low 32-bits of result for int32_t
+if (in_out_t == shape_t) {
+ ERROR_IF(rank(shape) != 0 || rank(shape1) != 0 || rank(shape2) != 0);
+ shape_t value1 = tensor_read<shape_t>(input1, [], []);
+ shape_t value2 = tensor_read<shape_t>(input2, [], []);
+ shape_t result = value1 * value2;
+ tensor_write<shape_t>(output, [], [], result);
+} else {
+ REQUIRE(0 <= shift && shift <= 63);
+ REQUIRE(in_t == int32_t || shift == 0);
+ ERROR_IF(shape != broadcast_shape(shape1, shape2));
+ for_each(index in shape) {
+ dim_t index1 = apply_broadcast(shape, shape1, index);
+ dim_t index2 = apply_broadcast(shape, shape2, index);
+ in_t value1 = tensor_read<in_t>(input1, shape1, index1);
+ in_t value2 = tensor_read<in_t>(input2, shape2, index2);
+ out_t result;
+ if (in_t == int32_t && shift > 0) {
+ int64_t product = (int64_t)value1 * (int64_t)value2;
+ int64_t round = (int64_t)1 << (shift-1);
+ product = (product + round) >> shift;
+ REQUIRE(product >= minimum<int32_t> && product <= maximum<int32_t>)
+ result = product;
+ } else {
+ result = value1 * value2; // low 32-bits of result for int32_t
+ }
+ tensor_write<out_t>(output, shape, index, result);
}
- tensor_write<out_t>(output, shape, index, result);
}
----
@@ -348,14 +373,22 @@ include::{generated}/operators/SUB.adoc[]
[source,c++]
----
-ERROR_IF(shape != broadcast_shape(shape1, shape2));
-for_each(index in shape) {
- dim_t index1 = apply_broadcast(shape, shape1, index);
- dim_t index2 = apply_broadcast(shape, shape2, index);
- in_out_t value1 = tensor_read<in_out_t>(input1, shape1, index1);
- in_out_t value2 = tensor_read<in_out_t>(input2, shape2, index2);
- in_out_t result = apply_sub<in_out_t>(value1, value2);
- tensor_write<in_out_t>(output, shape, index, result);
+if (in_out_t == shape_t) {
+ ERROR_IF(rank(shape) != 0 || rank(shape1) != 0 || rank(shape2) != 0);
+ shape_t value1 = tensor_read<shape_t>(input1, [], []);
+ shape_t value2 = tensor_read<shape_t>(input2, [], []);
+ shape_t result = apply_sub<shape_t>(value1, value2);
+ tensor_write<shape_t>(output, [], [], result);
+} else {
+ ERROR_IF(shape != broadcast_shape(shape1, shape2));
+ for_each(index in shape) {
+ dim_t index1 = apply_broadcast(shape, shape1, index);
+ dim_t index2 = apply_broadcast(shape, shape2, index);
+ in_out_t value1 = tensor_read<in_out_t>(input1, shape1, index1);
+ in_out_t value2 = tensor_read<in_out_t>(input2, shape2, index2);
+ in_out_t result = apply_sub<in_out_t>(value1, value2);
+ tensor_write<in_out_t>(output, shape, index, result);
+ }
}
----