diff options
Diffstat (limited to 'chapters/ewise_binary.adoc')
-rw-r--r-- | chapters/ewise_binary.adoc | 123 |
1 files changed, 78 insertions, 45 deletions
diff --git a/chapters/ewise_binary.adoc b/chapters/ewise_binary.adoc index 864cf5b..35e454a 100644 --- a/chapters/ewise_binary.adoc +++ b/chapters/ewise_binary.adoc @@ -18,14 +18,22 @@ include::{generated}/operators/ADD.adoc[] [source,c++] ---- -ERROR_IF(shape != broadcast_shape(shape1, shape2)); -for_each(index in shape) { - dim_t index1 = apply_broadcast(shape, shape1, index); - dim_t index2 = apply_broadcast(shape, shape2, index); - in_out_t value1 = tensor_read<in_out_t>(input1, shape1, index1); - in_out_t value2 = tensor_read<in_out_t>(input2, shape2, index2); - in_out_t result = apply_add<in_out_t>(value1, value2); - tensor_write<in_out_t>(output, shape, index, result); +if (in_out_t == shape_t) { + ERROR_IF(rank(shape) != 0 || rank(shape1) != 0 || rank(shape2) != 0); + shape_t value1 = tensor_read<shape_t>(input1, [], []); + shape_t value2 = tensor_read<shape_t>(input2, [], []); + shape_t result = apply_add<shape_t>(value1, value2); + tensor_write<shape_t>(output, [], [], result); +} else { + ERROR_IF(shape != broadcast_shape(shape1, shape2)); + for_each(index in shape) { + dim_t index1 = apply_broadcast(shape, shape1, index); + dim_t index2 = apply_broadcast(shape, shape2, index); + in_out_t value1 = tensor_read<in_out_t>(input1, shape1, index1); + in_out_t value2 = tensor_read<in_out_t>(input2, shape2, index2); + in_out_t result = apply_add<in_out_t>(value1, value2); + tensor_write<in_out_t>(output, shape, index, result); + } } ---- @@ -131,18 +139,27 @@ include::{generated}/operators/INTDIV.adoc[] [source,c++] ---- -ERROR_IF(shape != broadcast_shape(shape1, shape2)); -for_each(index in shape) { - dim_t index1 = apply_broadcast(shape, shape1, index); - dim_t index2 = apply_broadcast(shape, shape2, index); - in_out_t value1 = tensor_read<in_out_t>(input1, shape1, index1); - in_out_t value2 = tensor_read<in_out_t>(input2, shape2, index2); +if (in_out_t == shape_t) { + ERROR_IF(rank(shape) != 0 || rank(shape1) != 0 || rank(shape2) != 0); + shape_t value1 = tensor_read<shape_t>(input1, [], []); + shape_t value2 = tensor_read<shape_t>(input2, [], []); REQUIRE(value2 != 0); - // This catches the case where we divide minimum<in_out_t> by -1 - // which is not representable in two's complement - REQUIRE((int64_t)value1 / value2 <= maximum<in_out_t>); - in_out_t result = value1 / value2; - tensor_write<in_out_t>(output, shape, index, result); + shape_t result = value1 / value2; + tensor_write<shape_t>(output, [], [], result); +} else { + ERROR_IF(shape != broadcast_shape(shape1, shape2)); + for_each(index in shape) { + dim_t index1 = apply_broadcast(shape, shape1, index); + dim_t index2 = apply_broadcast(shape, shape2, index); + in_out_t value1 = tensor_read<in_out_t>(input1, shape1, index1); + in_out_t value2 = tensor_read<in_out_t>(input2, shape2, index2); + REQUIRE(value2 != 0); + // This catches the case where we divide minimum<in_out_t> by -1 + // which is not representable in two's complement + REQUIRE((int64_t)value1 / value2 <= maximum<in_out_t>); + in_out_t result = value1 / value2; + tensor_write<in_out_t>(output, shape, index, result); + } } ---- @@ -297,25 +314,33 @@ include::{generated}/operators/MUL.adoc[] [source,c++] ---- -REQUIRE(0 <= shift && shift <= 63); -REQUIRE(in_t == int32_t || shift == 0); -ERROR_IF(shape != broadcast_shape(shape1, shape2)); -for_each(index in shape) { - dim_t index1 = apply_broadcast(shape, shape1, index); - dim_t index2 = apply_broadcast(shape, shape2, index); - in_t value1 = tensor_read<in_t>(input1, shape1, index1); - in_t value2 = tensor_read<in_t>(input2, shape2, index2); - out_t result; - if (in_t == int32_t && shift > 0) { - int64_t product = (int64_t)value1 * (int64_t)value2; - int64_t round = (int64_t)1 << (shift-1); - product = (product + round) >> shift; - REQUIRE(product >= minimum<int32_t> && product <= maximum<int32_t>) - result = product; - } else { - result = value1 * value2; // low 32-bits of result for int32_t +if (in_out_t == shape_t) { + ERROR_IF(rank(shape) != 0 || rank(shape1) != 0 || rank(shape2) != 0); + shape_t value1 = tensor_read<shape_t>(input1, [], []); + shape_t value2 = tensor_read<shape_t>(input2, [], []); + shape_t result = value1 * value2; + tensor_write<shape_t>(output, [], [], result); +} else { + REQUIRE(0 <= shift && shift <= 63); + REQUIRE(in_t == int32_t || shift == 0); + ERROR_IF(shape != broadcast_shape(shape1, shape2)); + for_each(index in shape) { + dim_t index1 = apply_broadcast(shape, shape1, index); + dim_t index2 = apply_broadcast(shape, shape2, index); + in_t value1 = tensor_read<in_t>(input1, shape1, index1); + in_t value2 = tensor_read<in_t>(input2, shape2, index2); + out_t result; + if (in_t == int32_t && shift > 0) { + int64_t product = (int64_t)value1 * (int64_t)value2; + int64_t round = (int64_t)1 << (shift-1); + product = (product + round) >> shift; + REQUIRE(product >= minimum<int32_t> && product <= maximum<int32_t>) + result = product; + } else { + result = value1 * value2; // low 32-bits of result for int32_t + } + tensor_write<out_t>(output, shape, index, result); } - tensor_write<out_t>(output, shape, index, result); } ---- @@ -348,14 +373,22 @@ include::{generated}/operators/SUB.adoc[] [source,c++] ---- -ERROR_IF(shape != broadcast_shape(shape1, shape2)); -for_each(index in shape) { - dim_t index1 = apply_broadcast(shape, shape1, index); - dim_t index2 = apply_broadcast(shape, shape2, index); - in_out_t value1 = tensor_read<in_out_t>(input1, shape1, index1); - in_out_t value2 = tensor_read<in_out_t>(input2, shape2, index2); - in_out_t result = apply_sub<in_out_t>(value1, value2); - tensor_write<in_out_t>(output, shape, index, result); +if (in_out_t == shape_t) { + ERROR_IF(rank(shape) != 0 || rank(shape1) != 0 || rank(shape2) != 0); + shape_t value1 = tensor_read<shape_t>(input1, [], []); + shape_t value2 = tensor_read<shape_t>(input2, [], []); + shape_t result = apply_sub<shape_t>(value1, value2); + tensor_write<shape_t>(output, [], [], result); +} else { + ERROR_IF(shape != broadcast_shape(shape1, shape2)); + for_each(index in shape) { + dim_t index1 = apply_broadcast(shape, shape1, index); + dim_t index2 = apply_broadcast(shape, shape2, index); + in_out_t value1 = tensor_read<in_out_t>(input1, shape1, index1); + in_out_t value2 = tensor_read<in_out_t>(input2, shape2, index2); + in_out_t result = apply_sub<in_out_t>(value1, value2); + tensor_write<in_out_t>(output, shape, index, result); + } } ---- |