diff options
Diffstat (limited to 'chapters/ewise_binary.adoc')
-rw-r--r-- | chapters/ewise_binary.adoc | 151 |
1 files changed, 58 insertions, 93 deletions
diff --git a/chapters/ewise_binary.adoc b/chapters/ewise_binary.adoc index 876ab4b..864cf5b 100644 --- a/chapters/ewise_binary.adoc +++ b/chapters/ewise_binary.adoc @@ -18,22 +18,14 @@ include::{generated}/operators/ADD.adoc[] [source,c++] ---- -if (in_out_t == shape_t) { - ERROR_IF(rank(shape) != 0 || rank(shape1) != 0 || rank(shape2) != 0); - shape_t value1 = tensor_read<shape_t>(input1, [], []); - shape_t value2 = tensor_read<shape_t>(input2, [], []); - shape_t result = apply_add_s<shape_t>(value1, value2); - tensor_write<shape_t>(output, [], [], result); -} else { - ERROR_IF(shape != broadcast_shape(shape1, shape2)); - for_each(index in shape) { - dim_t index1 = apply_broadcast(shape, shape1, index); - dim_t index2 = apply_broadcast(shape, shape2, index); - in_out_t value1 = tensor_read<in_out_t>(input1, shape1, index1); - in_out_t value2 = tensor_read<in_out_t>(input2, shape2, index2); - in_out_t result = apply_add_s<in_out_t>(value1, value2); - tensor_write<in_out_t>(output, shape, index, result); - } +ERROR_IF(shape != broadcast_shape(shape1, shape2)); +for_each(index in shape) { + dim_t index1 = apply_broadcast(shape, shape1, index); + dim_t index2 = apply_broadcast(shape, shape2, index); + in_out_t value1 = tensor_read<in_out_t>(input1, shape1, index1); + in_out_t value2 = tensor_read<in_out_t>(input2, shape2, index2); + in_out_t result = apply_add<in_out_t>(value1, value2); + tensor_write<in_out_t>(output, shape, index, result); } ---- @@ -54,16 +46,15 @@ for_each(index in shape) { in_out_t value2 = tensor_read<in_out_t>(input2, shape2, index2); // Ensure that shift amount is appropriate for the data type - REQUIRE((in_out_t == i32_t && 0 <= value2 && value2 <= 31) || - (in_out_t == i16_t && 0 <= value2 && value2 <= 15) || - (in_out_t == i8_t && 0 <= value2 && value2 <= 7)); + REQUIRE((in_out_t == int32_t && 0 <= value2 && value2 <= 31) || + (in_out_t == int16_t && 0 <= value2 && value2 <= 15) || + (in_out_t == int8_t && 0 <= value2 && value2 <= 7)); - in_out_t result = apply_arith_rshift<in_out_t>(value1, value2); - if (round == true && static_cast<int32_t>(value2) > 0 && - (apply_arith_rshift<in_out_t>(value1, apply_sub_s<in_out_t>(value2, 1)) & 1 != 0) { + in_out_t result = value1 >> value2; + if (round == true && value2 > 0 && (value1 >> (value2 - 1)) & 1 != 0) { result = result + 1; } - result = apply_clip_s<in_out_t>(result, minimum_s<in_out_t>, maximum_s<in_out_t>); + result = apply_clip<in_out_t>(result, minimum<in_out_t>, maximum<in_out_t>); tensor_write<in_out_t>(output, shape, index, result); } ---- @@ -140,27 +131,18 @@ include::{generated}/operators/INTDIV.adoc[] [source,c++] ---- -if (in_out_t == shape_t) { - ERROR_IF(rank(shape) != 0 || rank(shape1) != 0 || rank(shape2) != 0); - shape_t value1 = tensor_read<shape_t>(input1, [], []); - shape_t value2 = tensor_read<shape_t>(input2, [], []); +ERROR_IF(shape != broadcast_shape(shape1, shape2)); +for_each(index in shape) { + dim_t index1 = apply_broadcast(shape, shape1, index); + dim_t index2 = apply_broadcast(shape, shape2, index); + in_out_t value1 = tensor_read<in_out_t>(input1, shape1, index1); + in_out_t value2 = tensor_read<in_out_t>(input2, shape2, index2); REQUIRE(value2 != 0); - shape_t result = value1 / value2; - tensor_write<shape_t>(output, [], [], result); -} else { - ERROR_IF(shape != broadcast_shape(shape1, shape2)); - for_each(index in shape) { - dim_t index1 = apply_broadcast(shape, shape1, index); - dim_t index2 = apply_broadcast(shape, shape2, index); - in_out_t value1 = tensor_read<in_out_t>(input1, shape1, index1); - in_out_t value2 = tensor_read<in_out_t>(input2, shape2, index2); - REQUIRE(value2 != 0); - // This catches the case where we divide minimum<in_out_t> by -1 - // which is not representable in two's complement - REQUIRE(static_cast<int64_t>(value1) / static_cast<int64_t>(value2) <= maximum_s<in_out_t>); - in_out_t result = apply_intdiv_s<in_out_t>(value1, value2); - tensor_write<in_out_t>(output, shape, index, result); - } + // This catches the case where we divide minimum<in_out_t> by -1 + // which is not representable in two's complement + REQUIRE((int64_t)value1 / value2 <= maximum<in_out_t>); + in_out_t result = value1 / value2; + tensor_write<in_out_t>(output, shape, index, result); } ---- @@ -220,9 +202,8 @@ for_each(index in shape) { dim_t index2 = apply_broadcast(shape, shape2, index); in_out_t value1 = tensor_read<in_out_t>(input1, shape1, index1); in_out_t value2 = tensor_read<in_out_t>(input2, shape2, index2); - REQUIRE(0 <= static_cast<int32_t>(value2) && static_cast<int32_t>(value2) <= 31); - // Logical shifts happen as unsigned types internally - in_out_t result = apply_logical_rshift<in_out_t>(value1, value2); + REQUIRE(0 <= value2 && value2 <= 31); + in_out_t result = (in_out_t)((unsigned in_out_t)value1 >> value2); tensor_write<in_out_t>(output, shape, index, result); } ---- @@ -282,7 +263,7 @@ for_each(index in shape) { dim_t index2 = apply_broadcast(shape, shape2, index); in_out_t value1 = tensor_read<in_out_t>(input1, shape1, index1); in_out_t value2 = tensor_read<in_out_t>(input2, shape2, index2); - in_out_t result = apply_max_s<in_out_t>(value1, value2); + in_out_t result = apply_max(value1, value2); tensor_write<in_out_t>(output, shape, index, result); } ---- @@ -302,7 +283,7 @@ for_each(index in shape) { dim_t index2 = apply_broadcast(shape, shape2, index); in_out_t value1 = tensor_read<in_out_t>(input1, shape1, index1); in_out_t value2 = tensor_read<in_out_t>(input2, shape2, index2); - in_out_t result = apply_min_s(value1, value2); + in_out_t result = apply_min(value1, value2); tensor_write<in_out_t>(output, shape, index, result); } ---- @@ -316,33 +297,25 @@ include::{generated}/operators/MUL.adoc[] [source,c++] ---- -if (in_out_t == shape_t) { - ERROR_IF(rank(shape) != 0 || rank(shape1) != 0 || rank(shape2) != 0); - shape_t value1 = tensor_read<shape_t>(input1, [], []); - shape_t value2 = tensor_read<shape_t>(input2, [], []); - shape_t result = value1 * value2; - tensor_write<shape_t>(output, [], [], result); -} else { - REQUIRE(0 <= shift && shift <= 63); - REQUIRE(in_t == int32_t || shift == 0); - ERROR_IF(shape != broadcast_shape(shape1, shape2)); - for_each(index in shape) { - dim_t index1 = apply_broadcast(shape, shape1, index); - dim_t index2 = apply_broadcast(shape, shape2, index); - in_t value1 = tensor_read<in_t>(input1, shape1, index1); - in_t value2 = tensor_read<in_t>(input2, shape2, index2); - out_t result; - if (in_t == i32_t && shift > 0) { - int64_t product = sign_extend<int64_t>(value1) * sign_extend<int64_t>(value2); - int64_t round = static_cast<int64_t>(1) << (shift - 1); - product = (product + round) >> shift; - REQUIRE(product >= minimum_s<i32_t> && product <= maximum_s<i32_t>) - result = product; - } else { - result = apply_mul_s(value1, value2); // low 32-bits of result for i32_t - } - tensor_write<out_t>(output, shape, index, result); +REQUIRE(0 <= shift && shift <= 63); +REQUIRE(in_t == int32_t || shift == 0); +ERROR_IF(shape != broadcast_shape(shape1, shape2)); +for_each(index in shape) { + dim_t index1 = apply_broadcast(shape, shape1, index); + dim_t index2 = apply_broadcast(shape, shape2, index); + in_t value1 = tensor_read<in_t>(input1, shape1, index1); + in_t value2 = tensor_read<in_t>(input2, shape2, index2); + out_t result; + if (in_t == int32_t && shift > 0) { + int64_t product = (int64_t)value1 * (int64_t)value2; + int64_t round = (int64_t)1 << (shift-1); + product = (product + round) >> shift; + REQUIRE(product >= minimum<int32_t> && product <= maximum<int32_t>) + result = product; + } else { + result = value1 * value2; // low 32-bits of result for int32_t } + tensor_write<out_t>(output, shape, index, result); } ---- @@ -375,22 +348,14 @@ include::{generated}/operators/SUB.adoc[] [source,c++] ---- -if (in_out_t == shape_t) { - ERROR_IF(rank(shape) != 0 || rank(shape1) != 0 || rank(shape2) != 0); - shape_t value1 = tensor_read<shape_t>(input1, [], []); - shape_t value2 = tensor_read<shape_t>(input2, [], []); - shape_t result = apply_sub<shape_t>(value1, value2); - tensor_write<shape_t>(output, [], [], result); -} else { - ERROR_IF(shape != broadcast_shape(shape1, shape2)); - for_each(index in shape) { - dim_t index1 = apply_broadcast(shape, shape1, index); - dim_t index2 = apply_broadcast(shape, shape2, index); - in_out_t value1 = tensor_read<in_out_t>(input1, shape1, index1); - in_out_t value2 = tensor_read<in_out_t>(input2, shape2, index2); - in_out_t result = apply_sub_s<in_out_t>(value1, value2); - tensor_write<in_out_t>(output, shape, index, result); - } +ERROR_IF(shape != broadcast_shape(shape1, shape2)); +for_each(index in shape) { + dim_t index1 = apply_broadcast(shape, shape1, index); + dim_t index2 = apply_broadcast(shape, shape2, index); + in_out_t value1 = tensor_read<in_out_t>(input1, shape1, index1); + in_out_t value2 = tensor_read<in_out_t>(input2, shape2, index2); + in_out_t result = apply_sub<in_out_t>(value1, value2); + tensor_write<in_out_t>(output, shape, index, result); } ---- @@ -418,11 +383,11 @@ REQUIRE(length(table) == TABLE_SIZE); for_each(index in shape) { in_t value = tensor_read<in_t>(input, shape, index); out_t result; - if (in_t == i8_t) { + if (in_t == int8_t) { // value is a signed int, convert to a 0 based index - result = table[static_cast<int16_t>(value) + 128]; + result = table[value + 128]; } else { - result = apply_lookup_s(static_cast<int16_t>(table), static_cast<int16_t>(value)); + result = apply_lookup(table, value); } tensor_write<out_t>(output, shape, index, result); } |