author     Dominic Symes <dominic.symes@arm.com>   2023-05-09 10:14:49 +0100
committer  Dominic Symes <dominic.symes@arm.com>   2023-08-16 11:50:24 +0100
commit     830b43b1d1bd82edd57dee1f5cac12e2b5cf0e04
tree       971d15dc6ae00118d7be2df9a50dd7445d8428d9 /chapters/ewise_binary.adoc
parent     e1f517c541a61d18defc671028b24824c1eadd57
Add DIM operator and operations on shape_t values
Shape inference derives the shape of tensors in
the graph from input shapes. Operations such as RESHAPE
may need calculations to derive the new tensor shape.
This patch:
- Adds a DIM operator to get the size of a tensor along
  a given axis as a rank 0 tensor of type shape_t
- Allows RESHAPE to take a 1D shape tensor as input for
the new shape
- Allows RESIZE, TILE, and PAD to take input sizes based
  on shape tensors
- Allows ADD, SUB, MUL, INTDIV to operate on rank 0
shape_t tensors
- Allows CONCAT to concatenate rank 0 shape_t tensors into
  a rank 1 shape_t tensor
- Adds CONST support for shape_t tensors
In this version of the specification, shape tensors must
be resolvable to constants at backend compile time.
Signed-off-by: Dominic Symes <dominic.symes@arm.com>
Change-Id: I484bd44452453b5e05d0d8a82689564587b224e4
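
For context, a minimal sketch (not part of the patch, and not the spec's pseudocode) of the kind of shape computation this commit enables. It models shape_t as a 64-bit signed integer and folds DIM, MUL, and CONCAT into the constant new shape for a RESHAPE; the names dims_t and dim are hypothetical helpers introduced only for this illustration.

[source,c++]
----
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <vector>

using shape_t = int64_t;              // assumption: rank 0 shape element modelled as i64
using dims_t  = std::vector<shape_t>; // assumption: a 1D shape tensor as a vector

// Hypothetical model of DIM: the size of `shape` along `axis`,
// returned as a rank 0 shape_t value.
shape_t dim(const dims_t& shape, std::size_t axis) {
    assert(axis < shape.size());
    return shape[axis];
}

int main() {
    dims_t input_shape = {2, 3, 4};   // shape of some [2,3,4] input tensor
    // Collapse the last two axes: new_shape = [DIM(x,0), DIM(x,1) * DIM(x,2)].
    shape_t d0  = dim(input_shape, 0);
    shape_t d12 = dim(input_shape, 1) * dim(input_shape, 2); // MUL on shape_t
    dims_t new_shape = {d0, d12};     // CONCAT of rank 0 values into a 1D shape
    // A backend can resolve this to the constant [2, 12] at compile time
    // and feed it to RESHAPE as the new shape.
    assert(new_shape == (dims_t{2, 12}));
    return 0;
}
----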
Diffstat (limited to 'chapters/ewise_binary.adoc')
-rw-r--r--  chapters/ewise_binary.adoc  123
1 file changed, 78 insertions(+), 45 deletions(-)
diff --git a/chapters/ewise_binary.adoc b/chapters/ewise_binary.adoc
index 864cf5b..35e454a 100644
--- a/chapters/ewise_binary.adoc
+++ b/chapters/ewise_binary.adoc
@@ -18,14 +18,22 @@ include::{generated}/operators/ADD.adoc[]
 
 [source,c++]
 ----
-ERROR_IF(shape != broadcast_shape(shape1, shape2));
-for_each(index in shape) {
-    dim_t index1 = apply_broadcast(shape, shape1, index);
-    dim_t index2 = apply_broadcast(shape, shape2, index);
-    in_out_t value1 = tensor_read<in_out_t>(input1, shape1, index1);
-    in_out_t value2 = tensor_read<in_out_t>(input2, shape2, index2);
-    in_out_t result = apply_add<in_out_t>(value1, value2);
-    tensor_write<in_out_t>(output, shape, index, result);
+if (in_out_t == shape_t) {
+    ERROR_IF(rank(shape) != 0 || rank(shape1) != 0 || rank(shape2) != 0);
+    shape_t value1 = tensor_read<shape_t>(input1, [], []);
+    shape_t value2 = tensor_read<shape_t>(input2, [], []);
+    shape_t result = apply_add<shape_t>(value1, value2);
+    tensor_write<shape_t>(output, [], [], result);
+} else {
+    ERROR_IF(shape != broadcast_shape(shape1, shape2));
+    for_each(index in shape) {
+        dim_t index1 = apply_broadcast(shape, shape1, index);
+        dim_t index2 = apply_broadcast(shape, shape2, index);
+        in_out_t value1 = tensor_read<in_out_t>(input1, shape1, index1);
+        in_out_t value2 = tensor_read<in_out_t>(input2, shape2, index2);
+        in_out_t result = apply_add<in_out_t>(value1, value2);
+        tensor_write<in_out_t>(output, shape, index, result);
+    }
 }
 ----
 
@@ -131,18 +139,27 @@ include::{generated}/operators/INTDIV.adoc[]
 
 [source,c++]
 ----
-ERROR_IF(shape != broadcast_shape(shape1, shape2));
-for_each(index in shape) {
-    dim_t index1 = apply_broadcast(shape, shape1, index);
-    dim_t index2 = apply_broadcast(shape, shape2, index);
-    in_out_t value1 = tensor_read<in_out_t>(input1, shape1, index1);
-    in_out_t value2 = tensor_read<in_out_t>(input2, shape2, index2);
+if (in_out_t == shape_t) {
+    ERROR_IF(rank(shape) != 0 || rank(shape1) != 0 || rank(shape2) != 0);
+    shape_t value1 = tensor_read<shape_t>(input1, [], []);
+    shape_t value2 = tensor_read<shape_t>(input2, [], []);
     REQUIRE(value2 != 0);
-    // This catches the case where we divide minimum<in_out_t> by -1
-    // which is not representable in two's complement
-    REQUIRE((int64_t)value1 / value2 <= maximum<in_out_t>);
-    in_out_t result = value1 / value2;
-    tensor_write<in_out_t>(output, shape, index, result);
+    shape_t result = value1 / value2;
+    tensor_write<shape_t>(output, [], [], result);
+} else {
+    ERROR_IF(shape != broadcast_shape(shape1, shape2));
+    for_each(index in shape) {
+        dim_t index1 = apply_broadcast(shape, shape1, index);
+        dim_t index2 = apply_broadcast(shape, shape2, index);
+        in_out_t value1 = tensor_read<in_out_t>(input1, shape1, index1);
+        in_out_t value2 = tensor_read<in_out_t>(input2, shape2, index2);
+        REQUIRE(value2 != 0);
+        // This catches the case where we divide minimum<in_out_t> by -1
+        // which is not representable in two's complement
+        REQUIRE((int64_t)value1 / value2 <= maximum<in_out_t>);
+        in_out_t result = value1 / value2;
+        tensor_write<in_out_t>(output, shape, index, result);
+    }
 }
 ----
 
@@ -297,25 +314,33 @@ include::{generated}/operators/MUL.adoc[]
 
 [source,c++]
 ----
-REQUIRE(0 <= shift && shift <= 63);
-REQUIRE(in_t == int32_t || shift == 0);
-ERROR_IF(shape != broadcast_shape(shape1, shape2));
-for_each(index in shape) {
-    dim_t index1 = apply_broadcast(shape, shape1, index);
-    dim_t index2 = apply_broadcast(shape, shape2, index);
-    in_t value1 = tensor_read<in_t>(input1, shape1, index1);
-    in_t value2 = tensor_read<in_t>(input2, shape2, index2);
-    out_t result;
-    if (in_t == int32_t && shift > 0) {
-        int64_t product = (int64_t)value1 * (int64_t)value2;
-        int64_t round = (int64_t)1 << (shift-1);
-        product = (product + round) >> shift;
-        REQUIRE(product >= minimum<int32_t> && product <= maximum<int32_t>)
-        result = product;
-    } else {
-        result = value1 * value2; // low 32-bits of result for int32_t
+if (in_out_t == shape_t) {
+    ERROR_IF(rank(shape) != 0 || rank(shape1) != 0 || rank(shape2) != 0);
+    shape_t value1 = tensor_read<shape_t>(input1, [], []);
+    shape_t value2 = tensor_read<shape_t>(input2, [], []);
+    shape_t result = value1 * value2;
+    tensor_write<shape_t>(output, [], [], result);
+} else {
+    REQUIRE(0 <= shift && shift <= 63);
+    REQUIRE(in_t == int32_t || shift == 0);
+    ERROR_IF(shape != broadcast_shape(shape1, shape2));
+    for_each(index in shape) {
+        dim_t index1 = apply_broadcast(shape, shape1, index);
+        dim_t index2 = apply_broadcast(shape, shape2, index);
+        in_t value1 = tensor_read<in_t>(input1, shape1, index1);
+        in_t value2 = tensor_read<in_t>(input2, shape2, index2);
+        out_t result;
+        if (in_t == int32_t && shift > 0) {
+            int64_t product = (int64_t)value1 * (int64_t)value2;
+            int64_t round = (int64_t)1 << (shift-1);
+            product = (product + round) >> shift;
+            REQUIRE(product >= minimum<int32_t> && product <= maximum<int32_t>)
+            result = product;
+        } else {
+            result = value1 * value2; // low 32-bits of result for int32_t
+        }
+        tensor_write<out_t>(output, shape, index, result);
     }
-    tensor_write<out_t>(output, shape, index, result);
 }
 ----
 
@@ -348,14 +373,22 @@ include::{generated}/operators/SUB.adoc[]
 
 [source,c++]
 ----
-ERROR_IF(shape != broadcast_shape(shape1, shape2));
-for_each(index in shape) {
-    dim_t index1 = apply_broadcast(shape, shape1, index);
-    dim_t index2 = apply_broadcast(shape, shape2, index);
-    in_out_t value1 = tensor_read<in_out_t>(input1, shape1, index1);
-    in_out_t value2 = tensor_read<in_out_t>(input2, shape2, index2);
-    in_out_t result = apply_sub<in_out_t>(value1, value2);
-    tensor_write<in_out_t>(output, shape, index, result);
+if (in_out_t == shape_t) {
+    ERROR_IF(rank(shape) != 0 || rank(shape1) != 0 || rank(shape2) != 0);
+    shape_t value1 = tensor_read<shape_t>(input1, [], []);
+    shape_t value2 = tensor_read<shape_t>(input2, [], []);
+    shape_t result = apply_sub<shape_t>(value1, value2);
+    tensor_write<shape_t>(output, [], [], result);
+} else {
+    ERROR_IF(shape != broadcast_shape(shape1, shape2));
+    for_each(index in shape) {
+        dim_t index1 = apply_broadcast(shape, shape1, index);
+        dim_t index2 = apply_broadcast(shape, shape2, index);
+        in_out_t value1 = tensor_read<in_out_t>(input1, shape1, index1);
+        in_out_t value2 = tensor_read<in_out_t>(input2, shape2, index2);
+        in_out_t result = apply_sub<in_out_t>(value1, value2);
+        tensor_write<in_out_t>(output, shape, index, result);
+    }
 }
 ----
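
The shape_t branches added above all share one pattern: read two rank 0 values, apply the scalar operation, write a rank 0 result. Note that the shape_t branch of INTDIV keeps the value2 != 0 check but drops the int32_t overflow REQUIRE. A minimal executable model of that pattern follows; it assumes shape_t can be modelled as a 64-bit signed integer and REQUIRE as an assertion, neither of which is stated by the patch itself.

[source,c++]
----
#include <cassert>
#include <cstdint>

using shape_t = int64_t;            // assumption: shape_t modelled as i64
#define REQUIRE(cond) assert(cond)  // assumption: REQUIRE aborts on failure

// Rank 0 shape_t arithmetic, mirroring the shape_t branches of
// ADD, SUB, MUL, and INTDIV above.
shape_t shape_add(shape_t a, shape_t b) { return a + b; }
shape_t shape_sub(shape_t a, shape_t b) { return a - b; }
shape_t shape_mul(shape_t a, shape_t b) { return a * b; }

shape_t shape_intdiv(shape_t a, shape_t b) {
    REQUIRE(b != 0);                // the one check the shape_t branch keeps
    return a / b;                   // C++ integer division truncates toward zero
}

int main() {
    assert(shape_add(6, 4) == 10);
    assert(shape_sub(6, 4) == 2);
    assert(shape_mul(6, 4) == 24);
    assert(shape_intdiv(7, 2) == 3); // truncating division
    return 0;
}
----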