aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEric Kunze <eric.kunze@arm.com>2021-11-12 16:15:47 -0800
committerEric Kunze <eric.kunze@arm.com>2021-11-12 16:16:57 -0800
commita0e9a523fcee7f25d4a81289cf10e9f9082ee878 (patch)
treee48d90ea86c18bd1e36dccbedb6eb8d77263d5cc
parentb79c7d19af28899abb6b821640ea365fd27516a1 (diff)
downloadspecification-a0e9a523fcee7f25d4a81289cf10e9f9082ee878.tar.gz
Use in_out_t when a type is used for input and output
Also change acc_t to out_t when the value is being used as an output. This should make the argument tables easier to follow. Change-Id: I2a57f3c4eaf937f29da785ff5c11576663a39494
-rw-r--r--chapters/activation_funcs.adoc28
-rw-r--r--chapters/data_layout.adoc68
-rw-r--r--chapters/data_nodes.adoc6
-rw-r--r--chapters/ewise_binary.adoc252
-rw-r--r--chapters/ewise_ternary.adoc16
-rw-r--r--chapters/ewise_unary.adoc140
-rw-r--r--chapters/reduction.adoc90
-rw-r--r--chapters/tensor_ops.adoc140
8 files changed, 370 insertions, 370 deletions
diff --git a/chapters/activation_funcs.adoc b/chapters/activation_funcs.adoc
index a58a1fc..84a1039 100644
--- a/chapters/activation_funcs.adoc
+++ b/chapters/activation_funcs.adoc
@@ -20,10 +20,10 @@ No zero point subtraction is done to the values, thus to clamp to the zero point
|===
|Argument|Type|Name|Shape|Description
-|Input|in_t*|Input|shape|Input tensor
-|Attribute|in_t|min_val|-|minimum clip value
-|Attribute|in_t|max_val|-|maximum clip value
-|Output|in_t*|Output|shape|Output tensor of same type and shape as input
+|Input|in_out_t*|Input|shape|Input tensor
+|Attribute|in_out_t|min_val|-|minimum clip value
+|Attribute|in_out_t|max_val|-|maximum clip value
+|Output|in_out_t*|Output|shape|Output tensor of same type and shape as input
|===
*Operation Function:*
@@ -31,16 +31,16 @@ No zero point subtraction is done to the values, thus to clamp to the zero point
----
ERROR_IF(max_val < min_val);
for_each(index in shape) {
- in_t value = tensor_read<in_t>(input, shape, index);
- value = apply_clip<in_t>(value, min_val, max_val);
- tensor_write<in_t>(output, shape, index, value);
+ in_out_t value = tensor_read<in_out_t>(input, shape, index);
+ value = apply_clip<in_out_t>(value, min_val, max_val);
+ tensor_write<in_out_t>(output, shape, index, value);
}
----
*Supported Data Types:*
|===
-|Profile|Mode|in_t
+|Profile|Mode|in_out_t
|Any|signed 8|int8_t
|Any|signed 16|int16_t
@@ -72,14 +72,14 @@ generate_lookup_table(&sigmoid_table, &sigmoid_reference);
|===
|Argument|Type|Name|Shape|Description
-|Input|in_t*|Input|shape|Input tensor
-|Output|in_t*|Output|shape|Output tensor of same type and shape as input
+|Input|in_out_t*|Input|shape|Input tensor
+|Output|in_out_t*|Output|shape|Output tensor of same type and shape as input
|===
*Supported Data Types:*
|===
-|Profile|Mode|in_t
+|Profile|Mode|in_out_t
|MI, MT|floating-point|float_t
|===
@@ -110,14 +110,14 @@ generate_lookup_table(&tanh_table, &tanh_reference);
|===
|Argument|Type|Name|Shape|Description
-|Input|in_t*|Input|shape|Input tensor
-|Output|in_t*|Output|shape|Output tensor of same type and shape as input
+|Input|in_out_t*|Input|shape|Input tensor
+|Output|in_out_t*|Output|shape|Output tensor of same type and shape as input
|===
*Supported Data Types:*
|===
-|Profile|Mode|in_t
+|Profile|Mode|in_out_t
|MI, MT|floating-point|float_t
|===
diff --git a/chapters/data_layout.adoc b/chapters/data_layout.adoc
index 54221f6..e50a14e 100644
--- a/chapters/data_layout.adoc
+++ b/chapters/data_layout.adoc
@@ -18,9 +18,9 @@ No data conversion happens during a concat operation.
|===
|Argument|Type|Name|Shape|Description
-|Input|in_t*|input1|shapes1[]|List of input tensors. All inputs must have the same rank and data type
+|Input|in_out_t*|input1|shapes1[]|List of input tensors. All inputs must have the same rank and data type
|Attribute|int|axis|-|Axis along which concatenation is to occur, in range from 0 to rank(shape)-1
-|Output|in_t*|output|shape|Output tensor
+|Output|in_out_t*|output|shape|Output tensor
|===
*Operation Function:*
@@ -43,8 +43,8 @@ for_each(index1 in shape) {
// For each output location, we are looking for the
// appropriate input tensor
if (index2[axis] >= 0 && index2[axis] < shapes1[t][axis]) {
- in_t value = tensor_read<in_t>(input1[t], shapes1[t], index2);
- tensor_write<in_t>(output, shape, index1, value);
+ in_out_t value = tensor_read<in_out_t>(input1[t], shapes1[t], index2);
+ tensor_write<in_out_t>(output, shape, index1, value);
}
index2[axis] = index2[axis] - shapes1[t][axis];
}
@@ -55,7 +55,7 @@ for_each(index1 in shape) {
*Supported Data Types:*
|===
-|Profile|Mode|in_t
+|Profile|Mode|in_out_t
|Any|Boolean|bool_t
|Any|signed 8|int8_t
@@ -75,10 +75,10 @@ The pad_const value includes the zero point if the tensor uses a zero point.
|===
|Argument|Type|Name|Shape|Description
-|Input|in_t*|input1|shape1|Input tensor
+|Input|in_out_t*|input1|shape1|Input tensor
|Attribute|int|padding|[rank(input1),2]|Amount of padding to be done
-|Attribute|in_t|pad_const|-|Constant value to be used as padding
-|Output|in_t*|output|shape|Output tensor of same type as the input tensor
+|Attribute|in_out_t|pad_const|-|Constant value to be used as padding
+|Output|in_out_t*|output|shape|Output tensor of same type as the input tensor
|===
*Operation Function:*
@@ -98,15 +98,15 @@ for_each(index in shape) {
is_pad = true;
}
}
- in_t value = is_pad ? pad_const : tensor_read<in_t>(input1, shape1, index1);
- tensor_write<in_t>(output, shape, index, value);
+ in_out_t value = is_pad ? pad_const : tensor_read<in_out_t>(input1, shape1, index1);
+ tensor_write<in_out_t>(output, shape, index, value);
}
----
*Supported Data Types:*
|===
-|Profile|Mode|in_t
+|Profile|Mode|in_out_t
|Any|Boolean|bool_t
|Any|signed 8|int8_t
@@ -124,9 +124,9 @@ Returns a tensor with the same type/values as the input, with a new shape specif
|===
|Argument|Type|Name|Shape|Description
-|Input|in_t*|input1|shape1|Input tensor
+|Input|in_out_t*|input1|shape1|Input tensor
|Attribute|int|new_shape|[rank(output)]|List of values, with each element giving the size of the result tensor for the given dimension. At most one dimension may be given as -1 to automatically calculate the dimension size.
-|Output|in_t*|output|shape|Output tensor of same type, size as the input tensor
+|Output|in_out_t*|output|shape|Output tensor of same type, size as the input tensor
|===
*Operation Function:*
@@ -142,7 +142,7 @@ for(i = 0; i < tensor_size(shape); i++) {
*Supported Data Types:*
|===
-|Profile|Mode|in_t
+|Profile|Mode|in_out_t
|Any|Boolean|bool_t
|Any|signed 8|int8_t
@@ -160,9 +160,9 @@ Returns a tensor with the same type/values as the input, with the data reversed
|===
|Argument|Type|Name|Shape|Description
-|Input|in_t*|input|shape|Input tensor from 1 to 4 dims
+|Input|in_out_t*|input|shape|Input tensor from 1 to 4 dims
|Attribute|int|axis|-|Axis to reverse, in range from 0 to rank(shape)-1
-|Output|in_t*|output|shape|Output tensor. Same shape as input tensor.
+|Output|in_out_t*|output|shape|Output tensor. Same shape as input tensor.
|===
*Operation Function:*
@@ -173,15 +173,15 @@ ERROR_IF(axis < 0 || axis >= rank(shape));
for_each(index in shape) {
tmp_index = index;
tmp_index[axis] = shape[axis] - 1 - index[axis];
- in_t value = tensor_read<in_t>(input, shape, tmp_index);
- tensor_write<in_t>(output, shape, index, value);
+ in_out_t value = tensor_read<in_out_t>(input, shape, tmp_index);
+ tensor_write<in_out_t>(output, shape, index, value);
}
----
*Supported Data Types:*
|===
-|Profile|Mode|in_t
+|Profile|Mode|in_out_t
|Any|Boolean|bool_t
|Any|signed 8|int8_t
@@ -199,10 +199,10 @@ No data conversion happens during a slice operation.
|===
|Argument|Type|Name|Shape|Description
-|Input|in_t*|input1|shape1|Input tensor with rank from 1 to 4
+|Input|in_out_t*|input1|shape1|Input tensor with rank from 1 to 4
|Attribute|int|start|[rank(input1)]|List of integer coordinates, of length equal to the rank of input1. Start coordinate for slicing.
|Attribute|int|size|[rank(input1)]|List of integer size values, of length equal to the rank of input1. Size of the input to be used.
-|Output|in_t*|output|shape|Output tensor of same type as the input tensor
+|Output|in_out_t*|output|shape|Output tensor of same type as the input tensor
|===
*Operation Function:*
@@ -225,15 +225,15 @@ for_each(index in shape) {
for(i = 0; i < rank(shape); i++) {
tmp_index[i] = index[i] + start[i];
}
- in_t value = tensor_read<in_t>(input, shape1, tmp_index);
- tensor_write<in_t>(output, shape, index, value);
+ in_out_t value = tensor_read<in_out_t>(input, shape1, tmp_index);
+ tensor_write<in_out_t>(output, shape, index, value);
}
----
*Supported Data Types:*
|===
-|Profile|Mode|in_t
+|Profile|Mode|in_out_t
|Any|Boolean|bool_t
|Any|signed 8|int8_t
@@ -251,9 +251,9 @@ Replicates input1 multiplies times along each dimension.
|===
|Argument|Type|Name|Shape|Description
-|Input|in_t*|input1|shape1|Input tensor with rank from 1 to 4
+|Input|in_out_t*|input1|shape1|Input tensor with rank from 1 to 4
|Attribute|int32_t|multiplies|[rank(shape1)]|Number of times to replicate input1 in each dimension
-|Output|in_t*|output|shape|Output tensor of same type, rank as the input tensor
+|Output|in_out_t*|output|shape|Output tensor of same type, rank as the input tensor
|===
*Operation Function:*
@@ -266,15 +266,15 @@ for_each(index in shape) {
REQUIRE(shape1[i] * multiplies[i] == shape[i]);
tmp_index[i] = index[i] % shape1[i];
}
- in_t value = tensor_read<in_t>(input, shape1, tmp_index);
- tensor_write<in_t>(output, shape, index, value);
+ in_out_t value = tensor_read<in_out_t>(input, shape1, tmp_index);
+ tensor_write<in_out_t>(output, shape, index, value);
}
----
*Supported Data Types:*
|===
-|Profile|Mode|in_t
+|Profile|Mode|in_out_t
|Any|Boolean|bool_t
|Any|signed 8|int8_t
@@ -292,9 +292,9 @@ Permutes the dimensions based on perm.
|===
|Argument|Type|Name|Shape|Description
-|Input|in_t*|input1|shape1|Input tensor with minimum rank of one.
+|Input|in_out_t*|input1|shape1|Input tensor with minimum rank of one.
|Attribute|int32_t|perms|[rank(input1)]|List of integers of length equal to the rank of input1. Values must be valid dimensions within shape1, and may not be repeated.
-|Output|in_t*|output|shape|Output tensor of same type, rank as the input tensor
+|Output|in_out_t*|output|shape|Output tensor of same type, rank as the input tensor
|===
*Operation Function:*
@@ -315,15 +315,15 @@ for_each(index in shape) {
REQUIRE(shape1[perm[i]] == shape[i])
tmp_index[perm[i]] = index[i]
}
- in_t value = tensor_read<in_t>(input, shape1, tmp_index);
- tensor_write<in_t>(output, shape, index, value);
+ in_out_t value = tensor_read<in_out_t>(input, shape1, tmp_index);
+ tensor_write<in_out_t>(output, shape, index, value);
}
----
*Supported Data Types:*
|===
-|Profile|Mode|in_t
+|Profile|Mode|in_out_t
|Any|Boolean|bool_t
|Any|signed 8|int8_t
diff --git a/chapters/data_nodes.adoc b/chapters/data_nodes.adoc
index e4a3e88..9d32a62 100644
--- a/chapters/data_nodes.adoc
+++ b/chapters/data_nodes.adoc
@@ -43,14 +43,14 @@ Returns a tensor with the same shape, type, and contents as the input.
|===
|Argument|Type|Name|Shape|Description
-|Input|in_t*|input1|shape|Input tensor
-|Output|in_t*|output|shape|Output tensor of same type, size as the input tensor
+|Input|in_out_t*|input1|shape|Input tensor
+|Output|in_out_t*|output|shape|Output tensor of same type, size as the input tensor
|===
*Supported Data Types:*
|===
-|Profile|Mode|in_t
+|Profile|Mode|in_out_t
|Any|Boolean|bool_t
|Any|signed 8|int8_t
diff --git a/chapters/ewise_binary.adoc b/chapters/ewise_binary.adoc
index c624437..aa1c86c 100644
--- a/chapters/ewise_binary.adoc
+++ b/chapters/ewise_binary.adoc
@@ -19,9 +19,9 @@ Axis of size 1 will be broadcast, as necessary. Rank of input tensors must match
|===
|Argument|Type|Name|Shape|Description
-|Input|in_t*|input1|shape1|Input tensor
-|Input|in_t*|input2|shape2|Input tensor with the same rank as input1
-|Output|in_t*|output|shape|Output tensor with broadcast shape if necessary
+|Input|in_out_t*|input1|shape1|Input tensor
+|Input|in_out_t*|input2|shape2|Input tensor with the same rank as input1
+|Output|in_out_t*|output|shape|Output tensor with broadcast shape if necessary
|===
*Operation Function:*
@@ -31,17 +31,17 @@ Axis of size 1 will be broadcast, as necessary. Rank of input tensors must match
for_each(index in shape) {
index1 = apply_broadcast(shape, shape1, index);
index2 = apply_broadcast(shape, shape2, index);
- in_t value1 = tensor_read<in_t>(input1, shape1, index1);
- in_t value2 = tensor_read<in_t>(input2, shape2, index2);
- in_t result = apply_add<in_t>(value1, value2);
- tensor_write<in_t>(output, shape, index, result);
+ in_out_t value1 = tensor_read<in_out_t>(input1, shape1, index1);
+ in_out_t value2 = tensor_read<in_out_t>(input2, shape2, index2);
+ in_out_t result = apply_add<in_out_t>(value1, value2);
+ tensor_write<in_out_t>(output, shape, index, result);
}
----
*Supported Data Types:*
|===
-|Profile|Mode|in_t
+|Profile|Mode|in_out_t
|Any|signed 32|int32_t
|MI, MT|floating-point|float_t
@@ -57,10 +57,10 @@ Axis of size 1 will be broadcast, as necessary. Rank of input tensors must match
|===
|Argument|Type|Name|Shape|Description
-|Input|in_t*|input1|shape1|Input tensor
-|Input|in_t*|input2|shape2|Input tensor with the same rank as input1
+|Input|in_out_t*|input1|shape1|Input tensor
+|Input|in_out_t*|input2|shape2|Input tensor with the same rank as input1
|Input|bool_t|round|-|If true then the shift is rounded
-|Output|in_t*|output|shape|Output tensor with broadcast shape if necessary
+|Output|in_out_t*|output|shape|Output tensor with broadcast shape if necessary
|===
*Operation Function:*
@@ -70,27 +70,27 @@ Axis of size 1 will be broadcast, as necessary. Rank of input tensors must match
for_each(index in shape) {
index1 = apply_broadcast(shape, shape1, index);
index2 = apply_broadcast(shape, shape2, index);
- in_t value1 = tensor_read<in_t>(input1, shape1, index1);
- in_t value2 = tensor_read<in_t>(input2, shape2, index2);
+ in_out_t value1 = tensor_read<in_out_t>(input1, shape1, index1);
+ in_out_t value2 = tensor_read<in_out_t>(input2, shape2, index2);
// Ensure that shift amount is appropriate for the data type
- REQUIRE((in_t == int32_t && 0 <= value2 && value2 <= 31) ||
- (in_t == int16_t && 0 <= value2 && value2 <= 15) ||
- (in_t == int8_t && 0 <= value2 && value2 <= 7));
+ REQUIRE((in_out_t == int32_t && 0 <= value2 && value2 <= 31) ||
+ (in_out_t == int16_t && 0 <= value2 && value2 <= 15) ||
+ (in_out_t == int8_t && 0 <= value2 && value2 <= 7));
- in_t result = value1 >> value2;
+ in_out_t result = value1 >> value2;
if (round == true && value2 > 0 && (value1 >> (value2 - 1)) & 1 != 0) {
result = result + 1;
}
- result = apply_clip<in_t>(result, minimum<in_t>, maximum<in_t>);
- tensor_write<in_t>(output, shape, index, result);
+ result = apply_clip<in_out_t>(result, minimum<in_out_t>, maximum<in_out_t>);
+ tensor_write<in_out_t>(output, shape, index, result);
}
----
*Supported Data Types:*
|===
-|Profile|Mode|in_t
+|Profile|Mode|in_out_t
|Any|signed 8|int8_t
|Any|signed 16|int16_t
@@ -107,9 +107,9 @@ Axis of size 1 will be broadcast as necessary. Rank of input tensors must match.
|===
|Argument|Type|Name|Shape|Description
-|Input|in_t*|input1|shape1|Input tensor
-|Input|in_t*|input2|shape2|Input tensor with the same rank as input1
-|Output|in_t*|output|shape|Output tensor of same type as the input tensors, with broadcast shape if necessary
+|Input|in_out_t*|input1|shape1|Input tensor
+|Input|in_out_t*|input2|shape2|Input tensor with the same rank as input1
+|Output|in_out_t*|output|shape|Output tensor of same type as the input tensors, with broadcast shape if necessary
|===
*Operation Function:*
@@ -119,17 +119,17 @@ Axis of size 1 will be broadcast as necessary. Rank of input tensors must match.
for_each(index in shape) {
index1 = apply_broadcast(shape, shape1, index);
index2 = apply_broadcast(shape, shape2, index);
- in_t value1 = tensor_read<in_t>(input1, shape1, index1);
- in_t value2 = tensor_read<in_t>(input2, shape2, index2);
- in_t result = value1 & value2;
- tensor_write<in_t>(output, shape, index, result);
+ in_out_t value1 = tensor_read<in_out_t>(input1, shape1, index1);
+ in_out_t value2 = tensor_read<in_out_t>(input2, shape2, index2);
+ in_out_t result = value1 & value2;
+ tensor_write<in_out_t>(output, shape, index, result);
}
----
*Supported Data Types:*
|===
-|Profile|Mode|in_t
+|Profile|Mode|in_out_t
|Any|signed 8|int8_t
|Any|signed 16|int16_t
@@ -146,9 +146,9 @@ Axis of size 1 will be broadcast as necessary. Rank of input tensors must match.
|===
|Argument|Type|Name|Shape|Description
-|Input|in_t*|input1|shape1|Input tensor
-|Input|in_t*|input2|shape2|Input tensor with the same rank as input1
-|Output|in_t*|output|shape|Output tensor with broadcast shape if necessary
+|Input|in_out_t*|input1|shape1|Input tensor
+|Input|in_out_t*|input2|shape2|Input tensor with the same rank as input1
+|Output|in_out_t*|output|shape|Output tensor with broadcast shape if necessary
|===
*Operation Function:*
@@ -158,17 +158,17 @@ Axis of size 1 will be broadcast as necessary. Rank of input tensors must match.
for_each(index in shape) {
index1 = apply_broadcast(shape, shape1, index);
index2 = apply_broadcast(shape, shape2, index);
- in_t value1 = tensor_read<in_t>(input1, shape1, index1);
- in_t value2 = tensor_read<in_t>(input2, shape2, index2);
- in_t result = value1 | value2;
- tensor_write<in_t>(output, shape, index, result);
+ in_out_t value1 = tensor_read<in_out_t>(input1, shape1, index1);
+ in_out_t value2 = tensor_read<in_out_t>(input2, shape2, index2);
+ in_out_t result = value1 | value2;
+ tensor_write<in_out_t>(output, shape, index, result);
}
----
*Supported Data Types:*
|===
-|Profile|Mode|in_t
+|Profile|Mode|in_out_t
|Any|signed 8|int8_t
|Any|signed 16|int16_t
@@ -185,9 +185,9 @@ Axis of size 1 will be broadcast as necessary. Rank of input tensors must match.
|===
|Argument|Type|Name|Shape|Description
-|Input|in_t*|input1|shape1|Input tensor
-|Input|in_t*|input2|shape2|Input tensor with the same rank as input1
-|Output|in_t*|output|shape|Output tensor with broadcast shape if necessary
+|Input|in_out_t*|input1|shape1|Input tensor
+|Input|in_out_t*|input2|shape2|Input tensor with the same rank as input1
+|Output|in_out_t*|output|shape|Output tensor with broadcast shape if necessary
|===
*Operation Function:*
@@ -197,17 +197,17 @@ Axis of size 1 will be broadcast as necessary. Rank of input tensors must match.
for_each(index in shape) {
index1 = apply_broadcast(shape, shape1, index);
index2 = apply_broadcast(shape, shape2, index);
- in_t value1 = tensor_read<in_t>(input1, shape1, index1);
- in_t value2 = tensor_read<in_t>(input2, shape2, index2);
- in_t result = value1 ^ value2;
- tensor_write<in_t>(output, shape, index, result);
+ in_out_t value1 = tensor_read<in_out_t>(input1, shape1, index1);
+ in_out_t value2 = tensor_read<in_out_t>(input2, shape2, index2);
+ in_out_t result = value1 ^ value2;
+ tensor_write<in_out_t>(output, shape, index, result);
}
----
*Supported Data Types:*
|===
-|Profile|Mode|in_t
+|Profile|Mode|in_out_t
|Any|signed 8|int8_t
|Any|signed 16|int16_t
@@ -227,9 +227,9 @@ Quantized integer divide should use TABLE (for 1/x) and MUL.
|===
|Argument|Type|Name|Shape|Description
-|Input|in_t*|input1|shape1|Input tensor
-|Input|in_t*|input2|shape2|Input tensor with the same rank as input1
-|Output|in_t*|output|shape|Output tensor with broadcast shape if necessary
+|Input|in_out_t*|input1|shape1|Input tensor
+|Input|in_out_t*|input2|shape2|Input tensor with the same rank as input1
+|Output|in_out_t*|output|shape|Output tensor with broadcast shape if necessary
|===
*Operation Function:*
@@ -239,20 +239,20 @@ Quantized integer divide should use TABLE (for 1/x) and MUL.
for_each(index in shape) {
index1 = apply_broadcast(shape, shape1, index);
index2 = apply_broadcast(shape, shape2, index);
- in_t value1 = tensor_read<in_t>(input1, shape1, index1);
- in_t value2 = tensor_read<in_t>(input2, shape2, index2);
+ in_out_t value1 = tensor_read<in_out_t>(input1, shape1, index1);
+ in_out_t value2 = tensor_read<in_out_t>(input2, shape2, index2);
REQUIRE(value2 != 0);
- // This catches the case where we divide minimum<in_t> by -1
+ // This catches the case where we divide minimum<in_out_t> by -1
// which is not representable in two's complement
- REQUIRE((int64_t)value1 / value2 <= maximum<in_t>);
- in_t result = value1 / value2;
- tensor_write<in_t>(output, shape, index, result);
+ REQUIRE((int64_t)value1 / value2 <= maximum<in_out_t>);
+ in_out_t result = value1 / value2;
+ tensor_write<in_out_t>(output, shape, index, result);
}
----
*Supported Data Types:*
|===
-|Profile|Mode|in_t
+|Profile|Mode|in_out_t
|Any|signed 32|int32_t
|===
@@ -267,9 +267,9 @@ Axis of size 1 will be broadcast, as necessary. Rank of input tensors must match
|===
|Argument|Type|Name|Shape|Description
-|Input|in_t*|input1|shape1|Input tensor
-|Input|in_t*|input2|shape2|Input tensor with the same rank as input1
-|Output|in_t*|output|shape|Output tensor with broadcast shape if necessary
+|Input|in_out_t*|input1|shape1|Input tensor
+|Input|in_out_t*|input2|shape2|Input tensor with the same rank as input1
+|Output|in_out_t*|output|shape|Output tensor with broadcast shape if necessary
|===
*Operation Function:*
@@ -279,17 +279,17 @@ Axis of size 1 will be broadcast, as necessary. Rank of input tensors must match
for_each(index in shape) {
index1 = apply_broadcast(shape, shape1, index);
index2 = apply_broadcast(shape, shape2, index);
- in_t value1 = tensor_read<in_t>(input1, shape1, index1);
- in_t value2 = tensor_read<in_t>(input2, shape2, index2);
- in_t result = value1 && value2;
- tensor_write<in_t>(output, shape, index, result);
+ in_out_t value1 = tensor_read<in_out_t>(input1, shape1, index1);
+ in_out_t value2 = tensor_read<in_out_t>(input2, shape2, index2);
+ in_out_t result = value1 && value2;
+ tensor_write<in_out_t>(output, shape, index, result);
}
----
*Supported Data Types:*
|===
-|Profile|Mode|in_t
+|Profile|Mode|in_out_t
|Any|Bool|bool_t
|===
@@ -304,9 +304,9 @@ Axis of size 1 will be broadcast, as necessary. Rank of input tensors must match
|===
|Argument|Type|Name|Shape|Description
-|Input|in_t*|input1|shape1|Input tensor
-|Input|in_t*|input2|shape2|Input tensor with the same rank as input1
-|Output|in_t*|output|shape|Output tensor with broadcast shape if necessary
+|Input|in_out_t*|input1|shape1|Input tensor
+|Input|in_out_t*|input2|shape2|Input tensor with the same rank as input1
+|Output|in_out_t*|output|shape|Output tensor with broadcast shape if necessary
|===
*Operation Function:*
@@ -316,18 +316,18 @@ Axis of size 1 will be broadcast, as necessary. Rank of input tensors must match
for_each(index in shape) {
index1 = apply_broadcast(shape, shape1, index);
index2 = apply_broadcast(shape, shape2, index);
- in_t value1 = tensor_read<in_t>(input1, shape1, index1);
- in_t value2 = tensor_read<in_t>(input2, shape2, index2);
+ in_out_t value1 = tensor_read<in_out_t>(input1, shape1, index1);
+ in_out_t value2 = tensor_read<in_out_t>(input2, shape2, index2);
REQUIRE(0 <= value2 && value2 <= 31);
- in_t result = value1 << value2;
- tensor_write<in_t>(output, shape, index, result);
+ in_out_t result = value1 << value2;
+ tensor_write<in_out_t>(output, shape, index, result);
}
----
*Supported Data Types:*
|===
-|Profile|Mode|in_t
+|Profile|Mode|in_out_t
|Any|signed 8|int8_t
|Any|signed 16|int16_t
@@ -344,9 +344,9 @@ Axis of size 1 will be broadcast, as necessary. Rank of input tensors must match
|===
|Argument|Type|Name|Shape|Description
-|Input|in_t*|input1|shape1|Input tensor
-|Input|in_t*|input2|shape2|Input tensor with the same rank as input1
-|Output|in_t*|output|shape|Output tensor with broadcast shape if necessary
+|Input|in_out_t*|input1|shape1|Input tensor
+|Input|in_out_t*|input2|shape2|Input tensor with the same rank as input1
+|Output|in_out_t*|output|shape|Output tensor with broadcast shape if necessary
|===
*Operation Function:*
@@ -356,18 +356,18 @@ Axis of size 1 will be broadcast, as necessary. Rank of input tensors must match
for_each(index in shape) {
index1 = apply_broadcast(shape, shape1, index);
index2 = apply_broadcast(shape, shape2, index);
- in_t value1 = tensor_read<in_t>(input1, shape1, index1);
- in_t value2 = tensor_read<in_t>(input2, shape2, index2);
+ in_out_t value1 = tensor_read<in_out_t>(input1, shape1, index1);
+ in_out_t value2 = tensor_read<in_out_t>(input2, shape2, index2);
REQUIRE(0 <= value2 && value2 <= 31);
- in_t result = (in_t)((unsigned in_t)value1 >> value2);
- tensor_write<in_t>(output, shape, index, result);
+ in_out_t result = (in_out_t)((unsigned in_out_t)value1 >> value2);
+ tensor_write<in_out_t>(output, shape, index, result);
}
----
*Supported Data Types:*
|===
-|Profile|Mode|in_t
+|Profile|Mode|in_out_t
|Any|signed 8|int8_t
|Any|signed 16|int16_t
@@ -384,9 +384,9 @@ Axis of size 1 will be broadcast as necessary. Rank of input tensors must match.
|===
|Argument|Type|Name|Shape|Description
-|Input|in_t*|input1|shape1|Input tensor
-|Input|in_t*|input2|shape2|Input tensor with the same rank as input1
-|Output|in_t*|output|shape|Output tensor with broadcast shape if necessary
+|Input|in_out_t*|input1|shape1|Input tensor
+|Input|in_out_t*|input2|shape2|Input tensor with the same rank as input1
+|Output|in_out_t*|output|shape|Output tensor with broadcast shape if necessary
|===
*Operation Function:*
@@ -396,17 +396,17 @@ Axis of size 1 will be broadcast as necessary. Rank of input tensors must match.
for_each(index in shape) {
index1 = apply_broadcast(shape, shape1, index);
index2 = apply_broadcast(shape, shape2, index);
- in_t value1 = tensor_read<in_t>(input1, shape1, index1);
- in_t value2 = tensor_read<in_t>(input2, shape2, index2);
- in_t result = value1 || value2;
- tensor_write<in_t>(output, shape, index, result);
+ in_out_t value1 = tensor_read<in_out_t>(input1, shape1, index1);
+ in_out_t value2 = tensor_read<in_out_t>(input2, shape2, index2);
+ in_out_t result = value1 || value2;
+ tensor_write<in_out_t>(output, shape, index, result);
}
----
*Supported Data Types:*
|===
-|Profile|Mode|in_t
+|Profile|Mode|in_out_t
|Any|Bool|bool_t
|===
@@ -421,9 +421,9 @@ Axis of size 1 will be broadcast as necessary. Rank of input tensors must match.
|===
|Argument|Type|Name|Shape|Description
-|Input|in_t*|input1|shape1|Input tensor
-|Input|in_t*|input2|shape2|Input tensor with the same rank as input1
-|Output|in_t*|output|shape|Output tensor of same type as the input tensors, with broadcast shape if necessary
+|Input|in_out_t*|input1|shape1|Input tensor
+|Input|in_out_t*|input2|shape2|Input tensor with the same rank as input1
+|Output|in_out_t*|output|shape|Output tensor of same type as the input tensors, with broadcast shape if necessary
|===
*Operation Function:*
@@ -433,17 +433,17 @@ Axis of size 1 will be broadcast as necessary. Rank of input tensors must match.
for_each(index in shape) {
index1 = apply_broadcast(shape, shape1, index);
index2 = apply_broadcast(shape, shape2, index);
- in_t value1 = tensor_read<in_t>(input1, shape1, index1);
- in_t value2 = tensor_read<in_t>(input2, shape2, index2);
- in_t result = value1 != value2;
- tensor_write<in_t>(output, shape, index, result);
+ in_out_t value1 = tensor_read<in_out_t>(input1, shape1, index1);
+ in_out_t value2 = tensor_read<in_out_t>(input2, shape2, index2);
+ in_out_t result = value1 != value2;
+ tensor_write<in_out_t>(output, shape, index, result);
}
----
*Supported Data Types:*
|===
-|Profile|Mode|in_t
+|Profile|Mode|in_out_t
|Any|Bool|bool_t
|===
@@ -458,9 +458,9 @@ Axis of size 1 will be broadcast, as necessary. Rank of input tensors must match
|===
|Argument|Type|Name|Shape|Description
-|Input|in_t*|input1|shape1|Input tensor
-|Input|in_t*|input2|shape2|Input tensor with the same rank as input1
-|Output|in_t*|output|shape|Output tensor with broadcast shape if necessary
+|Input|in_out_t*|input1|shape1|Input tensor
+|Input|in_out_t*|input2|shape2|Input tensor with the same rank as input1
+|Output|in_out_t*|output|shape|Output tensor with broadcast shape if necessary
|===
*Operation Function:*
@@ -470,17 +470,17 @@ Axis of size 1 will be broadcast, as necessary. Rank of input tensors must match
for_each(index in shape) {
index1 = apply_broadcast(shape, shape1, index);
index2 = apply_broadcast(shape, shape2, index);
- in_t value1 = tensor_read<in_t>(input1, shape1, index1);
- in_t value2 = tensor_read<in_t>(input2, shape2, index2);
- in_t result = apply_max(value1, value2);
- tensor_write<in_t>(output, shape, index, result);
+ in_out_t value1 = tensor_read<in_out_t>(input1, shape1, index1);
+ in_out_t value2 = tensor_read<in_out_t>(input2, shape2, index2);
+ in_out_t result = apply_max(value1, value2);
+ tensor_write<in_out_t>(output, shape, index, result);
}
----
*Supported Data Types:*
|===
-|Profile|Mode|in_t
+|Profile|Mode|in_out_t
|Any|signed 32|int32_t
|MI, MT|floating-point|float_t
@@ -496,9 +496,9 @@ Axis of size 1 will be broadcast, as necessary. Rank of input tensors must match
|===
|Argument|Type|Name|Shape|Description
-|Input|in_t*|input1|shape1|Input tensor
-|Input|in_t*|input2|shape2|Input tensor with the same rank as input1
-|Output|in_t*|output|shape|Output tensor with broadcast shape if necessary
+|Input|in_out_t*|input1|shape1|Input tensor
+|Input|in_out_t*|input2|shape2|Input tensor with the same rank as input1
+|Output|in_out_t*|output|shape|Output tensor with broadcast shape if necessary
|===
*Operation Function:*
@@ -508,17 +508,17 @@ Axis of size 1 will be broadcast, as necessary. Rank of input tensors must match
for_each(index in shape) {
index1 = apply_broadcast(shape, shape1, index);
index2 = apply_broadcast(shape, shape2, index);
- in_t value1 = tensor_read<in_t>(input1, shape1, index1);
- in_t value2 = tensor_read<in_t>(input2, shape2, index2);
- in_t result = apply_min(value1, value2);
- tensor_write<in_t>(output, shape, index, result);
+ in_out_t value1 = tensor_read<in_out_t>(input1, shape1, index1);
+ in_out_t value2 = tensor_read<in_out_t>(input2, shape2, index2);
+ in_out_t result = apply_min(value1, value2);
+ tensor_write<in_out_t>(output, shape, index, result);
}
----
*Supported Data Types:*
|===
-|Profile|Mode|in_t
+|Profile|Mode|in_out_t
|Any|signed 32|int32_t
|MI, MT|floating-point|float_t
@@ -579,9 +579,9 @@ Axis of size 1 will be broadcast, as necessary. Rank of input tensors must match
|===
|Argument|Type|Name|Shape|Description
-|Input|in_t*|input1|shape1|Input tensor from 1 to 4 dims
-|Input|in_t*|input2|shape2|Input tensor with the same rank as input1
-|Output|in_t*|output|shape|Output tensor of same type as the input tensors, with broadcast shape if necessary
+|Input|in_out_t*|input1|shape1|Input tensor from 1 to 4 dims
+|Input|in_out_t*|input2|shape2|Input tensor with the same rank as input1
+|Output|in_out_t*|output|shape|Output tensor of same type as the input tensors, with broadcast shape if necessary
|===
*Operation Function:*
@@ -591,17 +591,17 @@ Axis of size 1 will be broadcast, as necessary. Rank of input tensors must match
for_each(index in shape) {
index1 = apply_broadcast(shape, shape1, index);
index2 = apply_broadcast(shape, shape2, index);
- in_t value1 = tensor_read<in_t>(input1, shape1, index1);
- in_t value2 = tensor_read<in_t>(input2, shape2, index2);
- in_t result = apply_pow<in_t>(value1, value2);
- tensor_write<in_t>(output, shape, index, result);
+ in_out_t value1 = tensor_read<in_out_t>(input1, shape1, index1);
+ in_out_t value2 = tensor_read<in_out_t>(input2, shape2, index2);
+ in_out_t result = apply_pow<in_out_t>(value1, value2);
+ tensor_write<in_out_t>(output, shape, index, result);
}
----
*Supported Data Types:*
|===
-|Profile|Mode|in_t
+|Profile|Mode|in_out_t
|MI, MT|floating-point|float_t
|===
@@ -616,9 +616,9 @@ Axis of size 1 will be broadcast as necessary. Rank of input tensors must match.
|===
|Argument|Type|Name|Shape|Description
-|Input|in_t*|input1|shape1|Input tensor
-|Input|in_t*|input2|shape2|Input tensor with the same rank as input1
-|Output|in_t*|output|shape|Output tensor with broadcast shape if necessary
+|Input|in_out_t*|input1|shape1|Input tensor
+|Input|in_out_t*|input2|shape2|Input tensor with the same rank as input1
+|Output|in_out_t*|output|shape|Output tensor with broadcast shape if necessary
|===
*Operation Function:*
@@ -628,17 +628,17 @@ Axis of size 1 will be broadcast as necessary. Rank of input tensors must match.
for_each(index in shape) {
index1 = apply_broadcast(shape, shape1, index);
index2 = apply_broadcast(shape, shape2, index);
- in_t value1 = tensor_read<in_t>(input1, shape1, index1);
- in_t value2 = tensor_read<in_t>(input2, shape2, index2);
- in_t result = apply_sub<in_t>(value1, value2);
- tensor_write<in_t>(output, shape, index, result);
+ in_out_t value1 = tensor_read<in_out_t>(input1, shape1, index1);
+ in_out_t value2 = tensor_read<in_out_t>(input2, shape2, index2);
+ in_out_t result = apply_sub<in_out_t>(value1, value2);
+ tensor_write<in_out_t>(output, shape, index, result);
}
----
*Supported Data Types:*
|===
-|Profile|Mode|in_t
+|Profile|Mode|in_out_t
|Any|signed 32|int32_t
|MI, MT|floating-point|float_t
diff --git a/chapters/ewise_ternary.adoc b/chapters/ewise_ternary.adoc
index 751c093..e61e1c2 100644
--- a/chapters/ewise_ternary.adoc
+++ b/chapters/ewise_ternary.adoc
@@ -19,9 +19,9 @@ Elementwise select of the output based on a condition.
|Argument|Type|Name|Shape|Description
|Input|cmp_t|input1|shape1|Input selector tensor
-|Input|in_t*|input2|shape2|Input value tensor if input1 is True
-|Input|in_t*|input3|shape3|Input value tensor if input1 is False
-|Output|in_t*|output|shape|Output tensor of same type as input2 and input3, with broadcast shape if necessary
+|Input|in_out_t*|input2|shape2|Input value tensor if input1 is True
+|Input|in_out_t*|input3|shape3|Input value tensor if input1 is False
+|Output|in_out_t*|output|shape|Output tensor of same type as input2 and input3, with broadcast shape if necessary
|===
*Operation Function:*
@@ -33,21 +33,21 @@ for_each(index in shape) {
index2 = apply_broadcast(shape, shape2, index);
index3 = apply_broadcast(shape, shape3, index);
cmp_t value1 = tensor_read<cmp_t>(input1, shape1, index1);
- in_t value2 = tensor_read<in_t>(input2, shape2, index2);
- in_t value3 = tensor_read<in_t>(input3, shape3, index3);
- in_t result;
+ in_out_t value2 = tensor_read<in_out_t>(input2, shape2, index2);
+ in_out_t value3 = tensor_read<in_out_t>(input3, shape3, index3);
+ in_out_t result;
if (value1) {
result = value2;
} else {
result = value3;
}
- tensor_write<in_t>(output, shape, index, result);
+ tensor_write<in_out_t>(output, shape, index, result);
}
----
*Supported Data Types:*
|===
-|Profile|Mode|cmp_t|in_t
+|Profile|Mode|cmp_t|in_out_t
|Any|Boolean|bool_t|bool_t
|Any|signed 8|bool_t|int8_t
diff --git a/chapters/ewise_unary.adoc b/chapters/ewise_unary.adoc
index 633b8ac..326cc3c 100644
--- a/chapters/ewise_unary.adoc
+++ b/chapters/ewise_unary.adoc
@@ -18,8 +18,8 @@ Elementwise absolute value operation
|===
|Argument|Type|Name|Shape|Description
-|Input|in_t*|input1|shape|Input tensor
-|Output|in_t*|output|shape|Output tensor of same type, size as the input tensor
+|Input|in_out_t*|input1|shape|Input tensor
+|Output|in_out_t*|output|shape|Output tensor of same type, size as the input tensor
|===
*Operation Function:*
@@ -27,17 +27,17 @@ Elementwise absolute value operation
[source,c++]
----
for_each(index in shape) {
- in_t value1 = tensor_read<in_t>(input1, shape, index);
+ in_out_t value1 = tensor_read<in_out_t>(input1, shape, index);
if (value1 < 0)
- value1 = apply_sub<in_t>(0, value1);
- tensor_write<in_t>(output, shape, index, value1);
+ value1 = apply_sub<in_out_t>(0, value1);
+ tensor_write<in_out_t>(output, shape, index, value1);
}
----
*Supported Data Types:*
|===
-|Profile|Mode|in_t
+|Profile|Mode|in_out_t
|Any|signed 32|int32_t
|MI, MT|floating-point|float_t
@@ -52,8 +52,8 @@ Elementwise bitwise NOT of input tensor.
|===
|Argument|Type|Name|Shape|Description
-|Input|in_t*|input1|shape|Input tensor
-|Output|in_t*|output|shape|Output tensor of same type, size as the input tensor
+|Input|in_out_t*|input1|shape|Input tensor
+|Output|in_out_t*|output|shape|Output tensor of same type, size as the input tensor
|===
*Operation Function:*
@@ -61,16 +61,16 @@ Elementwise bitwise NOT of input tensor.
[source,c++]
----
for_each(index in shape) {
- in_t value1 = tensor_read<in_t>(input1, shape, index);
- in_t result = ~value1;
- tensor_write<in_t>(output, shape, index, result);
+ in_out_t value1 = tensor_read<in_out_t>(input1, shape, index);
+ in_out_t result = ~value1;
+ tensor_write<in_out_t>(output, shape, index, result);
}
----
*Supported Data Types:*
|===
-|Profile|Mode|in_t
+|Profile|Mode|in_out_t
|Any|signed 8|int8_t
|Any|signed 16|int16_t
@@ -86,8 +86,8 @@ Elementwise ceiling operation
|===
|Argument|Type|Name|Shape|Description
-|Input|in_t*|input1|shape|Input tensor
-|Output|in_t*|output|shape|Output tensor of same type, size as the input tensor
+|Input|in_out_t*|input1|shape|Input tensor
+|Output|in_out_t*|output|shape|Output tensor of same type, size as the input tensor
|===
*Operation Function:*
@@ -95,16 +95,16 @@ Elementwise ceiling operation
[source,c++]
----
for_each(index in shape) {
- in_t value1 = tensor_read<in_t>(input1, shape, index);
- in_t result = apply_ceil<in_t>(value1);
- tensor_write<in_t>(output, shape, index, result);
+ in_out_t value1 = tensor_read<in_out_t>(input1, shape, index);
+ in_out_t result = apply_ceil<in_out_t>(value1);
+ tensor_write<in_out_t>(output, shape, index, result);
}
----
*Supported Data Types:*
|===
-|Profile|Mode|in_t
+|Profile|Mode|in_out_t
|MI, MT|floating-point|float_t
|===
@@ -118,8 +118,8 @@ Elementwise count leading zeros operation
|===
|Argument|Type|Name|Shape|Description
-|Input|in_t*|input1|shape|Input tensor
-|Output|in_t*|output|shape|Output tensor of same type, size as the input tensor
+|Input|in_out_t*|input1|shape|Input tensor
+|Output|in_out_t*|output|shape|Output tensor of same type, size as the input tensor
|===
*Operation Function:*
@@ -127,15 +127,15 @@ Elementwise count leading zeros operation
[source,c++]
----
for_each(index in shape) {
- in_t value1 = tensor_read<in_t>(input1, shape, index);
- in_t result = count_leading_zeros(value1);
- tensor_write<in_t>(output, shape, index, result);
+ in_out_t value1 = tensor_read<in_out_t>(input1, shape, index);
+ in_out_t result = count_leading_zeros(value1);
+ tensor_write<in_out_t>(output, shape, index, result);
}
----
*Supported Data Types:*
|===
-|Profile|Mode|in_t
+|Profile|Mode|in_out_t
|Any|signed 32|int32_t
|===
@@ -149,8 +149,8 @@ Elementwise e to the x operation
|===
|Argument|Type|Name|Shape|Description
-|Input|in_t*|input1|shape|Input tensor
-|Output|in_t*|output|shape|Output tensor of same type, size as the input tensor
+|Input|in_out_t*|input1|shape|Input tensor
+|Output|in_out_t*|output|shape|Output tensor of same type, size as the input tensor
|===
*Operation Function:*
@@ -158,16 +158,16 @@ Elementwise e to the x operation
[source,c++]
----
for_each(index in shape) {
- in_t value1 = tensor_read<in_t>(input1, shape, index);
- in_t result = apply_exp<in_t>(value1);
- tensor_write<in_t>(output, shape, index, result);
+ in_out_t value1 = tensor_read<in_out_t>(input1, shape, index);
+ in_out_t result = apply_exp<in_out_t>(value1);
+ tensor_write<in_out_t>(output, shape, index, result);
}
----
*Supported Data Types:*
|===
-|Profile|Mode|in_t
+|Profile|Mode|in_out_t
|MI, MT|floating-point|float_t
|===
@@ -181,8 +181,8 @@ Elementwise floor operation
|===
|Argument|Type|Name|Shape|Description
-|Input|in_t*|input1|shape|Input tensor
-|Output|in_t*|output|shape|Output tensor of same type, size as the input tensor
+|Input|in_out_t*|input1|shape|Input tensor
+|Output|in_out_t*|output|shape|Output tensor of same type, size as the input tensor
|===
*Operation Function:*
@@ -190,16 +190,16 @@ Elementwise floor operation
[source,c++]
----
for_each(index in shape) {
- in_t value1 = tensor_read<in_t>(input1, shape, index);
- in_t result = apply_floor<in_t>(value1);
- tensor_write<in_t>(output, shape, index, result);
+ in_out_t value1 = tensor_read<in_out_t>(input1, shape, index);
+ in_out_t result = apply_floor<in_out_t>(value1);
+ tensor_write<in_out_t>(output, shape, index, result);
}
----
*Supported Data Types:*
|===
-|Profile|Mode|in_t
+|Profile|Mode|in_out_t
|MI, MT|floating-point|float_t
|===
@@ -213,8 +213,8 @@ Elementwise natural logarithm operation
|===
|Argument|Type|Name|Shape|Description
-|Input|in_t*|input1|shape|Input tensor
-|Output|in_t*|output|shape|Output tensor of same type, size as the input tensor
+|Input|in_out_t*|input1|shape|Input tensor
+|Output|in_out_t*|output|shape|Output tensor of same type, size as the input tensor
|===
*Operation Function:*
@@ -222,16 +222,16 @@ Elementwise natural logarithm operation
[source,c++]
----
for_each(index in shape) {
- in_t value1 = tensor_read<in_t>(input1, shape, index);
- in_t result = apply_log<in_t>(value1);
- tensor_write<in_t>(output, shape, index, result);
+ in_out_t value1 = tensor_read<in_out_t>(input1, shape, index);
+ in_out_t result = apply_log<in_out_t>(value1);
+ tensor_write<in_out_t>(output, shape, index, result);
}
----
*Supported Data Types:*
|===
-|Profile|Mode|in_t
+|Profile|Mode|in_out_t
|MI, MT|floating-point|float_t
|===
@@ -245,8 +245,8 @@ Elementwise logical NOT of input.
|===
|Argument|Type|Name|Shape|Description
-|Input|in_t*|input1|shape|Input tensor
-|Output|in_t*|output|shape|Output tensor of same type, size as the input tensor
+|Input|in_out_t*|input1|shape|Input tensor
+|Output|in_out_t*|output|shape|Output tensor of same type, size as the input tensor
|===
*Operation Function:*
@@ -254,16 +254,16 @@ Elementwise logical NOT of input.
[source,c++]
----
for_each(index in shape) {
- in_t value1 = tensor_read<in_t>(input1, shape1, index);
- in_t result = !value1;
- tensor_write<in_t>(output, shape, index, result);
+ in_out_t value1 = tensor_read<in_out_t>(input1, shape, index);
+ in_out_t result = !value1;
+ tensor_write<in_out_t>(output, shape, index, result);
}
----
*Supported Data Types:*
|===
-|Profile|Mode|in_t
+|Profile|Mode|in_out_t
|Any|bool|bool_t
|===
@@ -277,31 +277,31 @@ Elementwise negation operation
|===
|Argument|Type|Name|Shape|Description
-|Input|in_t*|input1|shape|Input tensor
-|Attribute|in_t|input1_zp|-|Input 1 zero point. Must be zero for non-int8 types.
-|Attribute|in_t|output_zp|-|Output zero point. Must be zero for non-int8 types.
-|Output|in_t*|output|shape|Output tensor of same type, size as the input tensor
+|Input|in_out_t*|input1|shape|Input tensor
+|Attribute|in_out_t|input1_zp|-|Input 1 zero point. Must be zero for non-int8 types.
+|Attribute|in_out_t|output_zp|-|Output zero point. Must be zero for non-int8 types.
+|Output|in_out_t*|output|shape|Output tensor of same type, size as the input tensor
|===
*Operation Function:*
[source,c++]
----
-ERROR_IF(in_t != int8_t && input1_zp != 0) // Zero point only for int8_t
-ERROR_IF(in_t != int8_t && output_zp != 0) // Zero point only for int8_t
+ERROR_IF(in_out_t != int8_t && input1_zp != 0) // Zero point only for int8_t
+ERROR_IF(in_out_t != int8_t && output_zp != 0) // Zero point only for int8_t
for_each(index in shape) {
- in_t value1 = tensor_read<in_t>(input1, shape, index);
+ in_out_t value1 = tensor_read<in_out_t>(input1, shape, index);
acc_t value = (acc_t)value1 - input1_zp;
value = apply_sub<acc_t>(0, value);
- in_t result = (in_t)apply_clip<acc_t>(value + output_zp, minimum<in_t>, maximum<in_t>);
- tensor_write<in_t>(output, shape, index, result);
+ in_out_t result = (in_out_t)apply_clip<acc_t>(value + output_zp, minimum<in_out_t>, maximum<in_out_t>);
+ tensor_write<in_out_t>(output, shape, index, result);
}
----
*Supported Data Types:*
|===
-|Profile|Mode|in_t|acc_t
+|Profile|Mode|in_out_t|acc_t
|Any|signed 8|int8_t|int32_t
|Any|signed 16|int16_t|int32_t
@@ -318,8 +318,8 @@ Elementwise reciprocal operation. For integer operation, a TABLE should be used
|===
|Argument|Type|Name|Shape|Description
-|Input|in_t*|input1|shape|Input tensor
-|Output|in_t*|output|shape|Output tensor of same type, size as the input tensor
+|Input|in_out_t*|input1|shape|Input tensor
+|Output|in_out_t*|output|shape|Output tensor of same type, size as the input tensor
|===
*Operation Function:*
@@ -327,16 +327,16 @@ Elementwise reciprocal operation. For integer operation, a TABLE should be used
[source,c++]
----
for_each(index in shape) {
- in_t value1 = tensor_read<in_t>(input1, shape1, index);
- in_t result = 1.0 / value1;
- tensor_write<in_t>(output, shape, index, result);
+ in_out_t value1 = tensor_read<in_out_t>(input1, shape, index);
+ in_out_t result = 1.0 / value1;
+ tensor_write<in_out_t>(output, shape, index, result);
}
----
*Supported Data Types:*
|===
-|Profile|Mode|in_t
+|Profile|Mode|in_out_t
|MI, MT|floating-point|float_t
|===
@@ -350,8 +350,8 @@ Elementwise reciprocal square root operation. For integer operation, a TABLE sho
|===
|Argument|Type|Name|Shape|Description
-|Input|in_t*|input1|shape|Input tensor
-|Output|in_t*|output|shape|Output tensor of same type, size as the input tensor
+|Input|in_out_t*|input1|shape|Input tensor
+|Output|in_out_t*|output|shape|Output tensor of same type, size as the input tensor
|===
*Operation Function:*
@@ -359,16 +359,16 @@ Elementwise reciprocal square root operation. For integer operation, a TABLE sho
[source,c++]
----
for_each(index in shape) {
- in_t value1 = tensor_read<in_t>(input1, shape1, index);
- in_t result = 1.0 / apply_sqrt<in_t>(value1);
- tensor_write<in_t>(output, shape, index, result);
+ in_out_t value1 = tensor_read<in_out_t>(input1, shape, index);
+ in_out_t result = 1.0 / apply_sqrt<in_out_t>(value1);
+ tensor_write<in_out_t>(output, shape, index, result);
}
----
*Supported Data Types:*
|===
-|Profile|Mode|in_t
+|Profile|Mode|in_out_t
|MI, MT|floating-point|float_t
|===
diff --git a/chapters/reduction.adoc b/chapters/reduction.adoc
index 11db960..fdf30df 100644
--- a/chapters/reduction.adoc
+++ b/chapters/reduction.adoc
@@ -18,9 +18,9 @@ Reduce a tensor along the given axis with a logical AND operation
|===
|Argument|Type|Name|Shape|Description
-|Input|in_t*|input|shape1|Input tensor with rank from 1 to 4
+|Input|in_out_t*|input|shape1|Input tensor with rank from 1 to 4
|Attribute|int32_t|axis|-|Axis to reduce, in range from 0 to rank(shape1)-1
-|Output|in_t*|output|shape|Output tensor. Same rank as the input tensor.
+|Output|in_out_t*|output|shape|Output tensor. Same rank as the input tensor.
|===
*Operation Function:*
@@ -32,22 +32,22 @@ ERROR_IF(shape[axis] != 1);
// Initialize output state to true
for_each(index in shape) {
- tensor_write<in_t>(output, shape, index, true);
+ tensor_write<in_out_t>(output, shape, index, true);
}
for_each(index in shape1) {
out_index = index;
out_index[axis] = 0;
- in_t value = tensor_read<in_t>(input, shape1, index);
- in_t state = tensor_read<in_t>(output, shape, out_index);
+ in_out_t value = tensor_read<in_out_t>(input, shape1, index);
+ in_out_t state = tensor_read<in_out_t>(output, shape, out_index);
state = state && value;
- tensor_write<in_t>(output, shape, out_index, state);
+ tensor_write<in_out_t>(output, shape, out_index, state);
}
----
*Supported Data Types:*
|===
-|Profile|Mode|in_t
+|Profile|Mode|in_out_t
|Any|Boolean|bool_t
|===
@@ -61,9 +61,9 @@ Reduce a tensor along the given axis with a logical OR operation
|===
|Argument|Type|Name|Shape|Description
-|Input|in_t*|input|shape1|Input tensor with rank from 1 to 4
+|Input|in_out_t*|input|shape1|Input tensor with rank from 1 to 4
|Attribute|int32_t|axis|-|Axis to reduce, in range from 0 to rank(shape1)-1
-|Output|in_t*|output|shape|Output tensor. Same rank as the input tensor.
+|Output|in_out_t*|output|shape|Output tensor. Same rank as the input tensor.
|===
*Operation Function:*
@@ -75,22 +75,22 @@ ERROR_IF(shape[axis] != 1);
// Initialize output state to false
for_each(index in shape) {
- tensor_write<in_t>(output, shape, index, false);
+ tensor_write<in_out_t>(output, shape, index, false);
}
for_each(index in shape1) {
out_index = index;
out_index[axis] = 0;
- in_t value = tensor_read<in_t>(input, shape1, index);
- in_t state = tensor_read<in_t>(output, shape, out_index);
+ in_out_t value = tensor_read<in_out_t>(input, shape1, index);
+ in_out_t state = tensor_read<in_out_t>(output, shape, out_index);
state = state || value;
- tensor_write<in_t>(output, shape, out_index, state);
+ tensor_write<in_out_t>(output, shape, out_index, state);
}
----
*Supported Data Types:*
|===
-|Profile|Mode|in_t
+|Profile|Mode|in_out_t
|Any|Boolean|bool_t
|===
@@ -104,9 +104,9 @@ Reduce a tensor along the given axis with a maximum operation
|===
|Argument|Type|Name|Shape|Description
-|Input|in_t*|input|shape1|Input tensor with rank from 1 to 4
+|Input|in_out_t*|input|shape1|Input tensor with rank from 1 to 4
|Attribute|int32_t|axis|-|Axis to reduce, in range from 0 to rank(shape1)-1
-|Output|in_t*|output|shape|Output tensor. Same rank as the input tensor.
+|Output|in_out_t*|output|shape|Output tensor. Same rank as the input tensor.
|===
*Operation Function:*
@@ -116,22 +116,22 @@ Reduce a tensor along the given axis with a maximum operation
ERROR_IF(axis < 0 || axis >= rank(shape1));
ERROR_IF(shape[axis] != 1);
for_each(index in shape) {
- tensor_write<in_t>(output, shape, index, minimum<in_t>);
+ tensor_write<in_out_t>(output, shape, index, minimum<in_out_t>);
}
for_each(index in shape1) {
out_index = index;
out_index[axis] = 0;
- in_t value = tensor_read<in_t>(input, shape1, index);
- in_t state = tensor_read<in_t>(output, shape, out_index);
- state = apply_max<in_t>(state, value);
- tensor_write<in_t>(output, shape, out_index, state);
+ in_out_t value = tensor_read<in_out_t>(input, shape1, index);
+ in_out_t state = tensor_read<in_out_t>(output, shape, out_index);
+ state = apply_max<in_out_t>(state, value);
+ tensor_write<in_out_t>(output, shape, out_index, state);
}
----
*Supported Data Types:*
|===
-|Profile|Mode|in_t
+|Profile|Mode|in_out_t
|Any|signed 8|int8_t
|Any|signed 16|int16_t
@@ -147,9 +147,9 @@ Reduce a tensor along the given axis with a minimum operation
|===
|Argument|Type|Name|Shape|Description
-|Input|in_t*|input|shape1|Input tensor with rank from 1 to 4
+|Input|in_out_t*|input|shape1|Input tensor with rank from 1 to 4
|Attribute|int32_t|axis|-|Axis to reduce, in range from 0 to rank(shape1)-1
-|Output|in_t*|output|shape|Output tensor. Same rank as the input tensor.
+|Output|in_out_t*|output|shape|Output tensor. Same rank as the input tensor.
|===
*Operation Function:*
@@ -159,22 +159,22 @@ Reduce a tensor along the given axis with a minimum operation
ERROR_IF(axis < 0 || axis >= rank(shape1));
ERROR_IF(shape[axis] != 1);
for_each(index in shape) {
- tensor_write<in_t>(output, shape, index, maximum<in_t>);
+ tensor_write<in_out_t>(output, shape, index, maximum<in_out_t>);
}
for_each(index in shape1) {
out_index = index;
out_index[axis] = 0;
- in_t value = tensor_read<in_t>(input, shape1, index);
- in_t state = tensor_read<in_t>(output, shape, out_index);
- state = apply_min<in_t>(state, value);
- tensor_write<in_t>(output, shape, out_index, state);
+ in_out_t value = tensor_read<in_out_t>(input, shape1, index);
+ in_out_t state = tensor_read<in_out_t>(output, shape, out_index);
+ state = apply_min<in_out_t>(state, value);
+ tensor_write<in_out_t>(output, shape, out_index, state);
}
----
*Supported Data Types:*
|===
-|Profile|Mode|in_t
+|Profile|Mode|in_out_t
|Any|signed 8|int8_t
|Any|signed 16|int16_t
@@ -191,9 +191,9 @@ Reduce a tensor along the given axis by computing the product of the axis.
|===
|Argument|Type|Name|Shape|Description
-|Input|in_t*|input|shape1|Input tensor with rank from 1 to 4
+|Input|in_out_t*|input|shape1|Input tensor with rank from 1 to 4
|Attribute|int32_t|axis|-|Axis to reduce, in range from 0 to rank(shape1)-1
-|Output|in_t*|output|shape|Output tensor. Same rank as the input tensor.
+|Output|in_out_t*|output|shape|Output tensor. Same rank as the input tensor.
|===
*Operation Function:*
@@ -203,22 +203,22 @@ Reduce a tensor along the given axis by computing the product of the axis.
ERROR_IF(axis < 0 || axis >= rank(shape1));
ERROR_IF(shape[axis] != 1);
for_each(index in shape) {
- tensor_write<in_t>(output, shape, index, 1.0);
+ tensor_write<in_out_t>(output, shape, index, 1.0);
}
for_each(index in shape1) {
out_index = index;
out_index[axis] = 0;
- in_t value = tensor_read<in_t>(input, shape1, index);
- in_t state = tensor_read<in_t>(output, shape, out_index);
+ in_out_t value = tensor_read<in_out_t>(input, shape1, index);
+ in_out_t state = tensor_read<in_out_t>(output, shape, out_index);
state = state * value;
- tensor_write<in_t>(output, shape, out_index, state);
+ tensor_write<in_out_t>(output, shape, out_index, state);
}
----
*Supported Data Types:*
|===
-|Profile|Mode|in_t
+|Profile|Mode|in_out_t
|MI, MT|floating-point|float_t
|===
@@ -232,9 +232,9 @@ Reduce a tensor along the given axis by computing the sum of the axis.
|===
|Argument|Type|Name|Shape|Description
-|Input|in_t*|input|shape1|Input tensor with rank from 1 to 4
+|Input|in_out_t*|input|shape1|Input tensor with rank from 1 to 4
|Attribute|int32_t|axis|-|Axis to reduce, in range from 0 to rank(shape1)-1
-|Output|in_t*|output|shape|Output tensor. Same rank as the input tensor.
+|Output|in_out_t*|output|shape|Output tensor. Same rank as the input tensor.
|===
*Operation Function:*
@@ -244,22 +244,22 @@ Reduce a tensor along the given axis by computing the sum of the axis.
ERROR_IF(axis < 0 || axis >= rank(shape1));
ERROR_IF(shape[axis] != 1);
for_each(index in shape) {
- tensor_write<in_t>(output, shape, index, 0);
+ tensor_write<in_out_t>(output, shape, index, 0);
}
for_each(index in shape1) {
out_index = index;
out_index[axis] = 0;
- in_t value = tensor_read<in_t>(input, shape1, index);
- in_t state = tensor_read<in_t>(output, shape, out_index);
- state = apply_add<in_t>(state, value);
- tensor_write<in_t>(output, shape, out_index, state);
+ in_out_t value = tensor_read<in_out_t>(input, shape1, index);
+ in_out_t state = tensor_read<in_out_t>(output, shape, out_index);
+ state = apply_add<in_out_t>(state, value);
+ tensor_write<in_out_t>(output, shape, out_index, state);
}
----
*Supported Data Types:*
|===
-|Profile|Mode|in_t
+|Profile|Mode|in_out_t
|Any|signed 32|int32_t
|MI, MT|floating-point|float_t
diff --git a/chapters/tensor_ops.adoc b/chapters/tensor_ops.adoc
index cfab5ba..b2f0754 100644
--- a/chapters/tensor_ops.adoc
+++ b/chapters/tensor_ops.adoc
@@ -74,21 +74,21 @@ When calculating the average, only the number of valid input tensor values, but
|===
|Argument|Type|Name|Shape|Description
-|Input|in_t*|input|[N,IH,IW,C]|Input tensor 4D
+|Input|in_out_t*|input|[N,IH,IW,C]|Input tensor 4D
|Attribute|int*|kernel|[2]|[kernel_y, kernel_x]
|Attribute|int*|stride|[2]|[stride_y, stride_x]
|Attribute|int*|pad|[4]|[pad_top, pad_bottom, pad_left, pad_right]
-|Attribute|in_t|input_zp|-|Input tensor zero point. Must be zero for non-int8 types.
-|Attribute|in_t|output_zp|-|Output tensor zero point. Must be zero for non-int8 types.
-|Output|in_t*|output|[N,H,W,C]|Output tensor 4D
+|Attribute|in_out_t|input_zp|-|Input tensor zero point. Must be zero for non-int8 types.
+|Attribute|in_out_t|output_zp|-|Output tensor zero point. Must be zero for non-int8 types.
+|Output|in_out_t*|output|[N,H,W,C]|Output tensor 4D
|===
*Operation Function:*
[source,c++]
----
-ERROR_IF(in_t != int8_t && input_zp != 0); // Zero point only for int8_t
-ERROR_IF(in_t != int8_t && output_zp != 0); // Zero point only for int8_t
+ERROR_IF(in_out_t != int8_t && input_zp != 0); // Zero point only for int8_t
+ERROR_IF(in_out_t != int8_t && output_zp != 0); // Zero point only for int8_t
ERROR_IF(kernel_y < 1 || kernel_x < 1); // kernel size must be >= 1
ERROR_IF(stride_y < 1 || stride_x < 1);
ERROR_IF(pad_top < 0 || pad_bottom < 0 || pad_left < 0 || pad_right < 0);
@@ -102,7 +102,7 @@ ERROR_IF(H != idiv((IH + pad_top + pad_bottom + stride_y - kernel_y), stride_y))
ERROR_IF(W != idiv((IW + pad_left + pad_right + stride_x - kernel_x), stride_x));
for_each(0 <= n < N, 0 <= oy < H, 0 <= ox < W, 0 <= c < C ) {
- in_t output_val;
+ in_out_t output_val;
acc_t acc = 0;
int count = 0;
iy = oy * stride_y - pad_top;
@@ -114,25 +114,25 @@ for_each(0 <= n < N, 0 <= oy < H, 0 <= ox < W, 0 <= c < C ) {
// average, padding does not count
if (0 <= y < IH and 0 <= x < IW) {
count++;
- acc_t value = tensor_read<in_t>(input, [N,IH,IW,C], [n,y,x,c]);
+ acc_t value = tensor_read<in_out_t>(input, [N,IH,IW,C], [n,y,x,c]);
value = value - input_zp;
acc = apply_add<acc_t>(acc, value);
}
}
- if (is_float(in_t)) {
+ if (is_float(in_out_t)) {
output_val = acc / (float)count;
} else {
scale_t scale = reciprocal_scale(count);
acc = apply_scale_32(acc, scale.multiplier, scale.shift, false);
- output_val = (in_t)apply_clip<acc_t>(acc + output_zp, minimum<in_t>, maximum<in_t>)
+ output_val = (in_out_t)apply_clip<acc_t>(acc + output_zp, minimum<in_out_t>, maximum<in_out_t>);
}
- tensor_write<in_t>(output, [N,H,W,C], [n,oy,ox,c], output_val);
+ tensor_write<in_out_t>(output, [N,H,W,C], [n,oy,ox,c], output_val);
}
----
*Supported Data Types:*
|===
-|Profile|Mode|in_t|acc_t
+|Profile|Mode|in_out_t|acc_t
|Any|signed 8|int8_t|int32_t
|Any|signed 16|int16_t|int32_t
@@ -150,13 +150,13 @@ Performs a 2D convolution over the given tensor input, using the weight tensor.
|Input|in_t*|input|[N,IH,IW,IC]|Input tensor
|Input (MT profile) Attribute (BI/MI profiles)|weight_t*|weight|[OC,KH,KW,IC]|Weight kernel size KH x KW
-|Input (MT profile) Attribute (BI/MI profiles)|acc_t*|bias|[OC]|Per output channel bias data.
+|Input (MT profile) Attribute (BI/MI profiles)|out_t*|bias|[OC]|Per output channel bias data.
|Attribute|int*|pad|[4]|[pad_top, pad_bottom, pad_left, pad_right]
|Attribute|int*|stride|[2]|[stride_y, stride_x]
|Attribute|int*|dilation|[2]|[dilation_y, dilation_x]
|Attribute|in_t|input_zp|-|Input tensor zero point. Must be zero for non-int8 types.
|Attribute|weight_t|weight_zp|-|Weight zero point. Must be zero for non-int8 types.
-|Output|acc_t*|output|[N,H,W,OC]|Output tensor
+|Output|out_t*|output|[N,H,W,OC]|Output tensor
|===
*Operation Function*
@@ -170,29 +170,29 @@ ERROR_IF(stride_y < 1 || stride_x < 1);
ERROR_IF(dilation_y < 1 || dilation_x < 1);
pad = flatten([0,0], pad, [0,0]);
for_each(0 <= n < N, 0 <= oy < H, 0 <= ox < W; 0 <= oc < OC) {
- acc_t acc = 0;
+ out_t acc = 0;
iy = oy * stride_y - pad_top;
ix = ox * stride_x - pad_left;
for_each(0 <= ky < KH, 0 <= kx < KW, 0 <= ic < IC) {
y = iy + ky * dilation_y;
x = ix + kx * dilation_x;
if (0 <= y < IH && 0 <= x < IW) {
- acc_t value = tensor_read<in_t>(input, [N,IH,IW,IC], [n,y,x,ic]);
- acc_t weight = tensor_read<weight_t>(weight, [OC,KH,KW,IC], [oc,ky,kx,ic]);
+ out_t value = tensor_read<in_t>(input, [N,IH,IW,IC], [n,y,x,ic]);
+ out_t weight = tensor_read<weight_t>(weight, [OC,KH,KW,IC], [oc,ky,kx,ic]);
value = value - input_zp;
weight = weight - weight_zp;
- acc = apply_add<acc_t>(acc, value * weight);
+ acc = apply_add<out_t>(acc, value * weight);
}
}
- acc = apply_add<acc_t>(acc, bias[oc]);
- tensor_write<acc_t>(output, [N,H,W,OC], [n,oy,ox,oc], acc);
+ acc = apply_add<out_t>(acc, bias[oc]);
+ tensor_write<out_t>(output, [N,H,W,OC], [n,oy,ox,oc], acc);
}
----
*Supported Data Types:*
|===
-|Profile|Mode|in_t|weight_t|acc_t
+|Profile|Mode|in_t|weight_t|out_t
|Any|signed 8x8|int8_t|int8_t|int32_t
|Any|signed 8x4|int8_t|int4_t|int32_t
@@ -211,13 +211,13 @@ Performs a 3D convolution over the given input tensor.
|Input|in_t*|input|[N,ID,IH,IW,IC]|Input tensor
|Input (MT profile) Attribute (BI/MI profiles)|weight_t*|weight|[OC,KD,KH,KW,IC]|Weight kernel size KDxKHxKW
-|Input (MT profile) Attribute (BI/MI profiles)|acc_t*|bias|[OC]|Per output channel bias data.
+|Input (MT profile) Attribute (BI/MI profiles)|out_t*|bias|[OC]|Per output channel bias data.
|Attribute|int*|pad|[6]|[pad_d0, pad_d1, pad_top, pad_bottom, pad_left, pad_right]
|Attribute|int*|stride|[3]|[stride_d, stride_y, stride_x]
|Attribute|int*|dilation|[3]|[dilation_d, dilation_y, dilation_x]
|Attribute|in_t|input_zp|-|Input tensor zero point. Must be zero for non-int8 types.
|Attribute|weight_t|weight_zp|-|Weight zero point. Must be zero for non-int8 types.
-|Output|acc_t*|output|[N,D,H,W,OC]|Output tensor
+|Output|out_t*|output|[N,D,H,W,OC]|Output tensor
|===
*Operation Function*
@@ -231,7 +231,7 @@ ERROR_IF(stride_d < 1 || stride_y < 1 || stride_x < 1);
ERROR_IF(dilation_d < 1 || dilation_y < 1 || dilation_x < 1);
pad = flatten([0,0], pad, [0,0]);
for_each(0 <= n < N, 0 <= od < D, 0 <= oy < H, 0 <= ox < W; 0 <= oc < OC) {
- acc_t acc = 0;
+ out_t acc = 0;
id = od * stride_d - pad_d0;
iy = oy * stride_y - pad_top;
ix = ox * stride_x - pad_left;
@@ -240,22 +240,22 @@ for_each(0 <= n < N, 0 <= od < D, 0 <= oy < H, 0 <= ox < W; 0 <= oc < OC) {
y = iy + ky * dilation_y;
x = ix + kx * dilation_x;
if (0 <= x < IW && 0 <= y < IH && 0 <= d <= ID) {
- acc_t value = tensor_read<in_t>(input, [N,ID,IH,IW,IC], [n,d,y,x,ic]);
- acc_t weight = tensor_read<weight_t>(weight,[OC,KD,KH,KW,IC],[oc,kd,ky,kx,ic]);
+ out_t value = tensor_read<in_t>(input, [N,ID,IH,IW,IC], [n,d,y,x,ic]);
+ out_t weight = tensor_read<weight_t>(weight,[OC,KD,KH,KW,IC],[oc,kd,ky,kx,ic]);
value = value - input_zp;
weight = weight - weight_zp;
- acc = apply_add<acc_t>(acc, value * weight);
+ acc = apply_add<out_t>(acc, value * weight);
}
}
- acc = apply_add<acc_t>(acc, bias[oc]);
- tensor_write<acc_t>(output, [N,D,H,W,OC], [n,od,oy,ox,oc], acc);
+ acc = apply_add<out_t>(acc, bias[oc]);
+ tensor_write<out_t>(output, [N,D,H,W,OC], [n,od,oy,ox,oc], acc);
}
----
*Supported Data Types:*
|===
-|Profile|Mode|in_t|weight_t|acc_t
+|Profile|Mode|in_t|weight_t|out_t
|Any|signed 8x8|int8_t|int8_t|int32_t
|Any|signed 8x4|int8_t|int4_t|int32_t
@@ -275,13 +275,13 @@ Performs 2D convolutions separately over each channel of the given tensor input,
|Input|in_t*|input|[N,H,W,C]|Input tensor
|Input (MT profile) Attribute (BI/MI profiles)|weight_t*|weight|[KH,KW,C,M]|Weight kernel size KH x KW
-|Input (MT profile) Attribute (BI/MI profiles)|acc_t*|bias|[C*M]|Per output channel bias data.
+|Input (MT profile) Attribute (BI/MI profiles)|out_t*|bias|[C*M]|Per output channel bias data.
|Attribute|int*|pad|[4]|[pad_top, pad_bottom, pad_left, pad_right]
|Attribute|int*|stride|[2]|[stride_y, stride_x]
|Attribute|int*|dilation|[2]|[dilation_y, dilation_x]
|Attribute|in_t|input_zp|-|Input tensor zero point. Must be zero for non-int8 types.
|Attribute|weight_t|weight_zp|-|Weight zero point. Must be zero for non-int8 types.
-|Output|acc_t*|output|[N,H,W,C*M]|Output tensor
+|Output|out_t*|output|[N,H,W,C*M]|Output tensor
|===
*Operation Function*
@@ -295,29 +295,29 @@ ERROR_IF(stride_y < 1 || stride_x < 1);
ERROR_IF(dilation_y < 1 || dilation_x < 1);
pad = flatten([0,0], pad, [0,0]);
for_each(0 <= n<N, 0 <= oy < H, 0 <= ox < W; 0 <= c < (C * M), 0 <= m < M) {
- acc_t acc = 0;
+ out_t acc = 0;
iy = oy * stride_y - pad_top;
ix = ox * stride_x - pad_left;
for_each(0 <= ky < KH, 0 <= kx < KW) {
y = iy + ky * dilation_y;
x = ix + kx * dilation_x;
if (0 <= y < IH && 0 <= x < IW) {
- acc_t value = tensor_read<in_t>(input, [N,H,W,C], [n,y,x,c]);
- acc_t weight = tensor_read<weight_t>(weight, [KH,KW,C,M], [ky,kx,c,m]);
+ out_t value = tensor_read<in_t>(input, [N,H,W,C], [n,y,x,c]);
+ out_t weight = tensor_read<weight_t>(weight, [KH,KW,C,M], [ky,kx,c,m]);
value = value - input_zp;
weight = weight - weight_zp;
- acc = apply_add<acc_t>(acc, value * weight);
+ acc = apply_add<out_t>(acc, value * weight);
}
}
- acc = apply_add<acc_t>(acc, bias[(c * M) + m]);
- tensor_write<acc_t>(output, [N,H,W,C * M], [n,oy,ox,c * M + m], acc);
+ acc = apply_add<out_t>(acc, bias[(c * M) + m]);
+ tensor_write<out_t>(output, [N,H,W,C * M], [n,oy,ox,c * M + m], acc);
}
----
*Supported Data Types:*
|===
-|Profile|Mode|in_t|weight_t|acc_t
+|Profile|Mode|in_t|weight_t|out_t
|Any|signed 8x8|int8_t|int8_t|int32_t
|Any|signed 8x4|int8_t|int4_t|int32_t
@@ -336,10 +336,10 @@ Performs a fully connected network.
|Input|in_t*|input|[N,IC]|Input tensor
|Attribute|weight_t*|weight|[OC,IC]|Weights
-|Attribute|acc_t*|bias|[OC]|Per output channel bias data.
+|Attribute|out_t*|bias|[OC]|Per output channel bias data.
|Attribute|in_t|input_zp|-|Input tensor zero point. Must be zero for non-int8 types.
|Attribute|weight_t|weight_zp|-|Weight zero point. Must be zero for non-int8 types.
-|Output|acc_t*|output|[N,OC]|Output tensor
+|Output|out_t*|output|[N,OC]|Output tensor
|===
*Operation Function*
@@ -349,23 +349,23 @@ Performs a fully connected network.
ERROR_IF(in_t != int8_t && input_zp != 0); // Zero point only for int8_t
ERROR_IF(weight_t != int8_t && weight_zp != 0);
for_each(0 <= n < N, 0 <= oc < OC) {
- acc_t acc = 0;
+ out_t acc = 0;
for_each(0 <= ic < IC) {
- acc_t value = tensor_read<in_t>(input, [N,IC], [n,ic]);
- acc_t weight = tensor_read<weight_t>(weight, [OC,IC], [oc,ic]);
+ out_t value = tensor_read<in_t>(input, [N,IC], [n,ic]);
+ out_t weight = tensor_read<weight_t>(weight, [OC,IC], [oc,ic]);
value = value - input_zp;
weight = weight - weight_zp;
- acc = apply_add<acc_t>(acc, value * weight);
+ acc = apply_add<out_t>(acc, value * weight);
}
- acc = apply_add<acc_t>(acc, bias[oc]);
- tensor_write<acc_t>(output, [N,OC], [n,oc], acc);
+ acc = apply_add<out_t>(acc, bias[oc]);
+ tensor_write<out_t>(output, [N,OC], [n,oc], acc);
}
----
*Supported Data Types:*
|===
-|Profile|Mode|in_t|weight_t|acc_t
+|Profile|Mode|in_t|weight_t|out_t
|Any|signed 8x8|int8_t|int8_t|int32_t
|Any|signed 8x4|int8_t|int4_t|int32_t
@@ -385,7 +385,7 @@ Performs two dimensional matrix multiplications. This allows both inputs to be a
|Input|in_t*|B|[N,C,W]|Input tensor B, N matrices of size CxW
|Attribute|in_t|A_zp|-|Input tensor A zero point. Must be zero for non-int8 types.
|Attribute|in_t|B_zp|-|Input tensor B zero point. Must be zero for non-int8 types.
-|Output|acc_t*|output|[N,H,W]|Output tensor, N matrices of size HxW
+|Output|out_t*|output|[N,H,W]|Output tensor, N matrices of size HxW
|===
*Operation Function*
@@ -394,22 +394,22 @@ Performs two dimensional matrix multiplications. This allows both inputs to be a
----
ERROR_IF(in_t != int8_t && (A_zp != 0 || B_zp != 0)); // Zero point only for int8_t
for_each(0 <= n < N, 0 <= h < H, 0 <= w < W) {
- acc_t acc = 0;
+ out_t acc = 0;
for_each(0 <= c < C) {
- acc_t value1 = tensor_read<in_t>(A, [N,H,C], [n,h,c]);
- acc_t value2 = tensor_read<in_t>(B, [N,C,W], [n,c,w]);
+ out_t value1 = tensor_read<in_t>(A, [N,H,C], [n,h,c]);
+ out_t value2 = tensor_read<in_t>(B, [N,C,W], [n,c,w]);
value1 = value1 - A_zp;
value2 = value2 - B_zp;
- acc = apply_add<acc_t>(acc, value1 * value2);
+ acc = apply_add<out_t>(acc, value1 * value2);
}
- tensor_write<acc_t>(output, [N,H,W], [n,h,w], acc);
+ tensor_write<out_t>(output, [N,H,W], [n,h,w], acc);
}
----
*Supported Data Types:*
|===
-|Profile|Mode|in_t|acc_t
+|Profile|Mode|in_t|out_t
|Any|signed 8x8|int8_t|int32_t
|Any|signed 16x16|int16_t|int48_t
@@ -424,11 +424,11 @@ This performs a max pooling over the given input tensor. A sliding window of siz
|===
|Argument|Type|Name|Shape|Description
-|Input|in_t*|input|[N,IH,IW,C]|Input tensor 4D
+|Input|in_out_t*|input|[N,IH,IW,C]|Input tensor 4D
|Attribute|int*|kernel|[2]|[kernel_y, kernel_x]
|Attribute|int*|stride|[2]|[stride_y, stride_x]
|Attribute|int*|pad|[4]|[pad_top, pad_bottom, pad_left, pad_right]
-|Output|in_t*|output|[N,H,W,C]|Output tensor 4D
+|Output|in_out_t*|output|[N,H,W,C]|Output tensor 4D
|===
*Operation Function:*
@@ -448,25 +448,25 @@ ERROR_IF(H != idiv((IH + pad_top + pad_bottom + stride_y - kernel_y), stride_y))
ERROR_IF(W != idiv((IW + pad_left + pad_right + stride_x - kernel_x), stride_x));
for_each(0 <= n < N, 0 <= oy < H, 0 <= ox < W, 0 <= c < C ) {
- in_t acc = minimum_value<in_t>;
+ in_out_t acc = minimum_value<in_out_t>;
iy = oy * stride_y - pad_top;
ix = ox * stride_x - pad_left;
for_each( 0 <= ky < kernel_y, 0 <= kx < kernel_x ) {
y = iy + ky;
x = ix + kx;
if (y >= 0 && y < IH && x >= 0 && x < IW) {
- in_t value = tensor_read<in_t>(input, [N,IH,IW,C], [n,y,x,c]);
+ in_out_t value = tensor_read<in_out_t>(input, [N,IH,IW,C], [n,y,x,c]);
acc = apply_max(acc, value);
}
}
- tensor_write<in_t>(output, [N,H,W,C], [n,oy,ox,c], acc);
+ tensor_write<in_out_t>(output, [N,H,W,C], [n,oy,ox,c], acc);
}
----
*Supported Data Types:*
|===
-|Profile|Mode|in_t
+|Profile|Mode|in_out_t
|Any|signed 8|int8_t
|Any|16-bit|int16_t
@@ -484,13 +484,13 @@ Performs a 2D transposed convolution over the given tensor input, using the weig
|Input|in_t*|input|[N,IH,IW,IC]|Input tensor
|Input (MT profile) Attribute (BI/MI profiles)|weight_t*|weight|[OC,KH,KW,IC]|Weight kernel size KH x KW
-|Input (MT profile) Attribute (BI/MI profiles)|acc_t*|bias|[OC]|Per output channel bias data.
+|Input (MT profile) Attribute (BI/MI profiles)|out_t*|bias|[OC]|Per output channel bias data.
|Attribute|int*|out_pad|[2]|[out_pad_top, out_pad_left]
|Attribute|int*|stride|[2]|[stride_y, stride_x]
|Attribute|int*|out_shape|[4]|[N,OH,OW,OC]
|Attribute|in_t|input_zp|-|Input tensor zero point. Must be zero for non-int8 types.
|Attribute|weight_t|weight_zp|-|Weight zero point. Must be zero for non-int8 types.
-|Output|acc_t*|output|[N,OH,OW,OC]|Output tensor
+|Output|out_t*|output|[N,OH,OW,OC]|Output tensor
|===
*Operation Function*
@@ -502,20 +502,20 @@ ERROR_IF(weight_t != int8_t && weight_zp != 0);
ERROR_IF(out_pad_top < 0 || out_pad_left < 0);
ERROR_IF(stride_y < 1 || stride_x < 1);
for_each(index in out_shape) {
- tensor_write<acc_t>(output, [N,OH,OW,OC], index, bias[index[3]])
+ tensor_write<out_t>(output, [N,OH,OW,OC], index, bias[index[3]])
}
for_each(0 <= n < N, 0 <= iy < IH, 0 <= ix < IW, 0 <= oc < OC,
0 <= ic < IC, 0 <= ky < KH, 0 <= kx < KW) {
oy = iy * stride_y - out_pad_top + ky;
ox = ix * stride_x - out_pad_left + kx;
if (oy >= 0 && oy < OH && ox >= 0 && ox < OW) {
- acc_t acc = tensor_read<acc_t>(output, [N,OH,OW,OC], [n,oy,ox,oc]);
- acc_t value = tensor_read<in_t>(input, [N,IH,IW,IC], [n,iy,ix,ic]);
- acc_t weight = tensor_read<weight_t>(weight, [OC,KH,KW,IC], [oc,ky,kx,ic]);
+ out_t acc = tensor_read<out_t>(output, [N,OH,OW,OC], [n,oy,ox,oc]);
+ out_t value = tensor_read<in_t>(input, [N,IH,IW,IC], [n,iy,ix,ic]);
+ out_t weight = tensor_read<weight_t>(weight, [OC,KH,KW,IC], [oc,ky,kx,ic]);
value = value - input_zp;
weight = weight - weight_zp;
- acc = apply_add<acc_t>(acc, value * weight);
- tensor_write<acc_t>(output, [N,OH,OW,OC], [n,oy,ox,oc], acc);
+ acc = apply_add<out_t>(acc, value * weight);
+ tensor_write<out_t>(output, [N,OH,OW,OC], [n,oy,ox,oc], acc);
}
}
----
@@ -523,7 +523,7 @@ for_each(0 <= n < N, 0 <= iy < IH, 0 <= ix < IW, 0 <= oc < OC,
*Supported Data Types:*
|===
-|Profile|Mode|in_t|weight_t|acc_t
+|Profile|Mode|in_t|weight_t|out_t
|Any|signed 8x8|int8_t|int8_t|int32_t
|Any|signed 8x4|int8_t|int4_t|int32_t