From a0e9a523fcee7f25d4a81289cf10e9f9082ee878 Mon Sep 17 00:00:00 2001 From: Eric Kunze Date: Fri, 12 Nov 2021 16:15:47 -0800 Subject: Use in_out_t when a type is used for input and output Also change acc_t to out_t when the value is being used as an output. This should make the argument tables easier to follow. Change-Id: I2a57f3c4eaf937f29da785ff5c11576663a39494 --- chapters/activation_funcs.adoc | 28 ++--- chapters/data_layout.adoc | 68 +++++------ chapters/data_nodes.adoc | 6 +- chapters/ewise_binary.adoc | 252 ++++++++++++++++++++--------------------- chapters/ewise_ternary.adoc | 16 +-- chapters/ewise_unary.adoc | 140 +++++++++++------------ chapters/reduction.adoc | 90 +++++++-------- chapters/tensor_ops.adoc | 140 +++++++++++------------ 8 files changed, 370 insertions(+), 370 deletions(-) diff --git a/chapters/activation_funcs.adoc b/chapters/activation_funcs.adoc index a58a1fc..84a1039 100644 --- a/chapters/activation_funcs.adoc +++ b/chapters/activation_funcs.adoc @@ -20,10 +20,10 @@ No zero point subtraction is done to the values, thus to clamp to the zero point |=== |Argument|Type|Name|Shape|Description -|Input|in_t*|Input|shape|Input tensor -|Attribute|in_t|min_val|-|minimum clip value -|Attribute|in_t|max_val|-|maximum clip value -|Output|in_t*|Output|shape|Output tensor of same type and shape as input +|Input|in_out_t*|Input|shape|Input tensor +|Attribute|in_out_t|min_val|-|minimum clip value +|Attribute|in_out_t|max_val|-|maximum clip value +|Output|in_out_t*|Output|shape|Output tensor of same type and shape as input |=== *Operation Function:* @@ -31,16 +31,16 @@ No zero point subtraction is done to the values, thus to clamp to the zero point ---- ERROR_IF(max_val < min_val); for_each(index in shape) { - in_t value = tensor_read(input, shape, index); - value = apply_clip(value, min_val, max_val); - tensor_write(output, shape, index, value); + in_out_t value = tensor_read(input, shape, index); + value = apply_clip(value, min_val, max_val); + 
tensor_write(output, shape, index, value); } ---- *Supported Data Types:* |=== -|Profile|Mode|in_t +|Profile|Mode|in_out_t |Any|signed 8|int8_t |Any|signed 16|int16_t @@ -72,14 +72,14 @@ generate_lookup_table(&sigmoid_table, &sigmoid_reference); |=== |Argument|Type|Name|Shape|Description -|Input|in_t*|Input|shape|Input tensor -|Output|in_t*|Output|shape|Output tensor of same type and shape as input +|Input|in_out_t*|Input|shape|Input tensor +|Output|in_out_t*|Output|shape|Output tensor of same type and shape as input |=== *Supported Data Types:* |=== -|Profile|Mode|in_t +|Profile|Mode|in_out_t |MI, MT|floating-point|float_t |=== @@ -110,14 +110,14 @@ generate_lookup_table(&tanh_table, &tanh_reference); |=== |Argument|Type|Name|Shape|Description -|Input|in_t*|Input|shape|Input tensor -|Output|in_t*|Output|shape|Output tensor of same type and shape as input +|Input|in_out_t*|Input|shape|Input tensor +|Output|in_out_t*|Output|shape|Output tensor of same type and shape as input |=== *Supported Data Types:* |=== -|Profile|Mode|in_t +|Profile|Mode|in_out_t |MI, MT|floating-point|float_t |=== diff --git a/chapters/data_layout.adoc b/chapters/data_layout.adoc index 54221f6..e50a14e 100644 --- a/chapters/data_layout.adoc +++ b/chapters/data_layout.adoc @@ -18,9 +18,9 @@ No data conversion happens during a concat operation. |=== |Argument|Type|Name|Shape|Description -|Input|in_t*|input1|shapes1[]|List of input tensors. All inputs must have the same rank and data type +|Input|in_out_t*|input1|shapes1[]|List of input tensors. 
All inputs must have the same rank and data type |Attribute|int|axis|-|Axis along which concatenation is to occur, in range from 0 to rank(shape)-1 -|Output|in_t*|output|shape|Output tensor +|Output|in_out_t*|output|shape|Output tensor |=== *Operation Function:* @@ -43,8 +43,8 @@ for_each(index1 in shape) { // For each output location, we are looking for the // appropriate input tensor if (index2[axis] >= 0 && index2[axis] < shapes1[t][axis]) { - in_t value = tensor_read(input1[t], shapes1[t], index2); - tensor_write(output, shape, index1, value); + in_out_t value = tensor_read(input1[t], shapes1[t], index2); + tensor_write(output, shape, index1, value); } index2[axis] = index2[axis] - shapes1[t][axis]; } @@ -55,7 +55,7 @@ for_each(index1 in shape) { *Supported Data Types:* |=== -|Profile|Mode|in_t +|Profile|Mode|in_out_t |Any|Boolean|bool_t |Any|signed 8|int8_t @@ -75,10 +75,10 @@ The pad_const value includes the zero point if the tensor uses a zero point. |=== |Argument|Type|Name|Shape|Description -|Input|in_t*|input1|shape1|Input tensor +|Input|in_out_t*|input1|shape1|Input tensor |Attribute|int|padding|[rank(input1),2]|Amount of padding to be done -|Attribute|in_t|pad_const|-|Constant value to be used as padding -|Output|in_t*|output|shape|Output tensor of same type as the input tensor +|Attribute|in_out_t|pad_const|-|Constant value to be used as padding +|Output|in_out_t*|output|shape|Output tensor of same type as the input tensor |=== *Operation Function:* @@ -98,15 +98,15 @@ for_each(index in shape) { is_pad = true; } } - in_t value = is_pad ? pad_const : tensor_read(input1, shape1, index1); - tensor_write(output, shape, index, value); + in_out_t value = is_pad ? 
pad_const : tensor_read(input1, shape1, index1); + tensor_write(output, shape, index, value); } ---- *Supported Data Types:* |=== -|Profile|Mode|in_t +|Profile|Mode|in_out_t |Any|Boolean|bool_t |Any|signed 8|int8_t @@ -124,9 +124,9 @@ Returns a tensor with the same type/values as the input, with a new shape specif |=== |Argument|Type|Name|Shape|Description -|Input|in_t*|input1|shape1|Input tensor +|Input|in_out_t*|input1|shape1|Input tensor |Attribute|int|new_shape|[rank(output)]|List of values, with each element giving the size of the result tensor for the given dimension. At most one dimension may be given as -1 to automatically calculate the dimension size. -|Output|in_t*|output|shape|Output tensor of same type, size as the input tensor +|Output|in_out_t*|output|shape|Output tensor of same type, size as the input tensor |=== *Operation Function:* @@ -142,7 +142,7 @@ for(i = 0; i < tensor_size(shape); i++) { *Supported Data Types:* |=== -|Profile|Mode|in_t +|Profile|Mode|in_out_t |Any|Boolean|bool_t |Any|signed 8|int8_t @@ -160,9 +160,9 @@ Returns a tensor with the same type/values as the input, with the data reversed |=== |Argument|Type|Name|Shape|Description -|Input|in_t*|input|shape|Input tensor from 1 to 4 dims +|Input|in_out_t*|input|shape|Input tensor from 1 to 4 dims |Attribute|int|axis|-|Axis to reverse, in range from 0 to rank(shape)-1 -|Output|in_t*|output|shape|Output tensor. Same shape as input tensor. +|Output|in_out_t*|output|shape|Output tensor. Same shape as input tensor. 
|=== *Operation Function:* @@ -173,15 +173,15 @@ ERROR_IF(axis < 0 || axis >= rank(shape)); for_each(index in shape) { tmp_index = index; tmp_index[axis] = shape[axis] - 1 - index[axis]; - in_t value = tensor_read(input, shape, tmp_index); - tensor_write(output, shape, index, value); + in_out_t value = tensor_read(input, shape, tmp_index); + tensor_write(output, shape, index, value); } ---- *Supported Data Types:* |=== -|Profile|Mode|in_t +|Profile|Mode|in_out_t |Any|Boolean|bool_t |Any|signed 8|int8_t @@ -199,10 +199,10 @@ No data conversion happens during a slice operation. |=== |Argument|Type|Name|Shape|Description -|Input|in_t*|input1|shape1|Input tensor with rank from 1 to 4 +|Input|in_out_t*|input1|shape1|Input tensor with rank from 1 to 4 |Attribute|int|start|[rank(input1)]|List of integer coordinates, of length equal to the rank of input1. Start coordinate for slicing. |Attribute|int|size|[rank(input1)]|List of integer size values, of length equal to the rank of input1. Size of the input to be used. -|Output|in_t*|output|shape|Output tensor of same type as the input tensor +|Output|in_out_t*|output|shape|Output tensor of same type as the input tensor |=== *Operation Function:* @@ -225,15 +225,15 @@ for_each(index in shape) { for(i = 0; i < rank(shape); i++) { tmp_index[i] = index[i] + start[i]; } - in_t value = tensor_read(input, shape1, tmp_index); - tensor_write(output, shape, index, value); + in_out_t value = tensor_read(input, shape1, tmp_index); + tensor_write(output, shape, index, value); } ---- *Supported Data Types:* |=== -|Profile|Mode|in_t +|Profile|Mode|in_out_t |Any|Boolean|bool_t |Any|signed 8|int8_t @@ -251,9 +251,9 @@ Replicates input1 multiplies times along each dimension. 
|=== |Argument|Type|Name|Shape|Description -|Input|in_t*|input1|shape1|Input tensor with rank from 1 to 4 +|Input|in_out_t*|input1|shape1|Input tensor with rank from 1 to 4 |Attribute|int32_t|multiplies|[rank(shape1)]|Number of times to replicate input1 in each dimension -|Output|in_t*|output|shape|Output tensor of same type, rank as the input tensor +|Output|in_out_t*|output|shape|Output tensor of same type, rank as the input tensor |=== *Operation Function:* @@ -266,15 +266,15 @@ for_each(index in shape) { REQUIRE(shape1[i] * multiplies[i] == shape[i]); tmp_index[i] = index[i] % shape1[i]; } - in_t value = tensor_read(input, shape1, tmp_index); - tensor_write(output, shape, index, value); + in_out_t value = tensor_read(input, shape1, tmp_index); + tensor_write(output, shape, index, value); } ---- *Supported Data Types:* |=== -|Profile|Mode|in_t +|Profile|Mode|in_out_t |Any|Boolean|bool_t |Any|signed 8|int8_t @@ -292,9 +292,9 @@ Permutes the dimensions based on perm. |=== |Argument|Type|Name|Shape|Description -|Input|in_t*|input1|shape1|Input tensor with minimum rank of one. +|Input|in_out_t*|input1|shape1|Input tensor with minimum rank of one. |Attribute|int32_t|perms|[rank(input1)]|List of integers of length equal to the rank of input1. Values must be valid dimensions within shape1, and may not be repeated. 
-|Output|in_t*|output|shape|Output tensor of same type, rank as the input tensor +|Output|in_out_t*|output|shape|Output tensor of same type, rank as the input tensor |=== *Operation Function:* @@ -315,15 +315,15 @@ for_each(index in shape) { REQUIRE(shape1[perm[i]] == shape[i]) tmp_index[perm[i]] = index[i] } - in_t value = tensor_read(input, shape1, tmp_index); - tensor_write(output, shape, index, value); + in_out_t value = tensor_read(input, shape1, tmp_index); + tensor_write(output, shape, index, value); } ---- *Supported Data Types:* |=== -|Profile|Mode|in_t +|Profile|Mode|in_out_t |Any|Boolean|bool_t |Any|signed 8|int8_t diff --git a/chapters/data_nodes.adoc b/chapters/data_nodes.adoc index e4a3e88..9d32a62 100644 --- a/chapters/data_nodes.adoc +++ b/chapters/data_nodes.adoc @@ -43,14 +43,14 @@ Returns a tensor with the same shape, type, and contents as the input. |=== |Argument|Type|Name|Shape|Description -|Input|in_t*|input1|shape|Input tensor -|Output|in_t*|output|shape|Output tensor of same type, size as the input tensor +|Input|in_out_t*|input1|shape|Input tensor +|Output|in_out_t*|output|shape|Output tensor of same type, size as the input tensor |=== *Supported Data Types:* |=== -|Profile|Mode|in_t +|Profile|Mode|in_out_t |Any|Boolean|bool_t |Any|signed 8|int8_t diff --git a/chapters/ewise_binary.adoc b/chapters/ewise_binary.adoc index c624437..aa1c86c 100644 --- a/chapters/ewise_binary.adoc +++ b/chapters/ewise_binary.adoc @@ -19,9 +19,9 @@ Axis of size 1 will be broadcast, as necessary. 
Rank of input tensors must match |=== |Argument|Type|Name|Shape|Description -|Input|in_t*|input1|shape1|Input tensor -|Input|in_t*|input2|shape2|Input tensor with the same rank as input1 -|Output|in_t*|output|shape|Output tensor with broadcast shape if necessary +|Input|in_out_t*|input1|shape1|Input tensor +|Input|in_out_t*|input2|shape2|Input tensor with the same rank as input1 +|Output|in_out_t*|output|shape|Output tensor with broadcast shape if necessary |=== *Operation Function:* @@ -31,17 +31,17 @@ Axis of size 1 will be broadcast, as necessary. Rank of input tensors must match for_each(index in shape) { index1 = apply_broadcast(shape, shape1, index); index2 = apply_broadcast(shape, shape2, index); - in_t value1 = tensor_read(input1, shape1, index1); - in_t value2 = tensor_read(input2, shape2, index2); - in_t result = apply_add(value1, value2); - tensor_write(output, shape, index, result); + in_out_t value1 = tensor_read(input1, shape1, index1); + in_out_t value2 = tensor_read(input2, shape2, index2); + in_out_t result = apply_add(value1, value2); + tensor_write(output, shape, index, result); } ---- *Supported Data Types:* |=== -|Profile|Mode|in_t +|Profile|Mode|in_out_t |Any|signed 32|int32_t |MI, MT|floating-point|float_t @@ -57,10 +57,10 @@ Axis of size 1 will be broadcast, as necessary. Rank of input tensors must match |=== |Argument|Type|Name|Shape|Description -|Input|in_t*|input1|shape1|Input tensor -|Input|in_t*|input2|shape2|Input tensor with the same rank as input1 +|Input|in_out_t*|input1|shape1|Input tensor +|Input|in_out_t*|input2|shape2|Input tensor with the same rank as input1 |Input|bool_t|round|-|If true then the shift is rounded -|Output|in_t*|output|shape|Output tensor with broadcast shape if necessary +|Output|in_out_t*|output|shape|Output tensor with broadcast shape if necessary |=== *Operation Function:* @@ -70,27 +70,27 @@ Axis of size 1 will be broadcast, as necessary. 
Rank of input tensors must match for_each(index in shape) { index1 = apply_broadcast(shape, shape1, index); index2 = apply_broadcast(shape, shape2, index); - in_t value1 = tensor_read(input1, shape1, index1); - in_t value2 = tensor_read(input2, shape2, index2); + in_out_t value1 = tensor_read(input1, shape1, index1); + in_out_t value2 = tensor_read(input2, shape2, index2); // Ensure that shift amount is appropriate for the data type - REQUIRE((in_t == int32_t && 0 <= value2 && value2 <= 31) || - (in_t == int16_t && 0 <= value2 && value2 <= 15) || - (in_t == int8_t && 0 <= value2 && value2 <= 7)); + REQUIRE((in_out_t == int32_t && 0 <= value2 && value2 <= 31) || + (in_out_t == int16_t && 0 <= value2 && value2 <= 15) || + (in_out_t == int8_t && 0 <= value2 && value2 <= 7)); - in_t result = value1 >> value2; + in_out_t result = value1 >> value2; if (round == true && value2 > 0 && (value1 >> (value2 - 1)) & 1 != 0) { result = result + 1; } - result = apply_clip(result, minimum, maximum); - tensor_write(output, shape, index, result); + result = apply_clip(result, minimum, maximum); + tensor_write(output, shape, index, result); } ---- *Supported Data Types:* |=== -|Profile|Mode|in_t +|Profile|Mode|in_out_t |Any|signed 8|int8_t |Any|signed 16|int16_t @@ -107,9 +107,9 @@ Axis of size 1 will be broadcast as necessary. Rank of input tensors must match. |=== |Argument|Type|Name|Shape|Description -|Input|in_t*|input1|shape1|Input tensor -|Input|in_t*|input2|shape2|Input tensor with the same rank as input1 -|Output|in_t*|output|shape|Output tensor of same type as the input tensors, with broadcast shape if necessary +|Input|in_out_t*|input1|shape1|Input tensor +|Input|in_out_t*|input2|shape2|Input tensor with the same rank as input1 +|Output|in_out_t*|output|shape|Output tensor of same type as the input tensors, with broadcast shape if necessary |=== *Operation Function:* @@ -119,17 +119,17 @@ Axis of size 1 will be broadcast as necessary. Rank of input tensors must match. 
for_each(index in shape) { index1 = apply_broadcast(shape, shape1, index); index2 = apply_broadcast(shape, shape2, index); - in_t value1 = tensor_read(input1, shape1, index1); - in_t value2 = tensor_read(input2, shape2, index2); - in_t result = value1 & value2; - tensor_write(output, shape, index, result); + in_out_t value1 = tensor_read(input1, shape1, index1); + in_out_t value2 = tensor_read(input2, shape2, index2); + in_out_t result = value1 & value2; + tensor_write(output, shape, index, result); } ---- *Supported Data Types:* |=== -|Profile|Mode|in_t +|Profile|Mode|in_out_t |Any|signed 8|int8_t |Any|signed 16|int16_t @@ -146,9 +146,9 @@ Axis of size 1 will be broadcast as necessary. Rank of input tensors must match. |=== |Argument|Type|Name|Shape|Description -|Input|in_t*|input1|shape1|Input tensor -|Input|in_t*|input2|shape2|Input tensor with the same rank as input1 -|Output|in_t*|output|shape|Output tensor with broadcast shape if necessary +|Input|in_out_t*|input1|shape1|Input tensor +|Input|in_out_t*|input2|shape2|Input tensor with the same rank as input1 +|Output|in_out_t*|output|shape|Output tensor with broadcast shape if necessary |=== *Operation Function:* @@ -158,17 +158,17 @@ Axis of size 1 will be broadcast as necessary. Rank of input tensors must match. for_each(index in shape) { index1 = apply_broadcast(shape, shape1, index); index2 = apply_broadcast(shape, shape2, index); - in_t value1 = tensor_read(input1, shape1, index1); - in_t value2 = tensor_read(input2, shape2, index2); - in_t result = value1 | value2; - tensor_write(output, shape, index, result); + in_out_t value1 = tensor_read(input1, shape1, index1); + in_out_t value2 = tensor_read(input2, shape2, index2); + in_out_t result = value1 | value2; + tensor_write(output, shape, index, result); } ---- *Supported Data Types:* |=== -|Profile|Mode|in_t +|Profile|Mode|in_out_t |Any|signed 8|int8_t |Any|signed 16|int16_t @@ -185,9 +185,9 @@ Axis of size 1 will be broadcast as necessary. 
Rank of input tensors must match. |=== |Argument|Type|Name|Shape|Description -|Input|in_t*|input1|shape1|Input tensor -|Input|in_t*|input2|shape2|Input tensor with the same rank as input1 -|Output|in_t*|output|shape|Output tensor with broadcast shape if necessary +|Input|in_out_t*|input1|shape1|Input tensor +|Input|in_out_t*|input2|shape2|Input tensor with the same rank as input1 +|Output|in_out_t*|output|shape|Output tensor with broadcast shape if necessary |=== *Operation Function:* @@ -197,17 +197,17 @@ Axis of size 1 will be broadcast as necessary. Rank of input tensors must match. for_each(index in shape) { index1 = apply_broadcast(shape, shape1, index); index2 = apply_broadcast(shape, shape2, index); - in_t value1 = tensor_read(input1, shape1, index1); - in_t value2 = tensor_read(input2, shape2, index2); - in_t result = value1 ^ value2; - tensor_write(output, shape, index, result); + in_out_t value1 = tensor_read(input1, shape1, index1); + in_out_t value2 = tensor_read(input2, shape2, index2); + in_out_t result = value1 ^ value2; + tensor_write(output, shape, index, result); } ---- *Supported Data Types:* |=== -|Profile|Mode|in_t +|Profile|Mode|in_out_t |Any|signed 8|int8_t |Any|signed 16|int16_t @@ -227,9 +227,9 @@ Quantized integer divide should use TABLE (for 1/x) and MUL. |=== |Argument|Type|Name|Shape|Description -|Input|in_t*|input1|shape1|Input tensor -|Input|in_t*|input2|shape2|Input tensor with the same rank as input1 -|Output|in_t*|output|shape|Output tensor with broadcast shape if necessary +|Input|in_out_t*|input1|shape1|Input tensor +|Input|in_out_t*|input2|shape2|Input tensor with the same rank as input1 +|Output|in_out_t*|output|shape|Output tensor with broadcast shape if necessary |=== *Operation Function:* @@ -239,20 +239,20 @@ Quantized integer divide should use TABLE (for 1/x) and MUL. 
for_each(index in shape) { index1 = apply_broadcast(shape, shape1, index); index2 = apply_broadcast(shape, shape2, index); - in_t value1 = tensor_read(input1, shape1, index1); - in_t value2 = tensor_read(input2, shape2, index2); + in_out_t value1 = tensor_read(input1, shape1, index1); + in_out_t value2 = tensor_read(input2, shape2, index2); REQUIRE(value2 != 0); - // This catches the case where we divide minimum by -1 + // This catches the case where we divide minimum by -1 // which is not representable in two's complement - REQUIRE((int64_t)value1 / value2 <= maximum); - in_t result = value1 / value2; - tensor_write(output, shape, index, result); + REQUIRE((int64_t)value1 / value2 <= maximum); + in_out_t result = value1 / value2; + tensor_write(output, shape, index, result); } ---- *Supported Data Types:* |=== -|Profile|Mode|in_t +|Profile|Mode|in_out_t |Any|signed 32|int32_t |=== @@ -267,9 +267,9 @@ Axis of size 1 will be broadcast, as necessary. Rank of input tensors must match |=== |Argument|Type|Name|Shape|Description -|Input|in_t*|input1|shape1|Input tensor -|Input|in_t*|input2|shape2|Input tensor with the same rank as input1 -|Output|in_t*|output|shape|Output tensor with broadcast shape if necessary +|Input|in_out_t*|input1|shape1|Input tensor +|Input|in_out_t*|input2|shape2|Input tensor with the same rank as input1 +|Output|in_out_t*|output|shape|Output tensor with broadcast shape if necessary |=== *Operation Function:* @@ -279,17 +279,17 @@ Axis of size 1 will be broadcast, as necessary. 
Rank of input tensors must match for_each(index in shape) { index1 = apply_broadcast(shape, shape1, index); index2 = apply_broadcast(shape, shape2, index); - in_t value1 = tensor_read(input1, shape1, index1); - in_t value2 = tensor_read(input2, shape2, index2); - in_t result = value1 && value2; - tensor_write(output, shape, index, result); + in_out_t value1 = tensor_read(input1, shape1, index1); + in_out_t value2 = tensor_read(input2, shape2, index2); + in_out_t result = value1 && value2; + tensor_write(output, shape, index, result); } ---- *Supported Data Types:* |=== -|Profile|Mode|in_t +|Profile|Mode|in_out_t |Any|Bool|bool_t |=== @@ -304,9 +304,9 @@ Axis of size 1 will be broadcast, as necessary. Rank of input tensors must match |=== |Argument|Type|Name|Shape|Description -|Input|in_t*|input1|shape1|Input tensor -|Input|in_t*|input2|shape2|Input tensor with the same rank as input1 -|Output|in_t*|output|shape|Output tensor with broadcast shape if necessary +|Input|in_out_t*|input1|shape1|Input tensor +|Input|in_out_t*|input2|shape2|Input tensor with the same rank as input1 +|Output|in_out_t*|output|shape|Output tensor with broadcast shape if necessary |=== *Operation Function:* @@ -316,18 +316,18 @@ Axis of size 1 will be broadcast, as necessary. 
Rank of input tensors must match for_each(index in shape) { index1 = apply_broadcast(shape, shape1, index); index2 = apply_broadcast(shape, shape2, index); - in_t value1 = tensor_read(input1, shape1, index1); - in_t value2 = tensor_read(input2, shape2, index2); + in_out_t value1 = tensor_read(input1, shape1, index1); + in_out_t value2 = tensor_read(input2, shape2, index2); REQUIRE(0 <= value2 && value2 <= 31); - in_t result = value1 << value2; - tensor_write(output, shape, index, result); + in_out_t result = value1 << value2; + tensor_write(output, shape, index, result); } ---- *Supported Data Types:* |=== -|Profile|Mode|in_t +|Profile|Mode|in_out_t |Any|signed 8|int8_t |Any|signed 16|int16_t @@ -344,9 +344,9 @@ Axis of size 1 will be broadcast, as necessary. Rank of input tensors must match |=== |Argument|Type|Name|Shape|Description -|Input|in_t*|input1|shape1|Input tensor -|Input|in_t*|input2|shape2|Input tensor with the same rank as input1 -|Output|in_t*|output|shape|Output tensor with broadcast shape if necessary +|Input|in_out_t*|input1|shape1|Input tensor +|Input|in_out_t*|input2|shape2|Input tensor with the same rank as input1 +|Output|in_out_t*|output|shape|Output tensor with broadcast shape if necessary |=== *Operation Function:* @@ -356,18 +356,18 @@ Axis of size 1 will be broadcast, as necessary. 
Rank of input tensors must match for_each(index in shape) { index1 = apply_broadcast(shape, shape1, index); index2 = apply_broadcast(shape, shape2, index); - in_t value1 = tensor_read(input1, shape1, index1); - in_t value2 = tensor_read(input2, shape2, index2); + in_out_t value1 = tensor_read(input1, shape1, index1); + in_out_t value2 = tensor_read(input2, shape2, index2); REQUIRE(0 <= value2 && value2 <= 31); - in_t result = (in_t)((unsigned in_t)value1 >> value2); - tensor_write(output, shape, index, result); + in_out_t result = (in_out_t)((unsigned in_out_t)value1 >> value2); + tensor_write(output, shape, index, result); } ---- *Supported Data Types:* |=== -|Profile|Mode|in_t +|Profile|Mode|in_out_t |Any|signed 8|int8_t |Any|signed 16|int16_t @@ -384,9 +384,9 @@ Axis of size 1 will be broadcast as necessary. Rank of input tensors must match. |=== |Argument|Type|Name|Shape|Description -|Input|in_t*|input1|shape1|Input tensor -|Input|in_t*|input2|shape2|Input tensor with the same rank as input1 -|Output|in_t*|output|shape|Output tensor with broadcast shape if necessary +|Input|in_out_t*|input1|shape1|Input tensor +|Input|in_out_t*|input2|shape2|Input tensor with the same rank as input1 +|Output|in_out_t*|output|shape|Output tensor with broadcast shape if necessary |=== *Operation Function:* @@ -396,17 +396,17 @@ Axis of size 1 will be broadcast as necessary. Rank of input tensors must match. 
for_each(index in shape) { index1 = apply_broadcast(shape, shape1, index); index2 = apply_broadcast(shape, shape2, index); - in_t value1 = tensor_read(input1, shape1, index1); - in_t value2 = tensor_read(input2, shape2, index2); - in_t result = value1 || value2; - tensor_write(output, shape, index, result); + in_out_t value1 = tensor_read(input1, shape1, index1); + in_out_t value2 = tensor_read(input2, shape2, index2); + in_out_t result = value1 || value2; + tensor_write(output, shape, index, result); } ---- *Supported Data Types:* |=== -|Profile|Mode|in_t +|Profile|Mode|in_out_t |Any|Bool|bool_t |=== @@ -421,9 +421,9 @@ Axis of size 1 will be broadcast as necessary. Rank of input tensors must match. |=== |Argument|Type|Name|Shape|Description -|Input|in_t*|input1|shape1|Input tensor -|Input|in_t*|input2|shape2|Input tensor with the same rank as input1 -|Output|in_t*|output|shape|Output tensor of same type as the input tensors, with broadcast shape if necessary +|Input|in_out_t*|input1|shape1|Input tensor +|Input|in_out_t*|input2|shape2|Input tensor with the same rank as input1 +|Output|in_out_t*|output|shape|Output tensor of same type as the input tensors, with broadcast shape if necessary |=== *Operation Function:* @@ -433,17 +433,17 @@ Axis of size 1 will be broadcast as necessary. Rank of input tensors must match. 
for_each(index in shape) { index1 = apply_broadcast(shape, shape1, index); index2 = apply_broadcast(shape, shape2, index); - in_t value1 = tensor_read(input1, shape1, index1); - in_t value2 = tensor_read(input2, shape2, index2); - in_t result = value1 != value2; - tensor_write(output, shape, index, result); + in_out_t value1 = tensor_read(input1, shape1, index1); + in_out_t value2 = tensor_read(input2, shape2, index2); + in_out_t result = value1 != value2; + tensor_write(output, shape, index, result); } ---- *Supported Data Types:* |=== -|Profile|Mode|in_t +|Profile|Mode|in_out_t |Any|Bool|bool_t |=== @@ -458,9 +458,9 @@ Axis of size 1 will be broadcast, as necessary. Rank of input tensors must match |=== |Argument|Type|Name|Shape|Description -|Input|in_t*|input1|shape1|Input tensor -|Input|in_t*|input2|shape2|Input tensor with the same rank as input1 -|Output|in_t*|output|shape|Output tensor with broadcast shape if necessary +|Input|in_out_t*|input1|shape1|Input tensor +|Input|in_out_t*|input2|shape2|Input tensor with the same rank as input1 +|Output|in_out_t*|output|shape|Output tensor with broadcast shape if necessary |=== *Operation Function:* @@ -470,17 +470,17 @@ Axis of size 1 will be broadcast, as necessary. Rank of input tensors must match for_each(index in shape) { index1 = apply_broadcast(shape, shape1, index); index2 = apply_broadcast(shape, shape2, index); - in_t value1 = tensor_read(input1, shape1, index1); - in_t value2 = tensor_read(input2, shape2, index2); - in_t result = apply_max(value1, value2); - tensor_write(output, shape, index, result); + in_out_t value1 = tensor_read(input1, shape1, index1); + in_out_t value2 = tensor_read(input2, shape2, index2); + in_out_t result = apply_max(value1, value2); + tensor_write(output, shape, index, result); } ---- *Supported Data Types:* |=== -|Profile|Mode|in_t +|Profile|Mode|in_out_t |Any|signed 32|int32_t |MI, MT|floating-point|float_t @@ -496,9 +496,9 @@ Axis of size 1 will be broadcast, as necessary. 
Rank of input tensors must match |=== |Argument|Type|Name|Shape|Description -|Input|in_t*|input1|shape1|Input tensor -|Input|in_t*|input2|shape2|Input tensor with the same rank as input1 -|Output|in_t*|output|shape|Output tensor with broadcast shape if necessary +|Input|in_out_t*|input1|shape1|Input tensor +|Input|in_out_t*|input2|shape2|Input tensor with the same rank as input1 +|Output|in_out_t*|output|shape|Output tensor with broadcast shape if necessary |=== *Operation Function:* @@ -508,17 +508,17 @@ Axis of size 1 will be broadcast, as necessary. Rank of input tensors must match for_each(index in shape) { index1 = apply_broadcast(shape, shape1, index); index2 = apply_broadcast(shape, shape2, index); - in_t value1 = tensor_read(input1, shape1, index1); - in_t value2 = tensor_read(input2, shape2, index2); - in_t result = apply_min(value1, value2); - tensor_write(output, shape, index, result); + in_out_t value1 = tensor_read(input1, shape1, index1); + in_out_t value2 = tensor_read(input2, shape2, index2); + in_out_t result = apply_min(value1, value2); + tensor_write(output, shape, index, result); } ---- *Supported Data Types:* |=== -|Profile|Mode|in_t +|Profile|Mode|in_out_t |Any|signed 32|int32_t |MI, MT|floating-point|float_t @@ -579,9 +579,9 @@ Axis of size 1 will be broadcast, as necessary. Rank of input tensors must match |=== |Argument|Type|Name|Shape|Description -|Input|in_t*|input1|shape1|Input tensor from 1 to 4 dims -|Input|in_t*|input2|shape2|Input tensor with the same rank as input1 -|Output|in_t*|output|shape|Output tensor of same type as the input tensors, with broadcast shape if necessary +|Input|in_out_t*|input1|shape1|Input tensor from 1 to 4 dims +|Input|in_out_t*|input2|shape2|Input tensor with the same rank as input1 +|Output|in_out_t*|output|shape|Output tensor of same type as the input tensors, with broadcast shape if necessary |=== *Operation Function:* @@ -591,17 +591,17 @@ Axis of size 1 will be broadcast, as necessary. 
Rank of input tensors must match for_each(index in shape) { index1 = apply_broadcast(shape, shape1, index); index2 = apply_broadcast(shape, shape2, index); - in_t value1 = tensor_read(input1, shape1, index1); - in_t value2 = tensor_read(input2, shape2, index2); - in_t result = apply_pow(value1, value2); - tensor_write(output, shape, index, result); + in_out_t value1 = tensor_read(input1, shape1, index1); + in_out_t value2 = tensor_read(input2, shape2, index2); + in_out_t result = apply_pow(value1, value2); + tensor_write(output, shape, index, result); } ---- *Supported Data Types:* |=== -|Profile|Mode|in_t +|Profile|Mode|in_out_t |MI, MT|floating-point|float_t |=== @@ -616,9 +616,9 @@ Axis of size 1 will be broadcast as necessary. Rank of input tensors must match. |=== |Argument|Type|Name|Shape|Description -|Input|in_t*|input1|shape1|Input tensor -|Input|in_t*|input2|shape2|Input tensor with the same rank as input1 -|Output|in_t*|output|shape|Output tensor with broadcast shape if necessary +|Input|in_out_t*|input1|shape1|Input tensor +|Input|in_out_t*|input2|shape2|Input tensor with the same rank as input1 +|Output|in_out_t*|output|shape|Output tensor with broadcast shape if necessary |=== *Operation Function:* @@ -628,17 +628,17 @@ Axis of size 1 will be broadcast as necessary. Rank of input tensors must match. 
for_each(index in shape) { index1 = apply_broadcast(shape, shape1, index); index2 = apply_broadcast(shape, shape2, index); - in_t value1 = tensor_read(input1, shape1, index1); - in_t value2 = tensor_read(input2, shape2, index2); - in_t result = apply_sub(value1, value2); - tensor_write(output, shape, index, result); + in_out_t value1 = tensor_read(input1, shape1, index1); + in_out_t value2 = tensor_read(input2, shape2, index2); + in_out_t result = apply_sub(value1, value2); + tensor_write(output, shape, index, result); } ---- *Supported Data Types:* |=== -|Profile|Mode|in_t +|Profile|Mode|in_out_t |Any|signed 32|int32_t |MI, MT|floating-point|float_t diff --git a/chapters/ewise_ternary.adoc b/chapters/ewise_ternary.adoc index 751c093..e61e1c2 100644 --- a/chapters/ewise_ternary.adoc +++ b/chapters/ewise_ternary.adoc @@ -19,9 +19,9 @@ Elementwise select of the output based on a condition. |Argument|Type|Name|Shape|Description |Input|cmp_t|input1|shape1|Input selector tensor -|Input|in_t*|input2|shape2|Input value tensor if input1 is True -|Input|in_t*|input3|shape3|Input value tensor if input1 is False -|Output|in_t*|output|shape|Output tensor of same type as input2 and input3, with broadcast shape if necessary +|Input|in_out_t*|input2|shape2|Input value tensor if input1 is True +|Input|in_out_t*|input3|shape3|Input value tensor if input1 is False +|Output|in_out_t*|output|shape|Output tensor of same type as input2 and input3, with broadcast shape if necessary |=== *Operation Function:* @@ -33,21 +33,21 @@ for_each(index in shape) { index2 = apply_broadcast(shape, shape2, index); index3 = apply_broadcast(shape, shape3, index); cmp_t value1 = tensor_read(input1, shape1, index1); - in_t value2 = tensor_read(input2, shape2, index2); - in_t value3 = tensor_read(input3, shape3, index3); - in_t result; + in_out_t value2 = tensor_read(input2, shape2, index2); + in_out_t value3 = tensor_read(input3, shape3, index3); + in_out_t result; if (value1) { result = value2; } else { 
result = value3; } - tensor_write(output, shape, index, result); + tensor_write(output, shape, index, result); } ---- *Supported Data Types:* |=== -|Profile|Mode|cmp_t|in_t +|Profile|Mode|cmp_t|in_out_t |Any|Boolean|bool_t|bool_t |Any|signed 8|bool_t|int8_t diff --git a/chapters/ewise_unary.adoc b/chapters/ewise_unary.adoc index 633b8ac..326cc3c 100644 --- a/chapters/ewise_unary.adoc +++ b/chapters/ewise_unary.adoc @@ -18,8 +18,8 @@ Elementwise absolute value operation |=== |Argument|Type|Name|Shape|Description -|Input|in_t*|input1|shape|Input tensor -|Output|in_t*|output|shape|Output tensor of same type, size as the input tensor +|Input|in_out_t*|input1|shape|Input tensor +|Output|in_out_t*|output|shape|Output tensor of same type, size as the input tensor |=== *Operation Function:* @@ -27,17 +27,17 @@ Elementwise absolute value operation [source,c++] ---- for_each(index in shape) { - in_t value1 = tensor_read(input1, shape, index); + in_out_t value1 = tensor_read(input1, shape, index); if (value1 < 0) - value1 = apply_sub(0, value1); - tensor_write(output, shape, index, value1); + value1 = apply_sub(0, value1); + tensor_write(output, shape, index, value1); } ---- *Supported Data Types:* |=== -|Profile|Mode|in_t +|Profile|Mode|in_out_t |Any|signed 32|int32_t |MI, MT|floating-point|float_t @@ -52,8 +52,8 @@ Elementwise bitwise NOT of input tensor. |=== |Argument|Type|Name|Shape|Description -|Input|in_t*|input1|shape|Input tensor -|Output|in_t*|output|shape|Output tensor of same type, size as the input tensor +|Input|in_out_t*|input1|shape|Input tensor +|Output|in_out_t*|output|shape|Output tensor of same type, size as the input tensor |=== *Operation Function:* @@ -61,16 +61,16 @@ Elementwise bitwise NOT of input tensor. 
[source,c++] ---- for_each(index in shape) { - in_t value1 = tensor_read(input1, shape, index); - in_t result = ~value1; - tensor_write(output, shape, index, result); + in_out_t value1 = tensor_read(input1, shape, index); + in_out_t result = ~value1; + tensor_write(output, shape, index, result); } ---- *Supported Data Types:* |=== -|Profile|Mode|in_t +|Profile|Mode|in_out_t |Any|signed 8|int8_t |Any|signed 16|int16_t @@ -86,8 +86,8 @@ Elementwise ceiling operation |=== |Argument|Type|Name|Shape|Description -|Input|in_t*|input1|shape|Input tensor -|Output|in_t*|output|shape|Output tensor of same type, size as the input tensor +|Input|in_out_t*|input1|shape|Input tensor +|Output|in_out_t*|output|shape|Output tensor of same type, size as the input tensor |=== *Operation Function:* @@ -95,16 +95,16 @@ Elementwise ceiling operation [source,c++] ---- for_each(index in shape) { - in_t value1 = tensor_read(input1, shape, index); - in_t result = apply_ceil(value1); - tensor_write(output, shape, index, result); + in_out_t value1 = tensor_read(input1, shape, index); + in_out_t result = apply_ceil(value1); + tensor_write(output, shape, index, result); } ---- *Supported Data Types:* |=== -|Profile|Mode|in_t +|Profile|Mode|in_out_t |MI, MT|floating-point|float_t |=== @@ -118,8 +118,8 @@ Elementwise count leading zeros operation |=== |Argument|Type|Name|Shape|Description -|Input|in_t*|input1|shape|Input tensor -|Output|in_t*|output|shape|Output tensor of same type, size as the input tensor +|Input|in_out_t*|input1|shape|Input tensor +|Output|in_out_t*|output|shape|Output tensor of same type, size as the input tensor |=== *Operation Function:* @@ -127,15 +127,15 @@ Elementwise count leading zeros operation [source,c++] ---- for_each(index in shape) { - in_t value1 = tensor_read(input1, shape, index); - in_t result = count_leading_zeros(value1); - tensor_write(output, shape, index, result); + in_out_t value1 = tensor_read(input1, shape, index); + in_out_t result = 
count_leading_zeros(value1); + tensor_write(output, shape, index, result); } ---- *Supported Data Types:* |=== -|Profile|Mode|in_t +|Profile|Mode|in_out_t |Any|signed 32|int32_t |=== @@ -149,8 +149,8 @@ Elementwise e to the x operation |=== |Argument|Type|Name|Shape|Description -|Input|in_t*|input1|shape|Input tensor -|Output|in_t*|output|shape|Output tensor of same type, size as the input tensor +|Input|in_out_t*|input1|shape|Input tensor +|Output|in_out_t*|output|shape|Output tensor of same type, size as the input tensor |=== *Operation Function:* @@ -158,16 +158,16 @@ Elementwise e to the x operation [source,c++] ---- for_each(index in shape) { - in_t value1 = tensor_read(input1, shape, index); - in_t result = apply_exp(value1); - tensor_write(output, shape, index, result); + in_out_t value1 = tensor_read(input1, shape, index); + in_out_t result = apply_exp(value1); + tensor_write(output, shape, index, result); } ---- *Supported Data Types:* |=== -|Profile|Mode|in_t +|Profile|Mode|in_out_t |MI, MT|floating-point|float_t |=== @@ -181,8 +181,8 @@ Elementwise floor operation |=== |Argument|Type|Name|Shape|Description -|Input|in_t*|input1|shape|Input tensor -|Output|in_t*|output|shape|Output tensor of same type, size as the input tensor +|Input|in_out_t*|input1|shape|Input tensor +|Output|in_out_t*|output|shape|Output tensor of same type, size as the input tensor |=== *Operation Function:* @@ -190,16 +190,16 @@ Elementwise floor operation [source,c++] ---- for_each(index in shape) { - in_t value1 = tensor_read(input1, shape, index); - in_t result = apply_floor(value1); - tensor_write(output, shape, index, result); + in_out_t value1 = tensor_read(input1, shape, index); + in_out_t result = apply_floor(value1); + tensor_write(output, shape, index, result); } ---- *Supported Data Types:* |=== -|Profile|Mode|in_t +|Profile|Mode|in_out_t |MI, MT|floating-point|float_t |=== @@ -213,8 +213,8 @@ Elementwise natural logarithm operation |=== 
|Argument|Type|Name|Shape|Description -|Input|in_t*|input1|shape|Input tensor -|Output|in_t*|output|shape|Output tensor of same type, size as the input tensor +|Input|in_out_t*|input1|shape|Input tensor +|Output|in_out_t*|output|shape|Output tensor of same type, size as the input tensor |=== *Operation Function:* @@ -222,16 +222,16 @@ Elementwise natural logarithm operation [source,c++] ---- for_each(index in shape) { - in_t value1 = tensor_read(input1, shape, index); - in_t result = apply_log(value1); - tensor_write(output, shape, index, result); + in_out_t value1 = tensor_read(input1, shape, index); + in_out_t result = apply_log(value1); + tensor_write(output, shape, index, result); } ---- *Supported Data Types:* |=== -|Profile|Mode|in_t +|Profile|Mode|in_out_t |MI, MT|floating-point|float_t |=== @@ -245,8 +245,8 @@ Elementwise logical NOT of input. |=== |Argument|Type|Name|Shape|Description -|Input|in_t*|input1|shape|Input tensor -|Output|in_t*|output|shape|Output tensor of same type, size as the input tensor +|Input|in_out_t*|input1|shape|Input tensor +|Output|in_out_t*|output|shape|Output tensor of same type, size as the input tensor |=== *Operation Function:* @@ -254,16 +254,16 @@ Elementwise logical NOT of input. [source,c++] ---- for_each(index in shape) { - in_t value1 = tensor_read(input1, shape1, index); - in_t result = !value1; - tensor_write(output, shape, index, result); + in_out_t value1 = tensor_read(input1, shape1, index); + in_out_t result = !value1; + tensor_write(output, shape, index, result); } ---- *Supported Data Types:* |=== -|Profile|Mode|in_t +|Profile|Mode|in_out_t |Any|bool|bool_t |=== @@ -277,31 +277,31 @@ Elementwise negation operation |=== |Argument|Type|Name|Shape|Description -|Input|in_t*|input1|shape|Input tensor -|Attribute|in_t|input1_zp|-|Input 1 zero point. Must be zero for non-int8 types. -|Attribute|in_t|output_zp|-|Output zero point. Must be zero for non-int8 types. 
-|Output|in_t*|output|shape|Output tensor of same type, size as the input tensor +|Input|in_out_t*|input1|shape|Input tensor +|Attribute|in_out_t|input1_zp|-|Input 1 zero point. Must be zero for non-int8 types. +|Attribute|in_out_t|output_zp|-|Output zero point. Must be zero for non-int8 types. +|Output|in_out_t*|output|shape|Output tensor of same type, size as the input tensor |=== *Operation Function:* [source,c++] ---- -ERROR_IF(in_t != int8_t && input1_zp != 0) // Zero point only for int8_t -ERROR_IF(in_t != int8_t && output_zp != 0) // Zero point only for int8_t +ERROR_IF(in_out_t != int8_t && input1_zp != 0) // Zero point only for int8_t +ERROR_IF(in_out_t != int8_t && output_zp != 0) // Zero point only for int8_t for_each(index in shape) { - in_t value1 = tensor_read(input1, shape, index); + in_out_t value1 = tensor_read(input1, shape, index); acc_t value = (acc_t)value1 - input1_zp; value = apply_sub(0, value); - in_t result = (in_t)apply_clip(value + output_zp, minimum, maximum); - tensor_write(output, shape, index, result); + in_out_t result = (in_out_t)apply_clip(value + output_zp, minimum, maximum); + tensor_write(output, shape, index, result); } ---- *Supported Data Types:* |=== -|Profile|Mode|in_t|acc_t +|Profile|Mode|in_out_t|acc_t |Any|signed 8|int8_t|int32_t |Any|signed 16|int16_t|int32_t @@ -318,8 +318,8 @@ Elementwise reciprocal operation. For integer operation, a TABLE should be used |=== |Argument|Type|Name|Shape|Description -|Input|in_t*|input1|shape|Input tensor -|Output|in_t*|output|shape|Output tensor of same type, size as the input tensor +|Input|in_out_t*|input1|shape|Input tensor +|Output|in_out_t*|output|shape|Output tensor of same type, size as the input tensor |=== *Operation Function:* @@ -327,16 +327,16 @@ Elementwise reciprocal operation. 
For integer operation, a TABLE should be used [source,c++] ---- for_each(index in shape) { - in_t value1 = tensor_read(input1, shape1, index); - in_t result = 1.0 / value1; - tensor_write(output, shape, index, result); + in_out_t value1 = tensor_read(input1, shape1, index); + in_out_t result = 1.0 / value1; + tensor_write(output, shape, index, result); } ---- *Supported Data Types:* |=== -|Profile|Mode|in_t +|Profile|Mode|in_out_t |MI, MT|floating-point|float_t |=== @@ -350,8 +350,8 @@ Elementwise reciprocal square root operation. For integer operation, a TABLE sho |=== |Argument|Type|Name|Shape|Description -|Input|in_t*|input1|shape|Input tensor -|Output|in_t*|output|shape|Output tensor of same type, size as the input tensor +|Input|in_out_t*|input1|shape|Input tensor +|Output|in_out_t*|output|shape|Output tensor of same type, size as the input tensor |=== *Operation Function:* @@ -359,16 +359,16 @@ Elementwise reciprocal square root operation. For integer operation, a TABLE sho [source,c++] ---- for_each(index in shape) { - in_t value1 = tensor_read(input1, shape1, index); - in_t result = 1.0 / apply_sqrt(value1); - tensor_write(output, shape, index, result); + in_out_t value1 = tensor_read(input1, shape1, index); + in_out_t result = 1.0 / apply_sqrt(value1); + tensor_write(output, shape, index, result); } ---- *Supported Data Types:* |=== -|Profile|Mode|in_t +|Profile|Mode|in_out_t |MI, MT|floating-point|float_t |=== diff --git a/chapters/reduction.adoc b/chapters/reduction.adoc index 11db960..fdf30df 100644 --- a/chapters/reduction.adoc +++ b/chapters/reduction.adoc @@ -18,9 +18,9 @@ Reduce a tensor along the given axis with a logical AND operation |=== |Argument|Type|Name|Shape|Description -|Input|in_t*|input|shape1|Input tensor with rank from 1 to 4 +|Input|in_out_t*|input|shape1|Input tensor with rank from 1 to 4 |Attribute|int32_t|axis|-|Axis to reduce, in range from 0 to rank(shape1)-1 -|Output|in_t*|output|shape|Output tensor. 
Same rank as the input tensor. +|Output|in_out_t*|output|shape|Output tensor. Same rank as the input tensor. |=== *Operation Function:* @@ -32,22 +32,22 @@ ERROR_IF(shape[axis] != 1); // Initialize output state to true for_each(index in shape) { - tensor_write(output, shape, index, true); + tensor_write(output, shape, index, true); } for_each(index in shape1) { out_index = index; out_index[axis] = 0; - in_t value = tensor_read(input, shape1, index); - in_t state = tensor_read(output, shape, out_index); + in_out_t value = tensor_read(input, shape1, index); + in_out_t state = tensor_read(output, shape, out_index); state = state && value; - tensor_write(output, shape, out_index, state); + tensor_write(output, shape, out_index, state); } ---- *Supported Data Types:* |=== -|Profile|Mode|in_t +|Profile|Mode|in_out_t |Any|Boolean|bool_t |=== @@ -61,9 +61,9 @@ Reduce a tensor along the given axis with a logical OR operation |=== |Argument|Type|Name|Shape|Description -|Input|in_t*|input|shape1|Input tensor with rank from 1 to 4 +|Input|in_out_t*|input|shape1|Input tensor with rank from 1 to 4 |Attribute|int32_t|axis|-|Axis to reduce, in range from 0 to rank(shape1)-1 -|Output|in_t*|output|shape|Output tensor. Same rank as the input tensor. +|Output|in_out_t*|output|shape|Output tensor. Same rank as the input tensor. 
|=== *Operation Function:* @@ -75,22 +75,22 @@ ERROR_IF(shape[axis] != 1); // Initialize output state to false for_each(index in shape) { - tensor_write(output, shape, index, false); + tensor_write(output, shape, index, false); } for_each(index in shape1) { out_index = index; out_index[axis] = 0; - in_t value = tensor_read(input, shape1, index); - in_t state = tensor_read(output, shape, out_index); + in_out_t value = tensor_read(input, shape1, index); + in_out_t state = tensor_read(output, shape, out_index); state = state || value; - tensor_write(output, shape, out_index, state); + tensor_write(output, shape, out_index, state); } ---- *Supported Data Types:* |=== -|Profile|Mode|in_t +|Profile|Mode|in_out_t |Any|Boolean|bool_t |=== @@ -104,9 +104,9 @@ Reduce a tensor along the given axis with a maximum operation |=== |Argument|Type|Name|Shape|Description -|Input|in_t*|input|shape1|Input tensor with rank from 1 to 4 +|Input|in_out_t*|input|shape1|Input tensor with rank from 1 to 4 |Attribute|int32_t|axis|-|Axis to reduce, in range from 0 to rank(shape1)-1 -|Output|in_t*|output|shape|Output tensor. Same rank as the input tensor. +|Output|in_out_t*|output|shape|Output tensor. Same rank as the input tensor. 
|=== *Operation Function:* @@ -116,22 +116,22 @@ Reduce a tensor along the given axis with a maximum operation ERROR_IF(axis < 0 || axis >= rank(shape1)); ERROR_IF(shape[axis] != 1); for_each(index in shape) { - tensor_write(output, shape, index, minimum); + tensor_write(output, shape, index, minimum); } for_each(index in shape1) { out_index = index; out_index[axis] = 0; - in_t value = tensor_read(input, shape1, index); - in_t state = tensor_read(output, shape, out_index); - state = apply_max(state, value); - tensor_write(output, shape, out_index, state); + in_out_t value = tensor_read(input, shape1, index); + in_out_t state = tensor_read(output, shape, out_index); + state = apply_max(state, value); + tensor_write(output, shape, out_index, state); } ---- *Supported Data Types:* |=== -|Profile|Mode|in_t +|Profile|Mode|in_out_t |Any|signed 8|int8_t |Any|signed 16|int16_t @@ -147,9 +147,9 @@ Reduce a tensor along the given axis with a minimum operation |=== |Argument|Type|Name|Shape|Description -|Input|in_t*|input|shape1|Input tensor with rank from 1 to 4 +|Input|in_out_t*|input|shape1|Input tensor with rank from 1 to 4 |Attribute|int32_t|axis|-|Axis to reduce, in range from 0 to rank(shape1)-1 -|Output|in_t*|output|shape|Output tensor. Same rank as the input tensor. +|Output|in_out_t*|output|shape|Output tensor. Same rank as the input tensor. 
|=== *Operation Function:* @@ -159,22 +159,22 @@ Reduce a tensor along the given axis with a minimum operation ERROR_IF(axis < 0 || axis >= rank(shape1)); ERROR_IF(shape[axis] != 1); for_each(index in shape) { - tensor_write(output, shape, index, maximum); + tensor_write(output, shape, index, maximum); } for_each(index in shape1) { out_index = index; out_index[axis] = 0; - in_t value = tensor_read(input, shape1, index); - in_t state = tensor_read(output, shape, out_index); - state = apply_min(state, value); - tensor_write(output, shape, out_index, state); + in_out_t value = tensor_read(input, shape1, index); + in_out_t state = tensor_read(output, shape, out_index); + state = apply_min(state, value); + tensor_write(output, shape, out_index, state); } ---- *Supported Data Types:* |=== -|Profile|Mode|in_t +|Profile|Mode|in_out_t |Any|signed 8|int8_t |Any|signed 16|int16_t @@ -191,9 +191,9 @@ Reduce a tensor along the given axis by computing the product of the axis. |=== |Argument|Type|Name|Shape|Description -|Input|in_t*|input|shape1|Input tensor with rank from 1 to 4 +|Input|in_out_t*|input|shape1|Input tensor with rank from 1 to 4 |Attribute|int32_t|axis|-|Axis to reduce, in range from 0 to rank(shape1)-1 -|Output|in_t*|output|shape|Output tensor. Same rank as the input tensor. +|Output|in_out_t*|output|shape|Output tensor. Same rank as the input tensor. |=== *Operation Function:* @@ -203,22 +203,22 @@ Reduce a tensor along the given axis by computing the product of the axis. 
ERROR_IF(axis < 0 || axis >= rank(shape1)); ERROR_IF(shape[axis] != 1); for_each(index in shape) { - tensor_write(output, shape, index, 1.0); + tensor_write(output, shape, index, 1.0); } for_each(index in shape1) { out_index = index; out_index[axis] = 0; - in_t value = tensor_read(input, shape1, index); - in_t state = tensor_read(output, shape, out_index); + in_out_t value = tensor_read(input, shape1, index); + in_out_t state = tensor_read(output, shape, out_index); state = state * value; - tensor_write(output, shape, out_index, state); + tensor_write(output, shape, out_index, state); } ---- *Supported Data Types:* |=== -|Profile|Mode|in_t +|Profile|Mode|in_out_t |MI, MT|floating-point|float_t |=== @@ -232,9 +232,9 @@ Reduce a tensor along the given axis by computing the sum of the axis. |=== |Argument|Type|Name|Shape|Description -|Input|in_t*|input|shape1|Input tensor with rank from 1 to 4 +|Input|in_out_t*|input|shape1|Input tensor with rank from 1 to 4 |Attribute|int32_t|axis|-|Axis to reduce, in range from 0 to rank(shape1)-1 -|Output|in_t*|output|shape|Output tensor. Same rank as the input tensor. +|Output|in_out_t*|output|shape|Output tensor. Same rank as the input tensor. |=== *Operation Function:* @@ -244,22 +244,22 @@ Reduce a tensor along the given axis by computing the sum of the axis. 
ERROR_IF(axis < 0 || axis >= rank(shape1)); ERROR_IF(shape[axis] != 1); for_each(index in shape) { - tensor_write(output, shape, index, 0); + tensor_write(output, shape, index, 0); } for_each(index in shape1) { out_index = index; out_index[axis] = 0; - in_t value = tensor_read(input, shape1, index); - in_t state = tensor_read(output, shape, out_index); - state = apply_add(state, value); - tensor_write(output, shape, out_index, state); + in_out_t value = tensor_read(input, shape1, index); + in_out_t state = tensor_read(output, shape, out_index); + state = apply_add(state, value); + tensor_write(output, shape, out_index, state); } ---- *Supported Data Types:* |=== -|Profile|Mode|in_t +|Profile|Mode|in_out_t |Any|signed 32|int32_t |MI, MT|floating-point|float_t diff --git a/chapters/tensor_ops.adoc b/chapters/tensor_ops.adoc index cfab5ba..b2f0754 100644 --- a/chapters/tensor_ops.adoc +++ b/chapters/tensor_ops.adoc @@ -74,21 +74,21 @@ When calculating the average, only the number of valid input tensor values, but |=== |Argument|Type|Name|Shape|Description -|Input|in_t*|input|[N,IH,IW,C]|Input tensor 4D +|Input|in_out_t*|input|[N,IH,IW,C]|Input tensor 4D |Attribute|int*|kernel|[2]|[kernel_y, kernel_x] |Attribute|int*|stride|[2]|[stride_y, stride_x] |Attribute|int*|pad|[4]|[pad_top, pad_bottom, pad_left, pad_right] -|Attribute|in_t|input_zp|-|Input tensor zero point. Must be zero for non-int8 types. -|Attribute|in_t|output_zp|-|Output tensor zero point. Must be zero for non-int8 types. -|Output|in_t*|output|[N,H,W,C]|Output tensor 4D +|Attribute|in_out_t|input_zp|-|Input tensor zero point. Must be zero for non-int8 types. +|Attribute|in_out_t|output_zp|-|Output tensor zero point. Must be zero for non-int8 types. 
+|Output|in_out_t*|output|[N,H,W,C]|Output tensor 4D |=== *Operation Function:* [source,c++] ---- -ERROR_IF(in_t != int8_t && input_zp != 0); // Zero point only for int8_t -ERROR_IF(in_t != int8_t && output_zp != 0); // Zero point only for int8_t +ERROR_IF(in_out_t != int8_t && input_zp != 0); // Zero point only for int8_t +ERROR_IF(in_out_t != int8_t && output_zp != 0); // Zero point only for int8_t ERROR_IF(kernel_y < 1 || kernel_x < 1); // kernel size must be >= 1 ERROR_IF(stride_y < 1 || stride_x < 1); ERROR_IF(pad_top < 0 || pad_bottom < 0 || pad_left < 0 || pad_right < 0); @@ -102,7 +102,7 @@ ERROR_IF(H != idiv((IH + pad_top + pad_bottom + stride_y - kernel_y), stride_y)) ERROR_IF(W != idiv((IW + pad_left + pad_right + stride_x - kernel_x), stride_x)); for_each(0 <= n < N, 0 <= oy < H, 0 <= ox < W, 0 <= c < C ) { - in_t output_val; + in_out_t output_val; acc_t acc = 0; int count = 0; iy = oy * stride_y - pad_top; @@ -114,25 +114,25 @@ for_each(0 <= n < N, 0 <= oy < H, 0 <= ox < W, 0 <= c < C ) { // average, padding does not count if (0 <= y < IH and 0 <= x < IW) { count++; - acc_t value = tensor_read(input, [N,IH,IW,C], [n,y,x,c]); + acc_t value = tensor_read(input, [N,IH,IW,C], [n,y,x,c]); value = value - input_zp; acc = apply_add(acc, value); } } - if (is_float(in_t)) { + if (is_float(in_out_t)) { output_val = acc / (float)count; } else { scale_t scale = reciprocal_scale(count); acc = apply_scale_32(acc, scale.multiplier, scale.shift, false); - output_val = (in_t)apply_clip(acc + output_zp, minimum, maximum) + output_val = (in_out_t)apply_clip(acc + output_zp, minimum, maximum) } - tensor_write(output, [N,H,W,C], [n,oy,ox,c], output_val); + tensor_write(output, [N,H,W,C], [n,oy,ox,c], output_val); } ---- *Supported Data Types:* |=== -|Profile|Mode|in_t|acc_t +|Profile|Mode|in_out_t|acc_t |Any|signed 8|int8_t|int32_t |Any|signed 16|int16_t|int32_t @@ -150,13 +150,13 @@ Performs a 2D convolution over the given tensor input, using the weight tensor. 
|Input|in_t*|input|[N,IH,IW,IC]|Input tensor |Input (MT profile) Attribute (BI/MI profiles)|weight_t*|weight|[OC,KH,KW,IC]|Weight kernel size KH x KW -|Input (MT profile) Attribute (BI/MI profiles)|acc_t*|bias|[OC]|Per output channel bias data. +|Input (MT profile) Attribute (BI/MI profiles)|out_t*|bias|[OC]|Per output channel bias data. |Attribute|int*|pad|[4]|[pad_top, pad_bottom, pad_left, pad_right] |Attribute|int*|stride|[2]|[stride_y, stride_x] |Attribute|int*|dilation|[2]|[dilation_y, dilation_x] |Attribute|in_t|input_zp|-|Input tensor zero point. Must be zero for non-int8 types. |Attribute|weight_t|weight_zp|-|Weight zero point. Must be zero for non-int8 types. -|Output|acc_t*|output|[N,H,W,OC]|Output tensor +|Output|out_t*|output|[N,H,W,OC]|Output tensor |=== *Operation Function* @@ -170,29 +170,29 @@ ERROR_IF(stride_y < 1 || stride_x < 1); ERROR_IF(dilation_y < 1 || dilation_x < 1); pad = flatten([0,0], pad, [0,0]); for_each(0 <= n < N, 0 <= oy < H, 0 <= ox < W; 0 <= oc < OC) { - acc_t acc = 0; + out_t acc = 0; iy = oy * stride_y - pad_top; ix = ox * stride_x - pad_left; for_each(0 <= ky < KH, 0 <= kx < KW, 0 <= ic < IC) { y = iy + ky * dilation_y; x = ix + kx * dilation_x; if (0 <= y < IH && 0 <= x < IW) { - acc_t value = tensor_read(input, [N,IH,IW,IC], [n,y,x,ic]); - acc_t weight = tensor_read(weight, [OC,KH,KW,IC], [oc,ky,kx,ic]); + out_t value = tensor_read(input, [N,IH,IW,IC], [n,y,x,ic]); + out_t weight = tensor_read(weight, [OC,KH,KW,IC], [oc,ky,kx,ic]); value = value - input_zp; weight = weight - weight_zp; - acc = apply_add(acc, value * weight); + acc = apply_add(acc, value * weight); } } - acc = apply_add(acc, bias[oc]); - tensor_write(output, [N,H,W,OC], [n,oy,ox,oc], acc); + acc = apply_add(acc, bias[oc]); + tensor_write(output, [N,H,W,OC], [n,oy,ox,oc], acc); } ---- *Supported Data Types:* |=== -|Profile|Mode|in_t|weight_t|acc_t +|Profile|Mode|in_t|weight_t|out_t |Any|signed 8x8|int8_t|int8_t|int32_t |Any|signed 8x4|int8_t|int4_t|int32_t @@ 
-211,13 +211,13 @@ Performs a 3D convolution over the given input tensor. |Input|in_t*|input|[N,ID,IH,IW,IC]|Input tensor |Input (MT profile) Attribute (BI/MI profiles)|weight_t*|weight|[OC,KD,KH,KW,IC]|Weight kernel size KDxKHxKW -|Input (MT profile) Attribute (BI/MI profiles)|acc_t*|bias|[OC]|Per output channel bias data. +|Input (MT profile) Attribute (BI/MI profiles)|out_t*|bias|[OC]|Per output channel bias data. |Attribute|int*|pad|[6]|[pad_d0, pad_d1, pad_top, pad_bottom, pad_left, pad_right] |Attribute|int*|stride|[3]|[stride_d, stride_y, stride_x] |Attribute|int*|dilation|[3]|[dilation_d, dilation_y, dilation_x] |Attribute|in_t|input_zp|-|Input tensor zero point. Must be zero for non-int8 types. |Attribute|weight_t|weight_zp|-|Weight zero point. Must be zero for non-int8 types. -|Output|acc_t*|output|[N,D,H,W,OC]|Output tensor +|Output|out_t*|output|[N,D,H,W,OC]|Output tensor |=== *Operation Function* @@ -231,7 +231,7 @@ ERROR_IF(stride_d < 1 || stride_y < 1 || stride_x < 1); ERROR_IF(dilation_d < 1 || dilation_y < 1 || dilation_x < 1); pad = flatten([0,0], pad, [0,0]); for_each(0 <= n < N, 0 <= od < D, 0 <= oy < H, 0 <= ox < W; 0 <= oc < OC) { - acc_t acc = 0; + out_t acc = 0; id = od * stride_d - pad_d0; iy = oy * stride_y - pad_top; ix = ox * stride_x - pad_left; @@ -240,22 +240,22 @@ for_each(0 <= n < N, 0 <= od < D, 0 <= oy < H, 0 <= ox < W; 0 <= oc < OC) { y = iy + ky * dilation_y; x = ix + kx * dilation_x; if (0 <= x < IW && 0 <= y < IH && 0 <= d <= ID) { - acc_t value = tensor_read(input, [N,ID,IH,IW,IC], [n,d,y,x,ic]); - acc_t weight = tensor_read(weight,[OC,KD,KH,KW,IC],[oc,kd,ky,kx,ic]); + out_t value = tensor_read(input, [N,ID,IH,IW,IC], [n,d,y,x,ic]); + out_t weight = tensor_read(weight,[OC,KD,KH,KW,IC],[oc,kd,ky,kx,ic]); value = value - input_zp; weight = weight - weight_zp; - acc = apply_add(acc, value * weight); + acc = apply_add(acc, value * weight); } } - acc = apply_add(acc, bias[oc]); - tensor_write(output, [N,D,H,W,OC], [n,od,oy,ox,oc], 
acc); + acc = apply_add(acc, bias[oc]); + tensor_write(output, [N,D,H,W,OC], [n,od,oy,ox,oc], acc); } ---- *Supported Data Types:* |=== -|Profile|Mode|in_t|weight_t|acc_t +|Profile|Mode|in_t|weight_t|out_t |Any|signed 8x8|int8_t|int8_t|int32_t |Any|signed 8x4|int8_t|int4_t|int32_t @@ -275,13 +275,13 @@ Performs 2D convolutions separately over each channel of the given tensor input, |Input|in_t*|input|[N,H,W,C]|Input tensor |Input (MT profile) Attribute (BI/MI profiles)|weight_t*|weight|[KH,KW,C,M]|Weight kernel size KH x KW -|Input (MT profile) Attribute (BI/MI profiles)|acc_t*|bias|[C*M]|Per output channel bias data. +|Input (MT profile) Attribute (BI/MI profiles)|out_t*|bias|[C*M]|Per output channel bias data. |Attribute|int*|pad|[4]|[pad_top, pad_bottom, pad_left, pad_right] |Attribute|int*|stride|[2]|[stride_y, stride_x] |Attribute|int*|dilation|[2]|[dilation_y, dilation_x] |Attribute|in_t|input_zp|-|Input tensor zero point. Must be zero for non-int8 types. |Attribute|weight_t|weight_zp|-|Weight zero point. Must be zero for non-int8 types. 
-|Output|acc_t*|output|[N,H,W,C*M]|Output tensor +|Output|out_t*|output|[N,H,W,C*M]|Output tensor |=== *Operation Function* @@ -295,29 +295,29 @@ ERROR_IF(stride_y < 1 || stride_x < 1); ERROR_IF(dilation_y < 1 || dilation_x < 1); pad = flatten([0,0], pad, [0,0]); for_each(0 <= n < N, 0 <= oy < H, 0 <= ox < W, 0 <= c < C, 0 <= m < M) { - acc_t acc = 0; + out_t acc = 0; iy = oy * stride_y - pad_top; ix = ox * stride_x - pad_left; for_each(0 <= ky < KH, 0 <= kx < KW) { y = iy + ky * dilation_y; x = ix + kx * dilation_x; if (0 <= y < IH && 0 <= x < IW) { - acc_t value = tensor_read(input, [N,H,W,C], [n,y,x,c]); - acc_t weight = tensor_read(weight, [KH,KW,C,M], [ky,kx,c,m]); + out_t value = tensor_read(input, [N,H,W,C], [n,y,x,c]); + out_t weight = tensor_read(weight, [KH,KW,C,M], [ky,kx,c,m]); value = value - input_zp; weight = weight - weight_zp; - acc = apply_add(acc, value * weight); + acc = apply_add(acc, value * weight); } } - acc = apply_add(acc, bias[(c * M) + m]); - tensor_write(output, [N,H,W,C * M], [n,oy,ox,c * M + m], acc); + acc = apply_add(acc, bias[(c * M) + m]); + tensor_write(output, [N,H,W,C * M], [n,oy,ox,c * M + m], acc); } ---- *Supported Data Types:* |=== -|Profile|Mode|in_t|weight_t|acc_t +|Profile|Mode|in_t|weight_t|out_t |Any|signed 8x8|int8_t|int8_t|int32_t |Any|signed 8x4|int8_t|int4_t|int32_t @@ -336,10 +336,10 @@ Performs a fully connected network. |Input|in_t*|input|[N,IC]|Input tensor |Attribute|weight_t*|weight|[OC,IC]|Weights -|Attribute|acc_t*|bias|[OC]|Per output channel bias data. +|Attribute|out_t*|bias|[OC]|Per output channel bias data. |Attribute|in_t|input_zp|-|Input tensor zero point. Must be zero for non-int8 types. |Attribute|weight_t|weight_zp|-|Weight zero point. Must be zero for non-int8 types. -|Output|acc_t*|output|[N,OC]|Output tensor +|Output|out_t*|output|[N,OC]|Output tensor |=== *Operation Function* @@ -349,23 +349,23 @@ Performs a fully connected network.
ERROR_IF(in_t != int8_t && input_zp != 0); // Zero point only for int8_t ERROR_IF(weight_t != int8_t && weight_zp != 0); for_each(0 <= n < N, 0 <= oc < OC) { - acc_t acc = 0; + out_t acc = 0; for_each(0 <= ic < IC) { - acc_t value = tensor_read(input, [N,IC], [n,ic]); - acc_t weight = tensor_read(weight, [OC,IC], [oc,ic]); + out_t value = tensor_read(input, [N,IC], [n,ic]); + out_t weight = tensor_read(weight, [OC,IC], [oc,ic]); value = value - input_zp; weight = weight - weight_zp; - acc = apply_add(acc, value * weight); + acc = apply_add(acc, value * weight); } - acc = apply_add(acc, bias[oc]); - tensor_write(output, [N,OC], [n,oc], acc); + acc = apply_add(acc, bias[oc]); + tensor_write(output, [N,OC], [n,oc], acc); } ---- *Supported Data Types:* |=== -|Profile|Mode|in_t|weight_t|acc_t +|Profile|Mode|in_t|weight_t|out_t |Any|signed 8x8|int8_t|int8_t|int32_t |Any|signed 8x4|int8_t|int4_t|int32_t @@ -385,7 +385,7 @@ Performs two dimensional matrix multiplications. This allows both inputs to be a |Input|in_t*|B|[N,C,W]|Input tensor B, N matrices of size CxW |Attribute|in_t|A_zp|-|Input tensor A zero point. Must be zero for non-int8 types. |Attribute|in_t|B_zp|-|Input tensor B zero point. Must be zero for non-int8 types. -|Output|acc_t*|output|[N,H,W]|Output tensor, N matrices of size HxW +|Output|out_t*|output|[N,H,W]|Output tensor, N matrices of size HxW |=== *Operation Function* @@ -394,22 +394,22 @@ Performs two dimensional matrix multiplications. 
This allows both inputs to be a ---- ERROR_IF(in_t != int8_t && (A_zp != 0 || B_zp != 0)); // Zero point only for int8_t for_each(0 <= n < N, 0 <= h < H, 0 <= w < W) { - acc_t acc = 0; + out_t acc = 0; for_each(0 <= c < C) { - acc_t value1 = tensor_read(A, [N,H,C], [n,h,c]); - acc_t value2 = tensor_read(B, [N,C,W], [n,c,w]); + out_t value1 = tensor_read(A, [N,H,C], [n,h,c]); + out_t value2 = tensor_read(B, [N,C,W], [n,c,w]); value1 = value1 - A_zp; value2 = value2 - B_zp; - acc = apply_add(acc, value1 * value2); + acc = apply_add(acc, value1 * value2); } - tensor_write(output, [N,H,W], [n,h,w], acc); + tensor_write(output, [N,H,W], [n,h,w], acc); } ---- *Supported Data Types:* |=== -|Profile|Mode|in_t|acc_t +|Profile|Mode|in_t|out_t |Any|signed 8x8|int8_t|int32_t |Any|signed 16x16|int16_t|int48_t @@ -424,11 +424,11 @@ This performs a max pooling over the given input tensor. A sliding window of siz |=== |Argument|Type|Name|Shape|Description -|Input|in_t*|input|[N,IH,IW,C]|Input tensor 4D +|Input|in_out_t*|input|[N,IH,IW,C]|Input tensor 4D |Attribute|int*|kernel|[2]|[kernel_y, kernel_x] |Attribute|int*|stride|[2]|[stride_y, stride_x] |Attribute|int*|pad|[4]|[pad_top, pad_bottom, pad_left, pad_right] -|Output|in_t*|output|[N,H,W,C]|Output tensor 4D +|Output|in_out_t*|output|[N,H,W,C]|Output tensor 4D |=== *Operation Function:* @@ -448,25 +448,25 @@ ERROR_IF(H != idiv((IH + pad_top + pad_bottom + stride_y - kernel_y), stride_y)) ERROR_IF(W != idiv((IW + pad_left + pad_right + stride_x - kernel_x), stride_x)); for_each(0 <= n < N, 0 <= oy < H, 0 <= ox < W, 0 <= c < C ) { - in_t acc = minimum_value; + in_out_t acc = minimum_value; iy = oy * stride_y - pad_top; ix = ox * stride_x - pad_left; for_each( 0 <= ky < kernel_y, 0 <= kx < kernel_x ) { y = iy + ky; x = ix + kx; if (y >= 0 && y < IH && x >= 0 && x < IW) { - in_t value = tensor_read(input, [N,IH,IW,C], [n,y,x,c]); + in_out_t value = tensor_read(input, [N,IH,IW,C], [n,y,x,c]); acc = apply_max(acc, value); } } - 
tensor_write(output, [N,H,W,C], [n,oy,ox,c], acc); + tensor_write(output, [N,H,W,C], [n,oy,ox,c], acc); } ---- *Supported Data Types:* |=== -|Profile|Mode|in_t +|Profile|Mode|in_out_t |Any|signed 8|int8_t |Any|16-bit|int16_t @@ -484,13 +484,13 @@ Performs a 2D transposed convolution over the given tensor input, using the weig |Input|in_t*|input|[N,IH,IW,IC]|Input tensor |Input (MT profile) Attribute (BI/MI profiles)|weight_t*|weight|[OC,KH,KW,IC]|Weight kernel size KH x KW -|Input (MT profile) Attribute (BI/MI profiles)|acc_t*|bias|[OC]|Per output channel bias data. +|Input (MT profile) Attribute (BI/MI profiles)|out_t*|bias|[OC]|Per output channel bias data. |Attribute|int*|out_pad|[2]|[out_pad_top, out_pad_left] |Attribute|int*|stride|[2]|[stride_y, stride_x] |Attribute|int*|out_shape|[4]|[N,OH,OW,OC] |Attribute|in_t|input_zp|-|Input tensor zero point. Must be zero for non-int8 types. |Attribute|weight_t|weight_zp|-|Weight zero point. Must be zero for non-int8 types. -|Output|acc_t*|output|[N,OH,OW,OC]|Output tensor +|Output|out_t*|output|[N,OH,OW,OC]|Output tensor |=== *Operation Function* @@ -502,20 +502,20 @@ ERROR_IF(weight_t != int8_t && weight_zp != 0); ERROR_IF(out_pad_top < 0 || out_pad_left < 0); ERROR_IF(stride_y < 1 || stride_x < 1); for_each(index in out_shape) { - tensor_write(output, [N,OH,OW,OC], index, bias[index[3]]) + tensor_write(output, [N,OH,OW,OC], index, bias[index[3]]); } for_each(0 <= n < N, 0 <= iy < IH, 0 <= ix < IW, 0 <= oc < OC, 0 <= ic < IC, 0 <= ky < KH, 0 <= kx < KW) { oy = iy * stride_y - out_pad_top + ky; ox = ix * stride_x - out_pad_left + kx; if (oy >= 0 && oy < OH && ox >= 0 && ox < OW) { - acc_t acc = tensor_read(output, [N,OH,OW,OC], [n,oy,ox,oc]); - acc_t value = tensor_read(input, [N,IH,IW,IC], [n,iy,ix,ic]); - acc_t weight = tensor_read(weight, [OC,KH,KW,IC], [oc,ky,kx,ic]); + out_t acc = tensor_read(output, [N,OH,OW,OC], [n,oy,ox,oc]); + out_t value = tensor_read(input, [N,IH,IW,IC], [n,iy,ix,ic]); + out_t weight =
tensor_read(weight, [OC,KH,KW,IC], [oc,ky,kx,ic]); value = value - input_zp; weight = weight - weight_zp; - acc = apply_add(acc, value * weight); - tensor_write(output, [N,OH,OW,OC], [n,oy,ox,oc], acc); + acc = apply_add(acc, value * weight); + tensor_write(output, [N,OH,OW,OC], [n,oy,ox,oc], acc); } } ---- @@ -523,7 +523,7 @@ for_each(0 <= n < N, 0 <= iy < IH, 0 <= ix < IW, 0 <= oc < OC, *Supported Data Types:* |=== -|Profile|Mode|in_t|weight_t|acc_t +|Profile|Mode|in_t|weight_t|out_t |Any|signed 8x8|int8_t|int8_t|int32_t |Any|signed 8x4|int8_t|int4_t|int32_t -- cgit v1.2.1