From ca2a854e3d46f91ecaa446d4b2311112cc2326fd Mon Sep 17 00:00:00 2001 From: Dominic Symes Date: Fri, 19 Mar 2021 13:56:27 +0000 Subject: Add definition of TOSA compliance Signed-off-by: Dominic Symes Change-Id: I2e25d0467843adb078d5ab9fd681af40b2ffa52e --- chapters/control_flow.adoc | 18 ++++++------ chapters/data_layout.adoc | 2 +- chapters/ewise_binary.adoc | 3 +- chapters/ewise_unary.adoc | 4 +-- chapters/image.adoc | 14 +++++----- chapters/introduction.adoc | 64 ++++++++++++++++++++++++++++++++++++++++--- chapters/operators.adoc | 22 +++++++++++---- chapters/pseudocode.adoc | 33 ++++++++++++++++------ chapters/reduction.adoc | 24 ++++++++-------- chapters/tensor_ops.adoc | 30 ++++++++++---------- chapters/type_conversion.adoc | 9 +++--- 11 files changed, 152 insertions(+), 71 deletions(-) diff --git a/chapters/control_flow.adoc b/chapters/control_flow.adoc index 2759464..c9d4e15 100644 --- a/chapters/control_flow.adoc +++ b/chapters/control_flow.adoc @@ -31,10 +31,10 @@ Evaluates a Boolean condition and then takes one of two distinct execution paths [source,c++] ---- -REQUIRE(tensor_list_shape(input_list) == tosa_input_shape(then_graph)); -REQUIRE(tensor_list_shape(input_list) == tosa_input_shape(else_graph)); -REQUIRE(tensor_list_shape(output_list) == tosa_output_shape(then_graph)); -REQUIRE(tensor_list_shape(output_list) == tosa_output_shape(else_graph)); +ERROR_IF(tensor_list_shape(input_list) != tosa_input_shape(then_graph)); +ERROR_IF(tensor_list_shape(input_list) != tosa_input_shape(else_graph)); +ERROR_IF(tensor_list_shape(output_list) != tosa_output_shape(then_graph)); +ERROR_IF(tensor_list_shape(output_list) != tosa_output_shape(else_graph)); if (condition) { tosa_execute_graph(then_graph, input_list, output_list); @@ -62,11 +62,11 @@ Generates and evaluates a Bool condition and either executes a loop body or exit [source,c++] ---- -REQUIRE(tensor_list_shape(input_list) == tosa_list_shape(output_list)); -REQUIRE(tensor_list_shape(input_list) == 
tosa_input_shape(cond_graph)); -REQUIRE(tensor_list_shape(input_list) == tosa_input_shape(body_graph)); -REQUIRE(tensor_list_shape(input_list) == tosa_output_shape(body_graph)); -REQUIRE(tosa_output_shape(cond_graph) == tosa_list_shape([bool_t])); +ERROR_IF(tensor_list_shape(input_list) != tosa_list_shape(output_list)); +ERROR_IF(tensor_list_shape(input_list) != tosa_input_shape(cond_graph)); +ERROR_IF(tensor_list_shape(input_list) != tosa_input_shape(body_graph)); +ERROR_IF(tensor_list_shape(input_list) != tosa_output_shape(body_graph)); +ERROR_IF(tosa_output_shape(cond_graph) != tosa_list_shape([bool_t])); // The iteration number 'i' is included to give unique names to variables // in each iteration of the loop and is not required by implementations diff --git a/chapters/data_layout.adoc b/chapters/data_layout.adoc index 09df5be..e625085 100644 --- a/chapters/data_layout.adoc +++ b/chapters/data_layout.adoc @@ -121,7 +121,7 @@ Returns a tensor with the same type/values as the input, with a new shape specif [source,c++] ---- -REQUIRE(tensor_size(shape1) == tensor_size(shape)); +ERROR_IF(tensor_size(shape1) != tensor_size(shape)); for(i = 0; i < tensor_size(shape); i++) { output[i] = input[i]; } diff --git a/chapters/ewise_binary.adoc b/chapters/ewise_binary.adoc index d94676c..2b1eadd 100644 --- a/chapters/ewise_binary.adoc +++ b/chapters/ewise_binary.adoc @@ -540,13 +540,12 @@ Axis of size 1 will be broadcast, as necessary. 
Rank of input tensors must match [source,c++] ---- -REQUIRE(in_t == int32_t || shift == 0); for_each(index in shape) { index1 = apply_broadcast(shape, shape1, index); index2 = apply_broadcast(shape, shape2, index); in_t value1 = tensor_read(input1, shape1, index1); in_t value2 = tensor_read(input2, shape2, index2); - if (shift>0) { + if (in_t == int32_t && shift > 0) { out_t acc = apply_scale_32(value1, value2, shift); } else { out_t acc = value1 * value2; // low 32-bits of result for int32_t diff --git a/chapters/ewise_unary.adoc b/chapters/ewise_unary.adoc index 304ef72..036c6e3 100644 --- a/chapters/ewise_unary.adoc +++ b/chapters/ewise_unary.adoc @@ -259,8 +259,8 @@ Elementwise negation operation [source,c++] ---- -REQUIRE(in_t == int8_t || input1_zp == 0) // Zero point only for int8_t -REQUIRE(in_t == int8_t || output_zp == 0) // Zero point only for int8_t +ERROR_IF(in_t != int8_t && input1_zp != 0) // Zero point only for int8_t +ERROR_IF(in_t != int8_t && output_zp != 0) // Zero point only for int8_t for_each(index in shape) { acc_t acc = tensor_read(input1, shape, index, input1_zp); acc = apply_sub(0, acc); diff --git a/chapters/image.adoc b/chapters/image.adoc index e098bac..f997992 100644 --- a/chapters/image.adoc +++ b/chapters/image.adoc @@ -55,21 +55,21 @@ None ---- // Ensure image size is supported by GPU APIs and that for integer // implementations, position * stride does not overflow int32_t. 
-REQUIRE(max(OH,OW,IH,IW) < 16384); +ERROR_IF(max(OH,OW,IH,IW) >= 16384); if (resize_t == float_t) { // The shift attribute is not used for floating point - REQUIRE(shift == 0); + ERROR_IF(shift != 0); } else { // if in_t=int8_t ensure that an int32_t accumulator can be used - REQUIRE(0 < shift && shift <= 11); + ERROR_IF(shift < 1 || shift > 11); // set a consistent lower limit of 1/16 downscale // independent of the shift value to simplify implementations - REQUIRE(0 < stride_x && stride_x < (16 << shift)); - REQUIRE(0 < stride_y && stride_y < (16 << shift)); + ERROR_IF(stride_x <= 0 || stride_x >= (16 << shift)); + ERROR_IF(stride_y <= 0 || stride_y >= (16 << shift)); // offset range is similarly limited to maximum 16 pixels irrespective // of shift. Both stride and offset fit in int16_t when shift=11. - REQUIRE((-16 << shift) < offset_x && offset_x < (16 << shift)); - REQUIRE((-16 << shift) < offset_y && offset_y < (16 << shift)); + ERROR_IF(offset_x <= (-16 << shift) || offset_x >= (16 << shift)); + ERROR_IF(offset_y <= (-16 << shift) || offset_y >= (16 << shift)); } for_each(0 <= n < N, 0 <= oy < OH, 0 <= ox < OW; 0 <= c < C) { unit = (resize_t == float_t) ? 1.0 : (1 << shift); diff --git a/chapters/introduction.adoc b/chapters/introduction.adoc index 33ebea1..72c0298 100644 --- a/chapters/introduction.adoc +++ b/chapters/introduction.adoc @@ -67,6 +67,62 @@ The following table summarizes the three profiles: |Main Training|TOSA-MT|Yes|Yes|Yes |=== +=== Compliance + +This section defines when a TOSA implementation is compliant to a given TOSA specification profile. +The term conformant will mean the same as compliant. + +==== Baseline Inference Profile + +The <> section of this specification defines a TOSA graph and the behaviour defined for a TOSA graph. +This behaviour is captured in the pseudo-code function tosa_execute_graph(). 
+For a given input graph (with attributes) and input tensors there are three possible tosa_graph_result values after executing the graph: + +* tosa_unpredictable: The result of the graph on the given inputs cannot be relied upon. +* tosa_error: The graph does not meet the specification and is recognised as an illegal graph. +* tosa_valid: The result is defined and predictable and the list of output tensors defines the result. + +An implementation is compliant to the TOSA Baseline Inference Profile if it matches the above results as follows: + +* For tosa_unpredictable, the implementation can return whatever result it chooses (including error) +* For tosa_error, the implementation must return an error result (and there is no requirement on how much of the graph is executed, if any) +* For tosa_valid, the implementation must execute the entire graph without error and return the result defined by this specification. + +In terms of pseudo-code, if *graph* is a TOSA graph consisting of Baseline Inference Profile operators and *input_list* is a list of input tensors then the following test must pass.
+ +[source,c++] ---- +bool tosa_test_compliance(tosa_graph_t graph, tosa_list_t input_list) { + shape_list_t output_list_spec = tosa_allocate_list(tosa_output_shape(graph)); + shape_list_t output_list_test = tosa_allocate_list(tosa_output_shape(graph)); + tosa_graph_result = tosa_valid; // result starts as valid + tosa_execute_graph(graph, input_list, output_list_spec); + if (tosa_graph_result == tosa_unpredictable) { + return true; // No requirement to match an unpredictable result + } + result_test = execute_implementation_under_test(graph, input_list, output_list_test); + if (tosa_graph_result == tosa_error) { + return result_test == tosa_error; // result must be an error + } + if (exact_tensor_match(output_list_spec, output_list_test)) { + // Predictable bit-exact value match required + return true; + } + return false; +} +---- + +==== Main Inference and Main Training Profile + +An implementation is compliant to the Main Inference or Main Training profiles if the following all hold for that respective profile: + +* For a graph returning tosa_error the implementation must also return an error +* For a graph returning tosa_valid the implementation must execute the entire graph without error +* For a graph returning tosa_valid and consisting only of integer operators the results must match exactly +* The implementation must report the maximum relative error on a set of standard graphs that contain floating point operators. These graphs will be provided as a future appendix to this specification. + +Note that for graphs containing floating point there is no strict precision requirement that must be met, but that the precision achieved must be reported. + === Operator Selection TOSA defines a set of primitive operators to which higher level operators can be lowered in a consistent way. To remain effective and efficient to implement the set of operators must be constrained to a reasonably small set of primitive operations out of which others can be constructed.
The following principles govern the selection of operators within TOSA. @@ -197,9 +253,9 @@ The padding array represents the before and after pair for each dimension. [source,c++] ---- -REQUIRE((pad == NULL) || size(pad) == 2 * size(shape)); +ERROR_IF((pad != NULL) && size(pad) != 2 * size(shape)); out_t tensor_read(in_t *address, dim_t shape, dim_t index, in_t zero_point=0, dim_t pad=NULL) { - REQUIRE(in_t == int8_t || zero_point == 0) + ERROR_IF(in_t != int8_t && zero_point != 0) unsigned offset = 0; for (i = 0; i < rank(shape); i++) { if (index[i] < 0) { @@ -242,10 +298,10 @@ The following function maps an index in the output tensor to an index in the inp [source,c++] ---- dim_t apply_broadcast(dim_t out_shape, dim_t in_shape, dim_t index) { - REQUIRE(rank(out_shape) == rank(in_shape)); + ERROR_IF(rank(out_shape) != rank(in_shape)); for (i = 0; i < rank(out_shape); i++) { if (out_shape[i] != in_shape[i]) { - REQUIRE(in_shape[i] == 1); + ERROR_IF(in_shape[i] != 1); index[i] = 0; } } diff --git a/chapters/operators.adoc b/chapters/operators.adoc index 75084d1..264063a 100644 --- a/chapters/operators.adoc +++ b/chapters/operators.adoc @@ -19,6 +19,14 @@ An operator processes input operands to produce output operands. An operator can === Operator Graphs +A TOSA graph is a collection of TOSA operators where: + +* The output of an operator in the graph may be connected to one or more inputs of other operators in the graph +* When an output is connected to an input the tensor list shapes must match +* The attributes of the operators are defined and considered part of the graph +* The attributes must be in the valid range permitted for the operator +* The tensor dimensions must be in the valid range permitted for the operator + Some operators, such as control flow operators, take a graph of other operators as an attribute. 
The type tosa_graph_t will denote a graph of operators and the following functions define the tensor shape list for the graph input and outputs: [source,c++] @@ -38,12 +46,14 @@ The following function denotes the execution of a TOSA graph, on an input tensor [source,c++] ---- tosa_execute_graph(tosa_graph_t graph, tosa_list_t input_list, tosa_list_t output_list) { - REQUIRE(tensor_list_shape(input_list)==tosa_input_shape(graph)); - REQUIRE(tensor_list_shape(output_list)==tosa_output_shape(graph)); - - if (tosa_graph_result_unpredictable == true) { - // Result of TOSA graph execution is unpredictable due to calling - // the unpredictable() function during execution. + ERROR_IF(tensor_list_shape(input_list) != tosa_input_shape(graph)); + ERROR_IF(tensor_list_shape(output_list) != tosa_output_shape(graph)); + for_each(operator in graph order) { + ERROR_IF(operator input tensors do not meet requirement of operator Arguments inputs) + ERROR_IF(operator attributes do not meet requirement of operator Arguments attributes) + ERROR_IF(operator output tensors do not meet requirement of operator Arguments outputs) + ERROR_IF(operator data types do not meet requirement of operator Supported Data Types) + } } ---- diff --git a/chapters/pseudocode.adoc b/chapters/pseudocode.adoc index 901211a..d9d8836 100644 --- a/chapters/pseudocode.adoc +++ b/chapters/pseudocode.adoc @@ -15,22 +15,37 @@ This section contains pseudocode functions shared across multiple operators in t === Operator Validation Helpers - The following functions are used to define the valid conditions for TOSA operators. + The REQUIRE function defines the conditions required by the TOSA operator. -When a call to unpredictable() is made, processing defined in the pseudocode for this operator may or may not be executed. -Once unpredictable is called, the whole TOSA graph is considered unpredictable, even if the unpredictable result does not propagate to the graph output. 
+If the conditions are not met then the result of the TOSA graph is marked as unpredictable. +Once the tosa_graph_result is set to tosa_unpredictable, the whole graph is considered unpredictable. + +The ERROR_IF function defines a condition that must set an error if the condition holds and the graph is not unpredictable. +Note that if a graph contains both unpredictable and error statements then the result of tosa_execute_graph() is tosa_unpredictable. +This condition is captured in the ERROR_IF function. + +*Implementation Notes* + +* An implementation is not required to detect unpredictable behaviour. If tosa_execute_graph() returns tosa_unpredictable then the tosa_test_compliance() function does not require any specific output from an implementation. +* An implementation is required to detect errors in a graph that does not have unpredictable behaviour (see tosa_test_compliance). +* An acceptable implementation is to stop and report an error on the first ERROR_IF condition that occurs. This satisfies tosa_test_compliance() even if the tosa_execute_graph() was tosa_unpredictable. +* If the tosa_execute_graph() result is tosa_unpredictable or tosa_error, then there is no requirement on the implementation to execute any portion of the TOSA graph. [source,c++] ---- -void unpredictable() { - // Behavior of this TOSA operator cannot be relied on if this is called. - tosa_graph_result_unpredictable = true; +void REQUIRE(condition) { + // Unpredictable overrides any previous result + if (!(condition)) { + tosa_graph_result = tosa_unpredictable; + } } -void REQUIRE(condition) { - if (not condition) { - unpredictable(); +void ERROR_IF(condition) { + // Error encodes a predictable error state and so is not registered + // if the graph is marked as unpredictable.
+ if (tosa_graph_result != tosa_unpredictable && condition) { + tosa_graph_result = tosa_error; } } ---- diff --git a/chapters/reduction.adoc b/chapters/reduction.adoc index 391591f..c39fecf 100644 --- a/chapters/reduction.adoc +++ b/chapters/reduction.adoc @@ -27,8 +27,8 @@ Reduce a tensor along the given axis with a logical AND operation [source,c] ---- -REQUIRE(0 <= axis && axis < rank(shape1)); -REQUIRE(shape[axis] == 1); +ERROR_IF(axis < 0 || axis >= rank(shape1)); +ERROR_IF(shape[axis] != 1); for_each(index in shape) { tensor_write(output, shape, index, true); } @@ -68,8 +68,8 @@ Reduce a tensor along the given axis with a logical OR operation [source,c] ---- -REQUIRE(0 <= axis && axis < rank(shape1)); -REQUIRE(shape[axis] == 1); +ERROR_IF(axis < 0 || axis >= rank(shape1)); +ERROR_IF(shape[axis] != 1); for_each(index in shape) { tensor_write(output, shape, index, false); } @@ -109,8 +109,8 @@ Reduce a tensor along the given axis with a maximum operation [source,c] ---- -REQUIRE(0 <= axis && axis < rank(shape1)); -REQUIRE(shape[axis] == 1); +ERROR_IF(axis < 0 || axis >= rank(shape1)); +ERROR_IF(shape[axis] != 1); for_each(index in shape) { tensor_write(output, shape, index, minimum); } @@ -156,8 +156,8 @@ Quantization is ignored when doing the REDUCE_MIN operation. The input and outpu [source,c] ---- -REQUIRE(0 <= axis && axis < rank(shape1)); -REQUIRE(shape[axis]==1); +ERROR_IF(axis < 0 || axis >= rank(shape1)); +ERROR_IF(shape[axis] != 1); for_each(index in shape) { tensor_write(output, shape, index, maximum); } @@ -200,8 +200,8 @@ Reduce a tensor along the given axis by computing the product of the axis. [source,c] ---- -REQUIRE(0 <= axis && axis < rank(shape1)); -REQUIRE(shape[axis] == 1); +ERROR_IF(axis < 0 || axis >= rank(shape1)); +ERROR_IF(shape[axis] != 1); for_each(index in shape) { tensor_write(output, shape, index, 1.0); } @@ -241,8 +241,8 @@ Reduce a tensor along the given axis by computing the sum of the axis. 
[source,c] ---- -REQUIRE(0 <= axis && axis < rank(shape1)); -REQUIRE(shape[axis] == 1); +ERROR_IF(axis < 0 || axis >= rank(shape1)); +ERROR_IF(shape[axis] != 1); for_each(index in shape) { tensor_write(output, shape, index, 0); } diff --git a/chapters/tensor_ops.adoc b/chapters/tensor_ops.adoc index 6780b1c..9a1c035 100644 --- a/chapters/tensor_ops.adoc +++ b/chapters/tensor_ops.adoc @@ -31,7 +31,7 @@ None [source,c++] ---- -REQUIRE(axis >= 0 && axis < rank(shape1) && rank(shape1) <= 4); +ERROR_IF(axis < 0 || axis >= rank(shape1) || rank(shape1) > 4); if (axis == 0) { left_shape = []; } else { @@ -42,7 +42,7 @@ if (axis == rank(shape1)-1) { } else { right_shape = shape1[axis+1:rank(shape1) - 1]; } -REQUIRE(flatten(left_shape, right_shape) == shape); +ERROR_IF(flatten(left_shape, right_shape) != shape); for_each(left_index in left_shape) { for_each(right_index in right_shape) { in_t max_value = minimum_value; @@ -97,8 +97,8 @@ This performs an average pooling over the given input tensor. A sliding window o [source,c++] ---- -REQUIRE(in_t == int8_t || input_zp == 0); // Zero point only for int8_t -REQUIRE(in_t == int8_t || output_zp == 0); // Zero point only for int8_t +ERROR_IF(in_t != int8_t && input_zp != 0); // Zero point only for int8_t +ERROR_IF(in_t != int8_t && output_zp != 0); // Zero point only for int8_t pad = flatten([0,0], pad, [0,0]); for_each(0 <= n < N, 0 <= oy < H, 0 <= ox < W, 0 <= c < C ) { in_t output_val; @@ -164,8 +164,8 @@ Performs a 2D convolution over the given tensor input, using the weight tensor. [source,c++] ---- -REQUIRE(in_t == int8_t || input_zp == 0); // Zero point only for int8_t -REQUIRE(weight_t == int8_t || weight_zp == 0); +ERROR_IF(in_t != int8_t && input_zp != 0); // Zero point only for int8_t +ERROR_IF(weight_t != int8_t && weight_zp != 0); pad = flatten([0,0], pad, [0,0]); for_each(0 <= n < N, 0 <= oy < H, 0 <= ox < W; 0 <= oc < OC) { acc_t acc = 0; @@ -225,8 +225,8 @@ Performs a 3D convolution over the given input tensor. 
[source,c++] ---- -REQUIRE(in_t == int8_t || input_zp == 0); // Zero point only for int8_t -REQUIRE(weight_t == int8_t || weight_zp == 0); +ERROR_IF(in_t != int8_t && input_zp != 0); // Zero point only for int8_t +ERROR_IF(weight_t != int8_t && weight_zp != 0); pad = flatten([0,0], pad, [0,0]); for_each(0 <= n < N, 0 <= od < D, 0 <= oy < H, 0 <= ox < W; 0 <= oc < OC) { acc_t acc = 0; @@ -289,8 +289,8 @@ Performs 2D convolutions separately over each channel of the given tensor input, [source,c++] ---- -REQUIRE(in_t == int8_t || input_zp == 0); // Zero point only for int8_t -REQUIRE(weight_t == int8_t || weight_zp == 0); +ERROR_IF(in_t != int8_t && input_zp != 0); // Zero point only for int8_t +ERROR_IF(weight_t != int8_t && weight_zp != 0); pad = flatten([0,0], pad, [0,0]); for_each(0 <= n(output, [N,OH,OW,OC], index, bias[index[3]]) } diff --git a/chapters/type_conversion.adoc b/chapters/type_conversion.adoc index 35cebe7..b4d372d 100644 --- a/chapters/type_conversion.adoc +++ b/chapters/type_conversion.adoc @@ -93,7 +93,7 @@ Rescale quantized values into a new domain. This function scales by factor: mult |Attribute|out_t|output_zp|-|Output tensor zero point |Input (MT profile) Attribute (BI/MI profiles)|mul_t|multiplier[NC]|-|Scaling multiplier array |Input (MT profile) Attribute (BI/MI profiles)|uint6_t|shift[NC] |-|Scaling shift array -|Input (MT profile) Attribute (BI/MI profiles)|bool_t|scale32|-|if (scale32) mul_t=int32_t else mul_t=int16_t +|Attribute|bool_t|scale32|-|if (scale32) mul_t=int32_t else mul_t=int16_t |Attribute|bool_t|double_round|-|Select double round mode |Attribute|bool_t|per_channel|-|if (per_channel) NC=shape[dims-1] else NC=1 |=== @@ -103,9 +103,10 @@ Rescale quantized values into a new domain. 
This function scales by factor: mult [source,c++] ---- for_each(index in shape) { - REQUIRE(in_t == int8_t || in_t == uint8_t || input_zp == 0); - REQUIRE(out_t == int8_t || out_t == uint8_t || output_zp == 0); - REQUIRE((scale32 && in_t != int48_t_t) || (!scale32 && !double_round)); + ERROR_IF(in_t != int8_t && in_t != uint8_t && input_zp != 0); + ERROR_IF(out_t != int8_t && out_t != uint8_t && output_zp != 0); + ERROR_IF(scale32 && int_t == int48_t); + ERROR_IF(!scale32 && double_round); int48_t value = tensor_read(input, shape, index, input_zp); int c = (per_channel) ? index[dims-1] : 0; int32_t result = (scale32) ? -- cgit v1.2.1