From 5b936a3c5c335baab178edecf4c1da09b9a86707 Mon Sep 17 00:00:00 2001 From: Dominic Symes Date: Wed, 1 Mar 2023 11:34:40 +0000 Subject: Main inference compliance testing updates - Add additional main inference compliance test data sets for dot product testing in Appendix A. - Express dot product test criteria in terms of tensors rather than individual dot products. - Add per-operation details on test set generation in Appendix A. - Clarify compliance vs conformance wording. - Clarify that the comment in the table of section 1.8.2 on integer saturation applies to the CAST from floating point to integer operation. Change-Id: I1b4c4493b02ed7c8a6eb547656c91ca67d4b0e86 Signed-off-by: Dominic Symes --- chapters/appendix_a.adoc | 274 ++++++++++++++++++++++++++++++++++++++++----- chapters/introduction.adoc | 112 ++++++++++++------ tools/dictionary.dic | 2 + 3 files changed, 324 insertions(+), 64 deletions(-) diff --git a/chapters/appendix_a.adoc b/chapters/appendix_a.adoc index 33a4f11..f601d5d 100644 --- a/chapters/appendix_a.adoc +++ b/chapters/appendix_a.adoc @@ -30,37 +30,257 @@ float set_data(uint32_t set, uint32_t index) } ---- -=== Dot product floating-point test data sets +=== Main Inference test data generator -Each test set is indexed by a pair (S, N) where: +This section describes the function tosa_mi_data(S, KS, p, k, i) that generates test data for main inference compliance. +This function takes the following arguments: -* S is the test set number -* N is the number of elements in a single test vector +* S is the test set number which identifies which generator is used +* KS is the kernel size +* p is the parameter number of 0 for the first input (usually data) and 1 for the second input (usually weights) +* k is the index within the kernel in the range 0 \<= k < KS +* i is the index within the tensor to write -Each test set (S, N) contains multiple tests that statistics are calculated over. -The parameter T is the number of tests in a given set. 
-In the table below, t is the test number within a set in the range 0 to T-1. +Some test data values are scaled by the bound parameter B which is defined in the table below. +B is set to be the largest value that is both representable by the input type and such that B*B does not overflow the accumulator precision. -[cols="1,1,1,5,5"] |=== -| Set S | N range | T | x[k] formula for k < N | w[k] formula for k < N - -| 0 -| 2-25,50,100,1000 -| 10 -| x[k]=set_data(S, 2*t*N+2*k) < 0 ? 0.0 : set_data(S, 2*t*N+2*k+1) -| w[k]=set_data(S, 2*t*N+2*k) < 0 ? set_data(S, 2*t*N+2*k+1) : 0.0 - -| 1 -| 2-25,50,100,1000 -| 1000 -| x[k]=2.0*set_data(S, 2*t*N + k) -| w[k]=2.0*set_data(S, (2*t+1)*N + k) - -| 2 -| 2-25,50,100,1000 -| 1000 -| x[0]=1.0, x[k]=set_data(S, 2*t*N + k)/sqrt(N) for k>0 -| w[0]=1.0, w[k]=set_data(S, (2*t+1)*N + k)/sqrt(N) for k>0 +| inputs type | accumulator type | B value +| fp16 | fp16 | (1<<8) - (1/8) = 255.875 +| fp16 | fp32 | (1<<16) - (1<<5) = 65504 +| bf16 | fp32 | (1<<64) - (1<<56) +| fp32 | fp32 | (1<<64) - (1<<40) +|=== + +==== Test set S=0 generator + +The aim of this generator is to check that sum of products with zero gives zero result. + +[cols="1,9"] +|=== +| p | tosa_mi_data(S, KS, p, k, i) = +| 0 | set_data(2*S, i) < 0 ? 0.0 : set_data(2*S+1, i) +| 1 | set_data(2*S, i) < 0 ? set_data(2*S+1, i) : 0.0 +|=== + +==== Test set S=1 + +The aim of this test set is to check values with large exponents. + +[cols="1,9"] +|=== +| p | tosa_mi_data(S, KS, p, k, i) = +| 0 | (B/sqrt(KS))*(0.75 + 0.25*set_data(2*S+0, i)) +| 1 | (B/sqrt(KS))*(0.75 + 0.25*set_data(2*S+1, i)) +|=== + +==== Test set S=2 + +The aim of this test set is to check rounding error when accumulating small values onto a large value. +In this case the small values are of similar magnitude. +If the implementation changes the order of the sum, then the test data must also be reordered so that the largest values occur first in the sum.
+ +[cols="1,9"] +|=== +| p | tosa_mi_data(S, KS, p, k, i) = +| 0 | (k==0) ? 1.0 : set_data(2*S+0, i)/sqrt(KS) +| 1 | (k==0) ? 1.0 : set_data(2*S+1, i)/sqrt(KS) +|=== + +==== Test set S=3 + +The aim of this test set is to check rounding error when accumulating small values onto a large value. +In this case the small values are of varying magnitude. +If the implementation changes the order of the sum, then the test data must also be reordered so that the largest values occur first in the sum. + +[cols="1,9"] +|=== +| p | tosa_mi_data(S, KS, p, k, i) = +| 0 | (k==0) ? 16.0 : exp(2*set_data(2*S+0, 2*i+0)) * set_data(2*S+0, 2*i+1) +| 1 | (k==0) ? 16.0 : exp(2*set_data(2*S+1, 2*i+0)) * set_data(2*S+1, 2*i+1) +|=== + +==== Test set S=4 + +The aim of this test set is to check a mixture of zero and non-zero products. + +[cols="1,9"] +|=== +| p | tosa_mi_data(S, KS, p, k, i) = +| 0 | (k==KS/2) ? +0.5 : (set_data(2*S, i) < 0 ? 0.0 : B*set_data(2*S+1, i)) +| 1 | (k==KS/2) ? -0.5 : (set_data(2*S, i) < 0 ? B*set_data(2*S+1, i) : 0.0) +|=== +==== Test set S=5 + +The aim of this test set is to check signed inputs of large range. + +[cols="1,9"] |=== +| p | tosa_mi_data(S, KS, p, k, i) = +| 0 | (B/sqrt(KS))*set_data(2*S+0, i) +| 1 | (B/sqrt(KS))*set_data(2*S+1, i) +|=== + +=== Main Inference operator test data + +For each operator, this section defines how to generate test data for test set S. +For the results to be statistically significant the operation must calculate at least MIN_DOT_PRODUCTS dot products. +For most operations this means that the output tensor must have at least MIN_DOT_PRODUCTS output values. +For most operations batch size can be increased if necessary so that this holds. +For this version of the specification, MIN_DOT_PRODUCTS is set to 1000. + +==== CONV2D + +The following generates input test data for test set S. 
+For compliant implementation, the test must pass whenever the attributes satisfy: +`N*OH*OW*OC >= MIN_DOT_PRODUCTS` + +[source,c++] +---- +KS = KW*KH*IC; +for (0 <= n < N, 0 <= iy < IH, 0 <= ix < IW, 0 <= ic < IC) { + input [ n, iy, ix, ic] = tosa_mi_data(S, KS, 0, ((iy % KH)*KW+(ix % KW))*IC+ic, ((n*IH+iy)*IW+ix)*IC+ic); +} +for (0 <= oc < OC, 0 <= ky < KH, 0 <= kx < KW, 0 <= ic < IC) { + weight[oc, ky, kx, ic] = tosa_mi_data(S, KS, 1, (ky*KW+kx)*IC+ic, ((oc*KH+ky)*KW+kx)*IC+ic); +} +---- + +==== CONV3D + +The following generates input test data for test set S. +For compliant implementation, the test must pass whenever the attributes satisfy: +`N*OD*OH*OW*OC >= MIN_DOT_PRODUCTS` + +[source,c++] +---- +KS = KD*KW*KH*IC; +for (0 <= n < N, 0 <= id < ID, 0 <= iy < IH, 0 <= ix < IW, 0 <= ic < IC) { + input [ n, id, iy, ix, ic] = tosa_mi_data(S, KS, 0, (((id % KD)*KH+(iy % KH))*KW+(ix % KW))*IC+ic, (((n*ID+id)*IH+iy)*IW+ix)*IC+ic); +} +for (0 <= oc < OC, 0 <= kd < KD, 0 <= ky < KH, 0 <= kx < KW, 0 <= ic < IC) { + weight[oc, kd, ky, kx, ic] = tosa_mi_data(S, KS, 1, ((kd*KH+ky)*KW+kx)*IC+ic, (((oc*KD+kd)*KH+ky)*KW+kx)*IC+ic); +} +---- + +==== DEPTHWISE_CONV2D + +The following generates input test data for test set S. +For compliant implementation, the test must pass whenever the attributes satisfy: +`N*OH*OW*C*M >= MIN_DOT_PRODUCTS` + +[source,c++] +---- +KS = KW*KH*C; +for (0 <= n < N, 0 <= iy < IH, 0 <= ix < IW, 0 <= c < C) { + input [ n, iy, ix, c] = tosa_mi_data(S, KS, 0, ((iy % KH)*KW+(ix % KW))*C+c, ((n*IH+iy)*IW+ix)*C+c); +} +for (0 <= ky < KH, 0 <= kx < KW, 0 <= c < C, 0 <= m < M) { + weight[ky, kx, c, m] = tosa_mi_data(S, KS, 1, (ky*KW+kx)*C+c, ((ky*KW+kx)*C+c)*M+m); +} +---- + +==== FULLY_CONNECTED + +The following generates input test data for test set S.
+For compliant implementation, the test must pass whenever the attributes satisfy: +`N*OC >= MIN_DOT_PRODUCTS` + +[source,c++] +---- +KS = IC; +for (0 <= n < N, 0 <= ic < IC) { + input [ n, ic] = tosa_mi_data(S, KS, 0, ic, n*IC+ic); +} +for (0 <= oc < OC, 0 <= ic < IC) { + weight[oc, ic] = tosa_mi_data(S, KS, 1, ic, oc*IC+ic); +} +---- + +==== MATMUL + +The following generates input test data for test set S. +For compliant implementation, the test must pass whenever the attributes satisfy: +`N*H*W >= MIN_DOT_PRODUCTS` + +[source,c++] +---- +KS = C; +for (0 <= n < N, 0 <= y < H, 0 <= c < C) { + A[n, y, c] = tosa_mi_data(S, KS, 0, c, (n*H+y)*C+c); +} +for (0 <= n < N, 0 <= c < C, 0 <= x < W) { + B[n, c, x] = tosa_mi_data(S, KS, 1, c, (n*C+c)*W+x); +} +---- + +==== TRANSPOSE_CONV2D + +The following generates input test data for test set S. +For compliant implementation, the test must pass whenever the attributes satisfy: +`N*OH*OW*OC >= MIN_DOT_PRODUCTS` + +[source,c++] +---- +KS = KW*KH*IC; +for (0 <= n < N, 0 <= iy < IH, 0 <= ix < IW, 0 <= ic < IC) { + input [ n, iy, ix, ic] = tosa_mi_data(S, KS, 0, ((iy % KH)*KW+(ix % KW))*IC+ic, ((n*IH+iy)*IW+ix)*IC+ic); +} +for (0 <= oc < OC, 0 <= ky < KH, 0 <= kx < KW, 0 <= ic < IC) { + weight[oc, ky, kx, ic] = tosa_mi_data(S, KS, 1, (ky*KW+kx)*IC+ic, ((oc*KH+ky)*KW+kx)*IC+ic); +} +---- + +==== FFT2D + +The following generates input test data for test set S. 
+For compliant implementation, the test must pass whenever the attributes satisfy: +`N*H*W >= MIN_DOT_PRODUCTS` + +[source,c++] +---- +KS = 2*H*W; +for (0 <= n < N, 0 <= y < H, 0 <= x < W) { + input_real[n, y, x] = tosa_mi_data(S, KS, 0, y*W+x, ((0*N+n)*H+y)*W+x); + input_imag[n, y, x] = tosa_mi_data(S, KS, 0, y*W+x, ((1*N+n)*H+y)*W+x); +} +for (0 <= y < H, 0 <= x < W, 0 <= m < H, 0 <= n < W) { + weight_real[y, x, m, n] = real(exp(2*pi*i*((m*y/H) + (n*x/W)))); + weight_imag[y, x, m, n] = imag(exp(2*pi*i*((m*y/H) + (n*x/W)))); +} +---- + +==== REDUCE_SUM + +The following generates input test data for test set S. +For compliant implementation, the test must pass whenever the attributes satisfy: +`tensor_size(shape) >= MIN_DOT_PRODUCTS` + +[source,c++] +---- +KS = shape1[axis]; +for (index in shape1) { + input[index] = tosa_mi_data(S, KS, 0, index[axis], tensor_index_to_offset(index)); +} +for (0 <= c < KS) { + weight[c] = 1; +} +---- + +==== AVG_POOL2D + +The following generates input test data for test set S. +For compliant implementation, the test must pass whenever the attributes satisfy: +`N*OH*OW*C >= MIN_DOT_PRODUCTS` + +[source,c++] +---- +KS = KH*KW; +for (0 <= n < N, 0 <= iy < IH, 0 <= ix < IW, 0 <= c < C) { + input [ n, iy, ix, c] = tosa_mi_data(S, KS, 0, ((iy % KH)*KW+(ix % KW))*C+c, ((n*IH+iy)*IW+ix)*C+c); +} +for (0 <= ky < KH, 0 <= kx < KW, 0 <= c < C, 0 <= m < M) { + weight[ky, kx] = 1/KS; +} +---- diff --git a/chapters/introduction.adoc b/chapters/introduction.adoc index f3a6454..66bc9bf 100644 --- a/chapters/introduction.adoc +++ b/chapters/introduction.adoc @@ -135,7 +135,10 @@ The TOSA specification is a work in progress. === Compliance This section defines when a TOSA implementation is compliant to a given TOSA specification profile and level. -The term conformant will mean the same as compliant. +To be compliant an implementation must achieve the results and accuracy defined by this specification. +TOSA also defines a set of conformance tests.
+A compliant implementation must pass the conformance tests. +The conformance tests are not exhaustive, so an implementation that passes the conformance tests may not be compliant if there is a non-compliance that is undetected by the tests. ==== Base Inference Profile Compliance @@ -177,7 +180,7 @@ bool tosa_test_compliance(tosa_graph_t graph, tosa_list_t input_list, tosa_level } ---- -==== Main Inference Profile +==== Main Inference Profile Compliance A Main Inference compliant implementation must satisfy the following: @@ -216,7 +219,7 @@ The following criteria apply to all operations: | Operation | Accuracy bound | <>, <>, <>, <>, <>, <>, <>, , <>, <> -| The result must be exact. +| Non NaN results must be exact. | <>, <>, <> | The result must be exact with: + @@ -228,19 +231,25 @@ The following criteria apply to all operations: The dot product must meet the <> | <>, <> -| Each output can be expressed as a dot product of an input vector with a costant vector. + +| Each output can be expressed as a dot product of an input vector with a constant coefficient vector. + The dot product must meet the <> -| <>, <>, <>, <>, <>, <> +| <>, <>, <>, <>, <> | Floating-point result overflows must be set to infinity of the correct sign. + Floating-point result underflows must be set to zero of the correct sign. + -Integer result overflows must be saturated. + Addition of infinites of different signs must produce a NaN. + Subtraction of infinities of the same sign must produce a NaN. + Multiplication of an infinity by a zero must produce a NaN. + Otherwise for fp32_t the result must be rounded to the nearest representable value using the round to nearest, ties to even rounding mode. + Otherwise for fp16_t and bf16_t the result must be within 0.5 ulp of the mathematical result. +| <> +| Floating-point result overflows must be set to infinity of the correct sign. + +Floating-point result underflows must be set to zero of the correct sign. 
+ +Cast from floating-point to integer result overflows must be saturated. + +Otherwise for fp32_t the result must be rounded to the nearest representable value using the round to nearest, ties to even rounding mode. + +Otherwise for fp16_t and bf16_t the result must be within 0.5 ulp of the mathematical result. + | <> | If the input is a zero or the result overlows the output must be an infinity of the same sign. + If the input is an infinty or the result underflows the output must be a zero of the same sign. + @@ -264,7 +273,7 @@ Otherwise the result must be within 5 ulp of the mathematical result. This dot product must meet the <> | <> -| Each output can be expressed as a dot product of an input vector with a vector with elements 1/d where d is the kernel size. + +| Each output can be expressed as a dot product of an input vector with a vector with elements 1/KS where KS is the kernel size. + This dot product must meet the <> | <> @@ -277,36 +286,65 @@ where `E = pow(1 + pow(2, -M-1), N) - 1`. In this expression M is the number of ===== Dot product accuracy requirements -This section gives accuracy constraints for operations where the result is a sum of products of N floating-point inputs: - -`y = x[0] * w[0] + x[1] * w[1] + ... + x[N-1] * w[N-1]` - -Let M be the number of mantissa bits in the accumulator. -So M=23 for an `fp32_t` accumulator and M=10 for an `fp16_t` accumulator. - -In this section "fp64 arithmetic" refers to double-precision floating-point arithmetic defined by <>[1]. - -Appendix A, defines a number of <>. -For each data test set (S, N) consisting of T tests the following must hold: - -* For each test t in the range 0 to T-1, calculate: -** `y_imp[t] = x[0] * w[0] + ... + x[N-1] * w[N-1]` calculated by the implementation -** `y_ref[t] = x[0] * w[0] + ... + x[N-1] * w[N-1]` calculated using fp64 arithmetic -** `y_bnd[t] = abs(x[0] * w[0]) + ... 
+ abs(x[N-1] * w[N-1])` calculated using fp64 arithmetic -* if `y_bnd[t] == 0` then -** `y_imp[t]` must be zero and set `y_err[t] = 0` -* if `y_bnd[t] > 0` then set: -** `y_err[t] = (y_imp[t] - y_ref[t]) * (1<<(M+1)) / y_bnd[t]` calculated using fp64 arithmetic -* For each test t the following must be satisfied: -** `y_ref[t], y_bnd[t], y_imp[t]` must be finite -** `abs(y_err[t]) \<= N` -* Calculate the sum of y_err using fp64 arithmetic: -** `y_err_sum = y_err[0] + .... + y_err[T-1]` -* Calculate the sum of y_err squared using fp64 arithmetic: -** `y_err_sumsq = y_err[0] * y_err[0] + ... + y_err[T-1] * y_err[T-1]` -* The error sum and sum squares must satisfy the following. The first equation bounds the bias and the second the error variance. -** `abs(y_err_sum) \<= 2*sqrt(N*T)` -** `y_err_sumsq \<= 0.4*N*T` +This section assumes an operation acting on two tensors named 'input' and 'weight'. +Each output tensor element can be expressed as a dot product of elements between the input and weight tensors. +The dot product has length KS, the kernel size. +Note: KS is defined for each relevant operator in the appendix section <
>. + +In other words each output element `out` can be expressed as a dot product between input elements `in[k]` and weight elements `w[k]`: + +`out = in[0] * w[0] + in[1] * w[1] + ... + in[KS-1] * w[KS-1]` + +The positions of `in[k]` and `w[k]` in the input and weight tensors depend on the operation being performed (for example a convolution). + +This section defines the accuracy required for these operations. +The term "fp64 arithmetic" refers to double-precision floating-point arithmetic defined by <>[1]. + +For an operation with given sizes and attributes to be compliant the following must hold for each data set S defined in <>: + +* Let input be the input tensor generated by <
> for test set S +* Let weight be the weight tensor generated by <
> for test set S +* Let output_ref be the output tensor calculated by the operation using fp64 arithmetic +* Let output_imp be the output tensor calculated by the implementation to test +* Let input_abs be the input tensor with each element replaced with its absolute value +* Let weight_abs be the weight tensor with each element replaced with its absolute value +* Let output_bnd be the output tensor calculated using fp64 arithmetic on input_abs and weight_abs + +The following checks must then pass: + +[source,c++] +---- +size_t T = tensor_size(output_shape); // number of dot product results +fp64_t out_err_sum = 0.0; +fp64_t out_err_sumsq = 0.0; +fp64_t acc_prec; // 1<<(M+1) where M is the number of mantissa bits +switch (acc_t) { + case fp32_t: acc_prec = (fp64_t)(1<<24); break; + case fp16_t: acc_prec = (fp64_t)(1<<11); break; + default: ERROR_IF(true); +} +for_each(index in output_shape) { + fp64_t out_bnd = tensor_read(output_bnd, output_shape, index); + fp64_t out_ref = tensor_read(output_ref, output_shape, index); + acc_t out_imp = tensor_read (output_imp, output_shape, index); + fp64_t out_err; + if (out_bnd == 0.0) { + REQUIRE(out_ref == 0.0 && out_imp == 0.0); + out_err = 0.0; + } else { // out_bnd > 0.0 + out_err = ((fp64_t)out_imp - out_ref)*acc_prec/out_bnd; + REQUIRE(abs(out_err) <= KS); + } + out_err_sum += out_err; + out_err_sumsq += out_err * out_err; +} +if (S!=1 && S!=2) { + // check output error bias magnitude for data sets S which are not positive biased + REQUIRE(abs(out_err_sum) <= 2*sqrt(KS*T)); +} +// check output error variance magnitude +REQUIRE(out_err_sumsq <= 0.4*KS*T); +---- === Tensor Definitions diff --git a/tools/dictionary.dic b/tools/dictionary.dic index 325db9b..da2e28c 100644 --- a/tools/dictionary.dic +++ b/tools/dictionary.dic @@ -1,6 +1,7 @@ personal_ws-1.1 en 500 activations adoc +acc ARGMAX AsciiDoc BILINEAR @@ -44,6 +45,7 @@ MUL multipler NaN NPUs +OC pre precisions pseudocode -- cgit v1.2.1