author     Dominic Symes <dominic.symes@arm.com>   2023-07-27 11:50:57 +0100
committer  Dominic Symes <dominic.symes@arm.com>   2023-08-16 16:33:20 +0000
commit     b5b067819e5de11153b41cf3d26da4f3f9dd23e8 (patch)
tree       a347a7b4d7f89d5706ced611c3d60e49c1ddc5da
parent     830b43b1d1bd82edd57dee1f5cac12e2b5cf0e04 (diff)
download   specification-b5b067819e5de11153b41cf3d26da4f3f9dd23e8.tar.gz
Main conformance: Add local bound flag for convolutions
Adds a local_bound flag to convolution operations.

If the local_bound flag is true, the floating-point output accuracy is measured relative to the input tensor data local to the particular convolution output. If the local_bound flag is false, the floating-point output accuracy is measured relative to the whole input tensor. The latter measure is the default and is more appropriate for implementations with non-local optimizations (such as transform-based fast convolutions).

Signed-off-by: Dominic Symes <dominic.symes@arm.com>
Change-Id: I64e3e4981a63e26e6391149e28d5d71e7ef5560a
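The difference between the two accuracy measures can be illustrated with a minimal sketch (not part of the commit). Helper names such as `local_dot_bound` and `global_dot_bound` are hypothetical, and bias is omitted for brevity; the normative definition is the pseudocode added to chapters/introduction.adoc below.

[source,c++]
----
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <vector>

// local_bound == true: the bound for one output is the sum of |in[k]| * |w[k]|
// over only the elements that feed that particular output.
double local_dot_bound(const std::vector<double>& in, const std::vector<double>& w) {
    double bnd = 0.0;
    for (std::size_t k = 0; k < in.size(); ++k) {
        bnd += std::fabs(in[k]) * std::fabs(w[k]);
    }
    return bnd;
}

// local_bound == false (default): every |in[k]| is replaced by the maximum
// absolute value over the whole input tensor, giving a looser bound that
// admits transform-based fast convolutions (e.g. Winograd or FFT).
double global_dot_bound(const std::vector<double>& in_all, const std::vector<double>& w) {
    double in_max = 0.0;
    for (double v : in_all) {
        in_max = std::max(in_max, std::fabs(v));
    }
    double bnd = 0.0;
    for (double wk : w) {
        bnd += in_max * std::fabs(wk);
    }
    return bnd;
}
----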
-rw-r--r--  chapters/appendix_a.adoc    34
-rw-r--r--  chapters/introduction.adoc  76
-rw-r--r--  tosa.xml                    40
3 files changed, 120 insertions(+), 30 deletions(-)
diff --git a/chapters/appendix_a.adoc b/chapters/appendix_a.adoc
index d4235e8..ba3b6bb 100644
--- a/chapters/appendix_a.adoc
+++ b/chapters/appendix_a.adoc
@@ -37,7 +37,10 @@ This function takes the following arguments:
* S is the test set number which identifies which generator is used
* KS is the kernel size
-* p is the parameter number of 0 for the first input (usually data) and 1 for the second input (usually weights)
+* p is the parameter number of:
+** 0 for the first input (usually data)
+** 1 for the second input (usually weights)
+** 2 for the third input if present (usually bias)
* k is the index within the kernel in the range 0 \<= k < KS
* i is the index within the tensor to write
@@ -61,6 +64,7 @@ The aim of this generator is to check that sum of products with zero gives zero
| p | tosa_mi_data(S, KS, p, k, i) =
| 0 | set_data(2*S, i) < 0 ? 0.0 : set_data(2*S+1, i)
| 1 | set_data(2*S, i) < 0 ? set_data(2*S+1, i) : 0.0
+| 2 | 0.0
|===
==== Test set S=1
@@ -70,8 +74,9 @@ The aim of this test set is to check values with large exponents.
[cols="1,9"]
|===
| p | tosa_mi_data(S, KS, p, k, i) =
-| 0 | (B/sqrt(KS))*(0.75 + 0.25*set_data(2*S+0, i))
-| 1 | (B/sqrt(KS))*(0.75 + 0.25*set_data(2*S+1, i))
+| 0 | (B/sqrt(KS+1))*(0.75 + 0.25*set_data(3*S+0, i))
+| 1 | (B/sqrt(KS+1))*(0.75 + 0.25*set_data(3*S+1, i))
+| 2 | (B*B/(KS+1))*(0.75 + 0.25*set_data(3*S+2, i))
|===
==== Test set S=2
@@ -85,6 +90,7 @@ If the implementation changes the order of the sum, then the test data must also
| p | tosa_mi_data(S, KS, p, k, i) =
| 0 | (k==0) ? 1.0 : set_data(2*S+0, i)/sqrt(KS)
| 1 | (k==0) ? 1.0 : set_data(2*S+1, i)/sqrt(KS)
+| 2 | 0.0
|===
==== Test set S=3
@@ -98,6 +104,7 @@ If the implementation changes the order of the sum, then the test data must also
| p | tosa_mi_data(S, KS, p, k, i) =
| 0 | (k==0) ? 16.0 : exp(2*set_data(2*S+0, 2*i+0)) * set_data(2*S+0, 2*i+1)
| 1 | (k==0) ? 16.0 : exp(2*set_data(2*S+1, 2*i+0)) * set_data(2*S+1, 2*i+1)
+| 2 | 0.0
|===
==== Test set S=4
@@ -109,6 +116,7 @@ The aim of this test set is to check a mixture of zero and non-zero products.
| p | tosa_mi_data(S, KS, p, k, i) =
| 0 | (k==KS/2) ? +0.5 : (set_data(2*S, i) < 0 ? 0.0 : (B/sqrt(KS))*set_data(2*S+1, i))
| 1 | (k==KS/2) ? -0.5 : (set_data(2*S, i) < 0 ? (B/sqrt(KS))*set_data(2*S+1, i) : 0.0)
+| 2 | 0.0
|===
==== Test set S=5
@@ -118,8 +126,9 @@ The aim of this test set is to check signed inputs of large range.
[cols="1,9"]
|===
| p | tosa_mi_data(S, KS, p, k, i) =
-| 0 | (B/sqrt(KS))*set_data(2*S+0, i)
-| 1 | (B/sqrt(KS))*set_data(2*S+1, i)
+| 0 | (B/sqrt(KS+1))*set_data(3*S+0, i)
+| 1 | (B/sqrt(KS+1))*set_data(3*S+1, i)
+| 2 | (B*B/(KS+1))*set_data(3*S+2, i)
|===
=== Main Inference operator test data
@@ -145,6 +154,9 @@ for (0 <= n < N, 0 <= iy < IH, 0 <= ix < IW, 0 <= ic < IC) {
for (0 <= oc < OC, 0 <= ky < KH, 0 <= kx < KW, 0 <= ic < IC) {
weight[oc, ky, kx, ic] = tosa_mi_data(S, KS, 1, (ky*KW+kx)*IC+ic, ((oc*KH+ky)*KW+kx)*IC+ic);
}
+for (0 <= oc < OC) {
+ bias[oc] = tosa_mi_data(S, KS, 2, oc)
+}
----
==== CONV3D
@@ -162,6 +174,9 @@ for (0 <= n < N, 0 <= id < UD, 0 <= iy < IH, 0 <= ix < IW, 0 <= ic < IC) {
for (0 <= oc < OC, 0 <= kd < KD, 0 <= ky < KH, 0 <= kx < KW, 0 <= ic < IC) {
weight[oc, kd, ky, kx, ic] = tosa_mi_data(S, KS, 1, ((kd*KH+ky)*KW+kx)*IC+ic, (((oc*KD+kd)*KH+ky)*KW+kx)*IC+ic);
}
+for (0 <= oc < OC) {
+ bias[oc] = tosa_mi_data(S, KS, 2, oc)
+}
----
==== DEPTHWISE_CONV2D
@@ -179,6 +194,9 @@ for (0 <= n < N, 0 <= iy < IH, 0 <= ix < IW, 0 <= c < C) {
for (0 <= ky < KH, 0 <= kx < KW, 0 <= c < C, 0 <= m < M) {
weight[ky, kx, c, m] = tosa_mi_data(S, KS, 1, (ky*KW+kx), ((ky*KW+kx)*C+c)*M+m);
}
+for (0 <= oc < C*M) {
+ bias[oc] = tosa_mi_data(S, KS, 2, oc)
+}
----
==== FULLY_CONNECTED
@@ -196,6 +214,9 @@ for (0 <= n < N, 0 <= ic < IC) {
for (0 <= oc < OC, 0 <= ic < IC) {
weight[oc, ic] = tosa_mi_data(S, KS, 1, ic, oc*IC+ic);
}
+for (0 <= oc < OC) {
+ bias[oc] = tosa_mi_data(S, KS, 2, oc)
+}
----
==== MATMUL
@@ -230,6 +251,9 @@ for (0 <= n < N, 0 <= iy < IH, 0 <= ix < IW, 0 <= ic < IC) {
for (0 <= oc < OC, 0 <= ky < KH, 0 <= kx < KW, 0 <= ic < IC) {
weight[oc, ky, kx, ic] = tosa_mi_data(S, KS, 1, (ky*KW+kx)*IC+ic, ((oc*KH+ky)*KW+kx)*IC+ic);
}
+for (0 <= oc < OC) {
+ bias[oc] = tosa_mi_data(S, KS, 2, oc)
+}
----
==== FFT2D
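For illustration only, the S=1 generator rows above (including the new p == 2 bias row) can be sketched as follows. `set_data()` is stubbed out here as a placeholder for the spec's pseudo-random source, and `B` is the large-magnitude scaling constant used by test set S=1, defined elsewhere in Appendix A.

[source,c++]
----
#include <cmath>
#include <cstddef>

// Placeholder stub: the real set_data() generator is defined in Appendix A.
double set_data(int set, std::size_t i) { return 0.0; }

// Test set S=1 rows, now including the bias parameter p == 2.
double tosa_mi_data_s1(int KS, int p, std::size_t i, double B) {
    const int S = 1;
    switch (p) {
        case 0: return (B / std::sqrt(KS + 1.0)) * (0.75 + 0.25 * set_data(3*S + 0, i));
        case 1: return (B / std::sqrt(KS + 1.0)) * (0.75 + 0.25 * set_data(3*S + 1, i));
        case 2: return (B * B / (KS + 1.0)) * (0.75 + 0.25 * set_data(3*S + 2, i)); // bias
        default: return 0.0;
    }
}
----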
diff --git a/chapters/introduction.adoc b/chapters/introduction.adoc
index 0765e95..d6f7bf9 100644
--- a/chapters/introduction.adoc
+++ b/chapters/introduction.adoc
@@ -287,64 +287,90 @@ where `E = pow(1 + pow(2, -M-1), N) - 1`. In this expression M is the number of
===== Dot product accuracy requirements
-This section assumes an operation acting on two tensors named 'input' and 'weight'.
-Each output tensor element can be expressed as a dot product of elements between the input and weight tensors.
+This section assumes an operation acting on tensors named 'input', 'weight' and optionally 'bias'.
+Each output tensor element can be expressed as a dot product of elements between the 'input' and 'weight' tensors with optional bias addition.
The dot product has length KS, the kernel size.
+If the operation does not specify a bias then 'bias' is taken to be zero in this section.
Note: KS is defined for each relevant operator in the appendix section <<Main Inference operator test data>>.
-In other words each output element `out` can be expressed as a dot product between input elements `in[k]` and weight elements `w[k]`:
+In other words, each output element `out` can be expressed as a dot product between input elements `in[k]`, weight elements `w[k]`, bias `b`:
-`out = in[0] * w[0] + in[1] * w[1] + ... + in[KS-1] * w[KS-1]`
+`out = in[0] * w[0] + in[1] * w[1] + ... + in[KS-1] * w[KS-1] + b`
-The positions of `in[k]` and `w[k]` in the input and weight tensors depends on the operation being performed (for example a convolution).
+The positions of `in[k]`, `w[k]`, `b` in the input, weight and bias tensors depends on the operation being performed.
+This may be, for example, a convolution.
This section defines the accuracy required for these operations.
-The term "fp64 arithmetic" refers to double-precision floating-point arithmetic defined by <<Other publications>>[1].
+In this section:
-For an operation with given sizes and attributes to be compliant the following must hold for each data set S defined in <<Appendix A>>:
+* "fp64 arithmetic" refers to double-precision floating-point arithmetic defined by IEEE 754 (<<Other publications>>[1])
+* `operation_fp64()` is an fp64 reference implementation of the operation
+* `operation_imp()` is the implementation under test
+* `local_bound` is defined as follows:
+** For operations with a local_bound attribute it is the value of the optional attribute, with default value of false
+** For operations that do not have a local_bound attribute the value is true
-* Let input be the input tensor generated by <<Main Inference operator test data>> for test set S
-* Let weight be the weight tensor generated by <<Main Inference operator test data>> for test set S
-* Let output_ref be the output tensor calculated by the operation using fp64 arithemic
-* Let output_imp be the output tensor calculated by the implementation to test
-* Let input_abs be the input tensor with each element replaced with its absolute value
-* Let weight_abs be the weight tensor with each element replaced with its absolute value
-* Let output_bnd be the output tensor calculated using fp64 arithmetic on input_abs and weight_abs
+The checks described in the following code must pass for the following data sets:
-The following checks must then pass:
+* Data sets defined for the operation in Appendix A <<Main Inference operator test data>>.
+* Data sets that have at least MIN_DOT_PRODUCT different output values. For these data sets we take S=-1.
[source,c++]
----
+output_ref = operation_fp64(input, weight, bias);
+output_imp = operation_imp (input, weight, bias);
+input_abs = abs(input); // Element-wise absolute
+weight_abs = abs(weight); // Element-wise absolute
+bias_abs = abs(bias); // Element-wise absolute
+if (!local_bound) {
+ input_abs_max = max_value(input_abs); // maximum over all elements
+ for_each(index in shape(input_abs)) {
+ input_abs[index] = input_abs_max; // set all entries to global maximum
+ }
+}
+output_bnd = operation_fp64(input_abs, weight_abs, bias_abs);
+
size_t T = tensor_size(output_shape) // number dot product results
+size_t ksb = (max_value(bias_abs) > 0) ? (KS + 1) : KS; // kernel size and bias
fp64_t out_err_sum = 0.0;
fp64_t out_err_sumsq = 0.0;
-fp64_t acc_prec; // 1<<(M+1) where M is the number of mantissa bits
+fp64_t acc_prec; // 1<<(M+1) where M is the number of mantissa bits
+fp64_t acc_min_normal; // accumulator minimum normal greater than zero
+fp64_t two_m63 = -1.0/(fp64)((int64_t)-1<<63); // pow(2, -63)
switch (acc_t) {
- case fp32_t: acc_prec = (fp64_t)(1<<24); break;
- case fp16_t: acc_prec = (fp64_t)(1<<11); break;
- default: ERROR_IF(true);
+ case fp32_t: acc_prec = (fp64_t)(1<<24); // pow(2, 24)
+ acc_min_normal = two_m63 * two_m63; // pow(2, -126)
+ break;
+ case fp16_t: acc_prec = (fp64_t)(1<<11); // pow(2, 11)
+ acc_min_normal = 1.0/(fp64_t)(1<<14); // pow(2, -14)
+ break;
+ default: ERROR_IF(true);
}
for_each(index in output_shape) {
fp64_t out_bnd = tensor_read<fp64_t>(output_bnd, output_shape, index);
fp64_t out_ref = tensor_read<fp64_t>(output_ref, output_shape, index);
acc_t out_imp = tensor_read<acc_t> (output_imp, output_shape, index);
fp64_t out_err;
- if (out_bnd == 0.0) {
+ if ((acc_t)out_bnd == infinity) {
+ // dot product can overflow and there is no accuracy limit
+ out_err = 0.0;
+ } else if (out_bnd == 0.0) {
REQUIRE(out_ref == 0.0 && out_imp == 0.0);
out_err = 0.0;
- } else { // out_bnd > 0.0
+ } else { // 0.0 < out_bnd < infinity
+ out_bnd = max(out_bnd, acc_min_normal);
out_err = ((fp64_t)out_imp - out_ref)*acc_prec/out_bnd;
- REQUIRE(abs(out_err) <= KS);
+ REQUIRE(abs(out_err) <= ksb);
}
out_err_sum += out_err;
out_err_sumsq += out_err * out_err;
}
-if (S!=1 && S!=2) {
+if (input and weights are data set S with 3 <= S <= 5) {
// check output error bias magnitude for data sets S which are not positive biased
- REQUIRE(abs(out_err_sum) <= 2*sqrt(KS*T));
+ REQUIRE(abs(out_err_sum) <= 2*sqrt(ksb*T));
}
// check output error variance magnitude
-REQUIRE(out_err_sumsq <= 0.4*KS*T)
+REQUIRE(out_err_sumsq <= 0.4*ksb*T)
----
=== Tensor Definitions
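As a worked illustration of the per-element check added above (not normative, with made-up values), a single fp16-accumulator output could be checked as follows; `ksb` extends the old KS bound by one when a non-zero bias is present.

[source,c++]
----
#include <algorithm>
#include <cassert>
#include <cmath>

int main() {
    const double acc_prec       = double(1 << 11);   // fp16: pow(2, 11)
    const double acc_min_normal = 1.0 / (1 << 14);   // fp16: pow(2, -14)
    const int    KS  = 16;                            // kernel size
    const bool   has_bias = true;
    const int    ksb = has_bias ? KS + 1 : KS;        // kernel size and bias

    double out_bnd = 3.5;      // fp64 bound from abs(input), abs(weight), abs(bias)
    double out_ref = 1.25;     // fp64 reference result
    double out_imp = 1.2497;   // implementation result under test (illustrative)

    out_bnd = std::max(out_bnd, acc_min_normal);
    double out_err = (out_imp - out_ref) * acc_prec / out_bnd;
    assert(std::fabs(out_err) <= ksb);                // per-element error bound
    return 0;
}
----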
diff --git a/tosa.xml b/tosa.xml
index d3889a2..53f8000 100644
--- a/tosa.xml
+++ b/tosa.xml
@@ -159,6 +159,14 @@
<description>Weight zero point. Must be zero for non-int8 types.</description>
<rank min="0" max="0"/>
</argument>
+ <argument category="attribute" name="local_bound" type="tensor_t" shape="-" tensor-element-type="bool_t" optional="true">
+ <description>
+ This optional attribute affects the floating-point compliance error bound.
+ The default of false allows for direct and transform based, fast convolution algorithms.
+ Only set to true if direct dot-product calculation precision is required.
+ </description>
+ <rank min="0" max="0"/>
+ </argument>
<argument category="output" name="output" type="tensor_t" shape="[N,OH,OW,OC]" tensor-element-type="out_t">
<description>Output tensor</description>
<rank min="4" max="4"/>
@@ -236,6 +244,14 @@
<description>Weight zero point. Must be zero for non-int8 types.</description>
<rank min="0" max="0"/>
</argument>
+ <argument category="attribute" name="local_bound" type="tensor_t" shape="-" tensor-element-type="bool_t" optional="true">
+ <description>
+ This optional attribute affects the floating-point compliance error bound.
+ The default of false allows for direct and transform based, fast convolution algorithms.
+ Only set to true if direct dot-product calculation precision is required.
+ </description>
+ <rank min="0" max="0"/>
+ </argument>
<argument category="output" name="output" type="tensor_t" shape="[N,OD,OH,OW,OC]" tensor-element-type="out_t">
<description>Output tensor</description>
<rank min="5" max="5"/>
@@ -309,6 +325,14 @@
<description>Weight zero point. Must be zero for non-int8 types.</description>
<rank min="0" max="0"/>
</argument>
+ <argument category="attribute" name="local_bound" type="tensor_t" shape="-" tensor-element-type="bool_t" optional="true">
+ <description>
+ This optional attribute affects the floating-point compliance error bound.
+ The default of false allows for direct and transform based, fast convolution algorithms.
+ Only set to true if direct dot-product calculation precision is required.
+ </description>
+ <rank min="0" max="0"/>
+ </argument>
<argument category="output" name="output" type="tensor_t" shape="[N,OH,OW,C*M]" tensor-element-type="out_t">
<description>Output tensor</description>
<rank min="4" max="4"/>
@@ -360,6 +384,14 @@
<description>Real part of the complex output.</description>
<rank min="3" max="3"/>
</argument>
+ <argument category="attribute" name="local_bound" type="tensor_t" shape="-" tensor-element-type="bool_t" optional="true">
+ <description>
+ This optional attribute affects the floating-point compliance error bound.
+ The default of false allows for direct and transform based, fast convolution algorithms.
+ Only set to true if direct dot-product calculation precision is required.
+ </description>
+ <rank min="0" max="0"/>
+ </argument>
<argument category="output" name="output_imag" type="tensor_t" shape="[N,H,W]" tensor-element-type="in_out_t">
<description>Imaginary part of the complex output.</description>
<rank min="3" max="3"/>
@@ -592,6 +624,14 @@
<description>Weight zero point. Must be zero for non-int8 types.</description>
<rank min="0" max="0"/>
</argument>
+ <argument category="attribute" name="local_bound" type="tensor_t" shape="-" tensor-element-type="bool_t" optional="true">
+ <description>
+ This optional attribute affects the floating-point compliance error bound.
+ The default of false allows for direct and transform based, fast convolution algorithms.
+ Only set to true if direct dot-product calculation precision is required.
+ </description>
+ <rank min="0" max="0"/>
+ </argument>
<argument category="output" name="output" type="tensor_t" shape="[N,OH,OW,OC]" tensor-element-type="out_t">
<description>Output tensor</description>
<rank min="4" max="4"/>
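A small sketch (names hypothetical) of how a conformance checker might resolve the bound mode from this optional attribute, matching the rules added to the introduction: operations without a local_bound attribute always use the local bound, while operations that define it default to false (global bound) when it is unset.

[source,c++]
----
#include <optional>

// Resolve the bound mode used by the compliance check.
bool resolve_local_bound(bool op_defines_local_bound,
                         std::optional<bool> attribute_value) {
    if (!op_defines_local_bound) {
        return true;                           // no attribute: always the local bound
    }
    return attribute_value.value_or(false);    // attribute present: default value is false
}
----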