From f791b447392c5112946ac9c49b1add83e2bfe7c0 Mon Sep 17 00:00:00 2001
From: Dominic Symes <dominic.symes@arm.com>
Date: Mon, 30 Oct 2023 14:26:11 +0000
Subject: Main Conformance: Update EXP, POW precision

EXP and POW are complex to implement in a precise number of ulp
and a more flexible precision is given.

Additionally, the test generator S=5 has the bias values
changed to zero. If the bias values are non-zero then a
bias for all output tensor elements of the channel can affect
the dot product bias test.

The numeric accuracy helpers are updated for bf16.

Signed-off-by: Dominic Symes <dominic.symes@arm.com>
Change-Id: Ia46a2ef4d577244b6983a08ce850de3db9573a42
---
 chapters/appendix_a.adoc   | 14 +++++++-------
 chapters/introduction.adoc | 34 ++++++++++++++++++++++++----------
 chapters/pseudocode.adoc   | 46 +++++++++++++++++++++++++---------------------
 3 files changed, 56 insertions(+), 38 deletions(-)
diff --git a/chapters/appendix_a.adoc b/chapters/appendix_a.adoc
index ba3b6bb..17007b5 100644
--- a/chapters/appendix_a.adoc
+++ b/chapters/appendix_a.adoc
@@ -126,9 +126,9 @@ The aim of this test set is to check signed inputs of large range.
 [cols="1,9"]
 |===
 | p | tosa_mi_data(S, KS, p, k, i) =
-| 0 | (B/sqrt(KS+1))*set_data(3*S+0, i)
-| 1 | (B/sqrt(KS+1))*set_data(3*S+1, i)
-| 2 | (B*B/(KS+1))*set_data(3*S+2, i)
+| 0 | (B/sqrt(KS))*set_data(3*S+0, i)
+| 1 | (B/sqrt(KS))*set_data(3*S+1, i)
+| 2 | 0.0
 |===
 
 === Main Inference operator test data
@@ -154,7 +154,7 @@ for (0 <= n < N, 0 <= iy < IH, 0 <= ix < IW, 0 <= ic < IC) {
 for (0 <= oc < OC, 0 <= ky < KH, 0 <= kx < KW, 0 <= ic < IC) {
   weight[oc, ky, kx, ic] = tosa_mi_data(S, KS, 1, (ky*KW+kx)*IC+ic, ((oc*KH+ky)*KW+kx)*IC+ic);
 }
-for (0 <= oc < OC) {
+for (0 <= oc < BC) {
   bias[oc] = tosa_mi_data(S, KS, 2, oc)
 }
 ----
@@ -174,7 +174,7 @@ for (0 <= n < N, 0 <= id < UD, 0 <= iy < IH, 0 <= ix < IW, 0 <= ic < IC) {
 for (0 <= oc < OC, 0 <= kd < KD, 0 <= ky < KH, 0 <= kx < KW, 0 <= ic < IC) {
   weight[oc, kd, ky, kx, ic] = tosa_mi_data(S, KS, 1, ((kd*KH+ky)*KW+kx)*IC+ic, (((oc*KD+kd)*KH+ky)*KW+kx)*IC+ic);
 }
-for (0 <= oc < OC) {
+for (0 <= oc < BC) {
   bias[oc] = tosa_mi_data(S, KS, 2, oc)
 }
 ----
@@ -214,7 +214,7 @@ for (0 <= n < N, 0 <= ic < IC) {
 for (0 <= oc < OC, 0 <= ic < IC) {
   weight[oc, ic] = tosa_mi_data(S, KS, 1, ic, oc*IC+ic);
 }
-for (0 <= oc < OC) {
+for (0 <= oc < BC) {
   bias[oc] = tosa_mi_data(S, KS, 2, oc)
 }
 ----
@@ -251,7 +251,7 @@ for (0 <= n < N, 0 <= iy < IH, 0 <= ix < IW, 0 <= ic < IC) {
 for (0 <= oc < OC, 0 <= ky < KH, 0 <= kx < KW, 0 <= ic < IC) {
   weight[oc, ky, kx, ic] = tosa_mi_data(S, KS, 1, (ky*KW+kx)*IC+ic, ((oc*KH+ky)*KW+kx)*IC+ic);
 }
-for (0 <= oc < OC) {
+for (0 <= oc < BC) {
   bias[oc] = tosa_mi_data(S, KS, 2, oc)
 }
 ----
diff --git a/chapters/introduction.adoc b/chapters/introduction.adoc
index fd9ec25..1fabcc3 100644
--- a/chapters/introduction.adoc
+++ b/chapters/introduction.adoc
@@ -210,13 +210,6 @@ The function tosa_reference_check_fp() defines the error range permitted by a gi
 
 NOTE: The error criteria in this section are at an early draft stage and are likely to change during conformance test development.
 
-Error criteria are specified for a single operator.
-For a sequence of n operators, A[0] to A[n-1], there must be corresponding implementations, I[0] to I[n-1], such that:
-
-* Each I[k] implements A[k] with same or higher precision datatypes
-* Each I[k] meets the accuracy defined in this specification for the A[k] precision
-* The accuracy of the sequence A[0] to A[n-1] is no worse than the accuracy of the sequence I[0] to I[n-1]
-
 The following criteria apply to all operations:
 
 * If any input is a NaN and the result is floating-point then the result must be a NaN
@@ -268,14 +261,24 @@ Otherwise:the result must be within 1 ulp of the mathematical result.
 Otherwise if the input is a zero the output must be an infinity of the same sign. +
 Otherwise the result must be within 1 ulp of the mathematical result.
 
-| <<SIGMOID>>, <<TANH>>, <<POW>>, <<EXP>>, <<LOG>>, <<ERF>>
+| <<SIGMOID>>, <<TANH>>, <<LOG>>, <<ERF>>
 | If the input to LOG is less than zero then the result must be a NaN. +
-If the inputs to POW are both zero then the result must be a NaN. +
-If the first input to POW is less than zero and the second input is not an integer then the result must be a NaN. +
 If the result overflows the output must be an infinity of the correct sign. +
 If the result underflows the output must be a zero of the correct sign. +
 Otherwise the result must be within 5 ulp of the mathematical result.
 
+| <<EXP>>
+| Let `x` be an input element and `out_imp` the implementation output of `exp(x)`. +
+Let `out_ref` be the result of the fp64_t reference implementation of `exp(x)`. +
+Let `err_bnd = out_ref*exp2(-normal_frac<in_out_t>)*(1+abs(x))` +
+Then `tosa_reference_check_fp_bnd<in_out_t>(out_imp, out_ref, err_bnd)` must be true
+
+| <<POW>>
+| Let `x`, `y` be input elements and `out_imp` the implementation output of `pow(x,y)`. +
+Let `out_ref` be the result of the fp64_t reference implementation of `pow(x,y)`. +
+Let `err_bnd = abs(out_ref)*exp2(-normal_frac<in_out_t>)*(1+abs(log(abs(x))*y))` +
+Then `tosa_reference_check_fp_bnd<in_out_t>(out_imp, out_ref, err_bnd)` must be true
+
 | <<REDUCE_SUM>>
 | Each output can be expressed as a dot product of an input vector with a vector of ones. +
 This dot product must meet the <<Dot product accuracy requirements>>
@@ -292,6 +295,17 @@ where `E = pow(1 + pow(2, -M-1), N) - 1`. In this expression M is the number of
 
 |===
 
+===== Operator sequence precision requirement
+
+Precision criteria are specified for a single operator.
+
+An implementation M of a sequence of n TOSA operators, A[0] to A[n-1], is said to
+be compliant if M gives the same result as a sequence of implementations
+M[0] to M[n-1] such that:
+
+* Each M[k] implements A[k] with same or higher precision datatypes
+* Each M[k] meets the accuracy defined in this specification for A[k] where the M[k] output is converted to A[k] output precision using round to nearest
+
 ===== Dot product accuracy requirements
 
 This section assumes an operation acting on tensors named 'input', 'weight' and optionally 'bias'.
diff --git a/chapters/pseudocode.adoc b/chapters/pseudocode.adoc
index efb50a0..0a8f598 100644
--- a/chapters/pseudocode.adoc
+++ b/chapters/pseudocode.adoc
@@ -440,6 +440,7 @@ int ilog2(fp64_t v) {
 fp64_t normal_min<in_t>() {
   switch (in_t) {
     case fp32_t: return exp2(-126);
+    case bf16_t: return exp2(-126);
     case fp16_t: return exp2( -14);
   }
 }
@@ -447,6 +448,7 @@ fp64_t normal_min<in_t>() {
 fp64_t normal_max<in_t>() {
   switch (in_t) {
     case fp32_t: return exp2(128) - exp2(127-23);
+    case bf16_t: return exp2(128) - exp2(127- 7);
     case fp16_t: return exp2( 16) - exp2( 15-10);
   }
 }
@@ -456,17 +458,30 @@ int normal_frac<in_t> () {
   switch (in_t) {
     case fp32_t: return 23;
     case fp16_t: return 10;
+    case bf16_t: return  7;
   }
 }
 ----
 
-The following function checks if a test value in floating-point format in_t is within an error range compared to a reference value.
-The function assumes that denormal values may be flushed to zero.
-The permitted range error is specified as num_ulp which in this spefication is the permitted range defined by the following function.
+The following functions check if a test value in floating-point format in_t is within an error range compared to a reference value.
+The functions assume that denormal values may be flushed to zero.
+For the first function, the permitted error range is specified as num_ulp which is converted to an error bound as specified by the code.
+For the second function, the permitted error range is specified as an absolute error bound.
 
 [source,c++]
 ----
 bool tosa_reference_check_fp<in_t>(in_t test_value, fp64_t ref_value, fp64_t num_ulp) {
+  fp64_t err_bnd = 0.0;
+  if (is_normal_fp64(ref_value)) {
+    int ref_exp = ilog2(abs(ref_value));
+    fp64_t ref_pow2 = max(exp2(ref_exp), normal_min<in_t>);
+    fp64_t val_ulp  = ref_pow2 * exp2(-normal_frac<in_t>);
+    err_bnd = val_ulp * num_ulp;
+  }
+  return tosa_reference_check_fp_bnd<in_t>(test_value, ref_value, err_bnd);
+}
+
+bool tosa_reference_check_fp_bnd<in_t>(in_t test_value, fp64_t ref_value, fp64_t err_bnd) {
   if (is_a_NaN(ref_value)) {
     return is_a_NaN(test_value);
   }
@@ -474,27 +489,16 @@ bool tosa_reference_check_fp<in_t>(in_t test_value, fp64_t ref_value, fp64_t num
     ref_value  = -ref_value;
     test_value = -test_value;
   }
-  fp64_t ref_min, ref_max;
-  if (ref_value == infinity) {
-    ref_min = infinity;
-    ref_max = infinity;
-  } else if (ref_value == 0) {
-    ref_min = 0;
-    ref_max = 0;
-  } else {
-    int ref_exp = ilog2(ref_value);
-    fp64_t ref_pow2 = max(exp2(ref_exp), normal_min<in_t>);
-    fp64_t val_ulp  = ref_pow2 * exp2(-normal_frac<in_t>);
-    ref_max = ref_value + val_ulp * num_ulp;
-    ref_min = ref_value - val_ulp * num_ulp;
-    if (ref_max > normal_max<in_t>) ref_max = infinity;
-    if (ref_min > normal_max<in_t>) ref_min = infinity;
-    if (ref_max < normal_min<in_t>) ref_max = normal_min<in_t>;
-    if (ref_min < normal_min<in_t>) ref_min = 0;
-  }
+  fp64_t ref_max = ref_value + err_bnd;
+  fp64_t ref_min = ref_value - err_bnd;
+  if (ref_max > normal_max<in_t>) ref_max = infinity;
+  if (ref_min > normal_max<in_t>) ref_min = infinity;
+  if (ref_max < normal_min<in_t>) ref_max = normal_min<in_t>;
+  if (ref_min < normal_min<in_t>) ref_min = 0;
   return (static_cast<fp64_t>(test_value) >= ref_min &&
           static_cast<fp64_t>(test_value) <= ref_max);
 }
+
 ----
 
 ==== Numeric Conversion Helpers
-- 
cgit v1.2.1