From c386a052b8e1cd109ab4888c87cd3073a74edaa2 Mon Sep 17 00:00:00 2001
From: Dominic Symes <dominic.symes@arm.com>
Date: Fri, 20 Jan 2023 16:09:31 +0000
Subject: Main inference conformance draft

This patch contains a draft of the
Main Inference profile floating-point
conformance requirements.

Change-Id: Ib1201fff7f4015668b2de6dccf4cd85b419c3f76
Signed-off-by: Dominic Symes <dominic.symes@arm.com>
---
 chapters/appendix_a.adoc   |  66 +++++++++++++++
 chapters/introduction.adoc | 198 ++++++++++++++++++++++++++++++++-------------
 chapters/pseudocode.adoc   |   3 +-
 tools/dictionary.dic       |   4 +
 tosa_spec.adoc             |   2 +
 5 files changed, 218 insertions(+), 55 deletions(-)
 create mode 100644 chapters/appendix_a.adoc
diff --git a/chapters/appendix_a.adoc b/chapters/appendix_a.adoc
new file mode 100644
index 0000000..33a4f11
--- /dev/null
+++ b/chapters/appendix_a.adoc
@@ -0,0 +1,66 @@
+//
+// This confidential and proprietary software may be used only as
+// authorised by a licensing agreement from ARM Limited
+// (C) COPYRIGHT 2023 ARM Limited
+// ALL RIGHTS RESERVED
+// The entire notice above must be reproduced on all authorised
+// copies and copies may only be made to the extent permitted
+// by a licensing agreement from ARM Limited.
+
+== Appendix A
+
+NOTE: This appendix is at an early stage of development at this point in time
+
+=== Random data generation
+
+The following function generates a pseudo-random floating-point value in the range -1.0 to +1.0 for use as test data.
+It uses a modulo (1<<32) recurrent sequence with multiplier derived from "TOSASETS" and the set number.
+
+[source,c++]
+----
+float set_data(uint32_t set, uint32_t index)
+{
+    uint32_t m = (8*set + 1) * 0x705A5E75;   // mod (1<<32) calculation
+    uint32_t r = m + 1;                      // mod (1<<32) calculation
+    for (uint32_t i = 0; i < index; i++) {   // apply the recurrence r -> r*m + 1 index times
+        r = r * m + 1;                       // mod (1<<32) calculation
+    }
+    float  sign = (r>>31)==0 ? +1 : -1;      // bit 31 of r selects the sign
+    return sign * (float)(r & 0x7FFFFFFF) / (float)(0x7FFFFFFF);   // low 31 bits of r give the magnitude
+}
+----
+
+=== Dot product floating-point test data sets
+
+Each test set is indexed by a pair (S, N) where:
+
+* S is the test set number
+* N is the number of elements in a single test vector
+
+Each test set (S, N) contains multiple tests that statistics are calculated over.
+The parameter T is the number of tests in a given set.
+In the table below, t is the test number within a set in the range 0 to T-1.
+
+[cols="1,1,1,5,5"]
+|===
+| Set S | N range | T | x[k] formula for k < N | w[k] formula for k < N
+
+| 0
+| 2-25,50,100,1000
+| 10
+| x[k]=set_data(S, 2*t*N+2*k) < 0 ? 0.0 : set_data(S, 2*t*N+2*k+1)
+| w[k]=set_data(S, 2*t*N+2*k) < 0 ? set_data(S, 2*t*N+2*k+1) : 0.0
+
+| 1
+| 2-25,50,100,1000
+| 1000
+| x[k]=2.0*set_data(S,  2*t*N + k)
+| w[k]=2.0*set_data(S, (2*t+1)*N + k)
+
+| 2
+| 2-25,50,100,1000
+| 1000
+| x[0]=1.0, x[k]=set_data(S, 2*t*N + k)/sqrt(N) for k>0
+| w[0]=1.0, w[k]=set_data(S, (2*t+1)*N + k)/sqrt(N) for k>0
+
+|===
diff --git a/chapters/introduction.adoc b/chapters/introduction.adoc
index 5a2b9a1..5765f22 100644
--- a/chapters/introduction.adoc
+++ b/chapters/introduction.adoc
@@ -1,7 +1,7 @@
 //
 // This confidential and proprietary software may be used only as
 // authorised by a licensing agreement from ARM Limited
-// (C) COPYRIGHT 2020-2022 ARM Limited
+// (C) COPYRIGHT 2020-2023 ARM Limited
 // ALL RIGHTS RESERVED
 // The entire notice above must be reproduced on all authorised
 // copies and copies may only be made to the extent permitted
@@ -141,7 +141,7 @@ The TOSA specification is a work in progress.
 This section defines when a TOSA implementation is compliant to a given TOSA specification profile and level.
 The term conformant will mean the same as compliant.
 
-==== Baseline Inference Profile Compliance
+==== Base Inference Profile Compliance
 
 The <<Operator Graphs>> section of this specification defines a TOSA graph and the behavior defined for a TOSA graph.
 This behavior is captured in the pseudo-code function tosa_execute_graph().
@@ -181,16 +181,133 @@ bool tosa_test_compliance(tosa_graph_t graph, tosa_list_t input_list, tosa_level
 }
 ----
 
-==== Main Inference and Main Training Profile
+==== Main Inference Profile
 
-An implementation is compliant to the Main Inference or Main Training profiles if the following both hold for that respective profile:
+A Main Inference compliant implementation must satisfy the following:
+
+* The implementation must meet <<Base Inference Profile Compliance>> for all Base Inference compliant graphs
+* The implementation must support all Main Inference operations using the datatype fp32_t
+** The operations must meet the precision requirements of <<Main Inference precision requirements>>
+* The implementation must support all Main Inference operations using the datatype fp16_t
+** The operations must meet the precision requirements of <<Main Inference precision requirements>>
+** Note: These requirements allow fp16_t operations to be implemented using the fp32_t datatype
+* The implementation must support all Main Inference operations using the datatype bf16_t
+** The operations must meet the precision requirements of <<Main Inference precision requirements>>
+** Note: These requirements allow bf16_t operations to be implemented using the fp32_t datatype
+
+As with <<Base Inference Profile Compliance>> the pseudo-code function tosa_execute_graph() can return one of three possible results.
+A compliant implementation must satisfy the following:
 
 * For a graph returning tosa_error the implementation must also return an error
 * For a graph returning tosa_valid the implementation must execute the entire graph without error
 * For a graph returning tosa_valid and consisting only of integer operators the results must match exactly
-* The implementation must report the maximum relative error on a set of standard graphs that contain floating point operators. These graphs will be provided as a future appendix to this specification.
 
-Note that for graphs containing floating point there is no strict precision requirement that must be met, but that the precision achieved must be reported.
+===== Main Inference precision requirements
+
+In a compliant implementation, individual floating-point operations within the graph must meet the accuracy bounds
+listed in the table below. In the table _ulp_ means unit of the last place.
+
+NOTE: The error criteria in this section are at an early draft stage and are likely to change during conformance test development.
+
+The following criteria apply to all operations:
+
+* If any input is a NaN and the result is floating-point then the result must be a NaN
+* If any input is a NaN and the operation is a comparison (greater, greater-equal, equal) then the result must be false
+* If any input is a NaN and the operation is conversion to an integer or boolean then the result is unpredictable
+
+[cols="1,3"]
+|===
+| Operation | Accuracy bound
+
+| <<ARGMAX>>, <<MAX_POOL2D>>, <<CLAMP>>, <<MAXIMUM>>, <<MINIMUM>>, <<ABS>>, <<NEGATE>>, <<CONST>>, <<IDENTITY>>
+| The result must be exact.
+
+| <<EQUAL>>, <<GREATER>>, <<GREATER_EQUAL>>
+| The result must be exact with: +
+(1) The sign of the zero is ignored +
+(2) Infinities of the same sign compare as equal
+
+| <<CONV2D>>, <<CONV3D>>, <<DEPTHWISE_CONV2D>>, <<FULLY_CONNECTED>>, <<MATMUL>>, <<TRANSPOSE_CONV2D>>
+| Each output can be expressed as a dot product of two input vectors. +
+The dot product must meet the <<Dot product accuracy requirements>>
+
+| <<FFT2D>>, <<RFFT2D>>
+| Each output can be expressed as a dot product of an input vector with a constant vector. +
+The dot product must meet the <<Dot product accuracy requirements>>
+
+| <<ADD>>, <<MUL>>, <<SUB>>, <<CEIL>>, <<FLOOR>>, <<CAST>>
+| Floating-point result overflows must be set to infinity of the correct sign. +
+Floating-point result underflows must be set to zero of the correct sign. +
+Integer result overflows must be saturated. +
+Addition of infinities of different signs must produce a NaN. +
+Subtraction of infinities of the same sign must produce a NaN. +
+Multiplication of an infinity by a zero must produce a NaN. +
+Otherwise for fp32_t the result must be rounded to the nearest representable value using the round to nearest, ties to even rounding mode. +
+Otherwise for fp16_t and bf16_t the result must be within 0.5 ulp of the mathematical result.
+
+| <<RECIPROCAL>>
+| If the input is a zero or the result overflows the output must be an infinity of the same sign. +
+If the input is an infinity or the result underflows the output must be a zero of the same sign. +
+Otherwise the result must be within 1 ulp of the mathematical result.
+
+| <<RSQRT>>
+| If the input is less than zero the result must be a NaN. +
+Otherwise if the input is a zero the output must be an infinity of the same sign. +
+Otherwise the result must be within 1 ulp of the mathematical result.
+
+| <<SIGMOID>>, <<TANH>>, <<POW>>, <<EXP>>, <<LOG>>
+| If the input to LOG is less than zero then the result must be a NaN. +
+If the inputs to POW are both zero then the result must be a NaN. +
+If the first input to POW is less than zero and the second input is not an integer then the result must be a NaN. +
+If the result overflows the output must be an infinity of the correct sign. +
+If the result underflows the output must be a zero of the correct sign. +
+Otherwise the result must be within 5 ulp of the mathematical result.
+
+| <<REDUCE_SUM>>
+| Each output can be expressed as a dot product of an input vector with a vector of ones. +
+This dot product must meet the <<Dot product accuracy requirements>>
+
+| <<AVG_POOL2D>>
+| Each output can be expressed as a dot product of an input vector with a vector with elements 1/d where d is the kernel size. +
+This dot product must meet the <<Dot product accuracy requirements>>
+
+| <<REDUCE_PRODUCT>>
+| Result overflows must be set to an infinity of the correct sign. +
+Result underflows must be set to a zero of the correct sign. +
+Otherwise if the final product and all sub-products are within the normal range then the result `R` must have an absolute error of at most `E*abs\(R)`
+where `E = pow(1 + pow(2, -M-1), N) - 1`. In this expression M is the number of mantissa bits of the floating-point format and N is the number of elements in the product.
+
+|===
+
+===== Dot product accuracy requirements
+
+This section gives accuracy constraints for operations where the result is a sum of products of N floating-point inputs:
+
+`y = x[0] * w[0] + x[1] * w[1] + ... + x[N-1] * w[N-1]`
+
+Let M be the number of mantissa bits in the accumulator.
+So M=23 for an `fp32_t` accumulator and M=10 for an `fp16_t` accumulator.
+
+In this section "fp64 arithmetic" refers to double-precision floating-point arithmetic defined by <<Other publications>>[1].
+
+Appendix A defines a number of <<Dot product floating-point test data sets>>.
+For each data test set (S, N) consisting of T tests the following must hold:
+
+* For each test t in the range 0 to T-1, calculate:
+** `y_imp[t] = x[0] * w[0] + ... + x[N-1] * w[N-1]` calculated by the implementation
+** `y_ref[t] = x[0] * w[0] + ... + x[N-1] * w[N-1]` calculated using fp64 arithmetic
+** `y_bnd[t] = abs(x[0] * w[0]) + ... + abs(x[N-1] * w[N-1])` calculated using fp64 arithmetic
+* if `y_bnd[t] == 0` then
+** `y_imp[t]` must be zero and set `y_err[t] = 0`
+* if `y_bnd[t] > 0` then set:
+** `y_err[t] = abs(y_imp[t] - y_ref[t]) * (1<<(M+1)) / y_bnd[t]` calculated using fp64 arithmetic
+* For each test t the following must be satisfied:
+** `y_ref[t], y_bnd[t], y_imp[t]` must be finite
+** `y_err[t] \<= N`
+* Calculate the mean-square (variance from 0) of y_err using 64 bit IEEE floating point arithmetic:
+**  `y_var = (y_err[0] * y_err[0] + ... + y_err[T-1] * y_err[T-1]) / T`
+* The mean-square must satisfy:
+**  `y_var \<= N/3`
 
 === Tensor Definitions
 
@@ -303,17 +420,32 @@ The number formats supported by a given operator are listed in its table of supp
 |fp16_t
 | -infinity
 | +infinity
-| 16-bit floating-point value.
+| 16-bit half-precision floating-point defined by <<Other publications>>[1]. +
+Normal values must be supported. +
+Denormal values must either be supported or flushed to zero. +
+Positive and negative infinity must be supported. +
+At least one NaN encoding must be supported. +
+Signed zero must be supported.
 
 |bf16_t
 | -infinity
 | +infinity
-| 16-bit brain float value.
+| 16-bit brain floating-point defined as bits [31:16] of the fp32_t format. +
+Normal values must be supported. +
+Denormal values must either be supported or flushed to zero. +
+Positive and negative infinity must be supported. +
+At least one NaN encoding must be supported. +
+Signed zero must be supported.
 
 |fp32_t
 | -infinity
 | +infinity
-| 32-bit floating-point value.
+| 32-bit single-precision floating-point defined by <<Other publications>>[1]. +
+Normal values must be supported. +
+Denormal values must either be supported or flushed to zero. +
+Positive and negative infinity must be supported. +
+At least one NaN encoding must be supported. +
+Signed zero must be supported.
 |===
 
 Note: In this specification minimum<type> and maximum<type> will denote the minimum and maximum values of the data as stored in memory (ignoring the zero point).
@@ -476,50 +608,8 @@ void generate_lookup_table(int16_t *table, int32_t (*reference)(int32_t))
 }
 ----
 
-=== Floating-point
-
-Floating-point support is included in the main inference profile.
-TOSA does not define bit-exact behavior of the floating-point type, since floating-point operation results can vary according to operation order (floating-point addition is not associative in general) and rounding behavior.
-If a bit-exact answer is required then integer operations should be used.
-TOSA does define that the floating-point type must support the following list of features.
-These features ensure that detection of overflow and other exceptional conditions can be handled consistently.
+=== Other publications
 
-* The floating-point type must have at least 16 total bits including the sign bit
-* The floating-point type must support positive and negative infinity values
-* The floating-point type must support at least one Not-a-Number encoding (NaN)
-* The floating-point type must support signed zero
-* The floating-point type must support handling of infinities, NaNs, zeros as in the following table
-
-.floating-point behavior
-|===
-|Case|Result
-
-|Operators other than explicitly mentioned by other rules: Any input operand is a NaN | a NaN
-
-|Comparisons (EQUAL, GREATER, GREATER_EQUAL), where either or both operands is NaN | False
-
-|Comparisons ignore the sign of 0|
-
-|RSQRT (reciprocal square root) of negative numbers | a NaN
-|(&#177; 0) &#215; (&#177; infinity), (&#177; infinity) &#215; (&#177; 0) | a NaN
-
-|LOG of negative numbers | a NaN
-
-|nonzero numbers / (&#177; 0) | (&#177; infinity)
-
-|(&#177; 0) / (&#177; 0), (&#177; infinity) / (&#177; infinity) | a NaN
-
-|(&#177; infinity) * 0 | a NaN
-
-| (+infinity) - (+infinity),  (+infinity) + (-infinity) | a NaN
-
-| Any positive overflow | + infinity
-
-| Any negative overflow | - infinity
-
-| Any positive underflow | + 0
-
-| Any negative underflow | - 0
-
-|===
+The following publications are referred to in this specification, or provide more information:
 
+. IEEE Std 754-2008, _IEEE Standard for Floating-point Arithmetic_, August 2008.
\ No newline at end of file
diff --git a/chapters/pseudocode.adoc b/chapters/pseudocode.adoc
index f4fd885..db699d1 100644
--- a/chapters/pseudocode.adoc
+++ b/chapters/pseudocode.adoc
@@ -252,10 +252,11 @@ See <<Number formats>> for details on the floating-point formats.
 ----
 int round_to_nearest_int(float_t f)
   Converts the floating-point value to f, with rounding to the nearest integer value.
+  For the required precision see the section: Main inference precision requirements.
 
 float_t round_to_nearest_float(in_t f)
   Converts the input value into floating-point, rounding to the nearest representable value.
-  The behavior for ties is implementation dependent.
+  For the required precision see the section: Main inference precision requirements.
 
 out_t sign_extend(in_t input)
   Only valid for two's complement integer values where out_t has more bits than in_t.
diff --git a/tools/dictionary.dic b/tools/dictionary.dic
index 4fa9ffd..325db9b 100644
--- a/tools/dictionary.dic
+++ b/tools/dictionary.dic
@@ -23,6 +23,7 @@ DEPTHWISE
 Elementwise
 FFT
 fft
+fp
 foreach
 Fulbourn
 GPUs
@@ -41,6 +42,7 @@ md
 MERCHANTABILITY
 MUL
 multipler
+NaN
 NPUs
 pre
 precisions
@@ -71,6 +73,8 @@ tensorinfo
 TFLite
 tosa
 TOSA
+TOSASETS
 TPUs
+ulp
 unary
 Unary
diff --git a/tosa_spec.adoc b/tosa_spec.adoc
index 15a74d3..64611f0 100644
--- a/tosa_spec.adoc
+++ b/tosa_spec.adoc
@@ -23,3 +23,5 @@ include::chapters/introduction.adoc[]
 include::chapters/operators.adoc[]
 
 include::chapters/pseudocode.adoc[]
+
+include::chapters/appendix_a.adoc[]
-- 
cgit v1.2.1