Update floating point edge cases

Cover cases where NaN, +/- 0, +/- infinity are involved.

Signed-off-by: Eric Kunze <eric.kunze@arm.com>
Change-Id: I1a5a23c7b856ddb997f7cdc00282420294ef3e6d
author: Eric Kunze <eric.kunze@arm.com> 2022-03-11 15:12:38 -0800
committer: Eric Kunze <eric.kunze@arm.com> 2022-03-15 13:40:39 -0700
commit: ce6e02cec3a06d991e112f0f875123f1d1f928dc (patch)
tree: 7f2f6aba746321c9b14af6287157db782d7c6c22
parent: 2ff79fe6b31f41685cf2cecfca5410db40440aaf (diff)
download: specification-ce6e02cec3a06d991e112f0f875123f1d1f928dc.tar.gz
4 files changed, 117 insertions, 10 deletions
diff --git a/chapters/comparison.adoc b/chapters/comparison.adoc
index ad574fb..67f3506 100644
--- a/chapters/comparison.adoc
+++ b/chapters/comparison.adoc
@@ -1,7 +1,7 @@
 //
 // This confidential and proprietary software may be used only as
 // authorised by a licensing agreement from ARM Limited
-// (C) COPYRIGHT 2020-2021 ARM Limited
+// (C) COPYRIGHT 2020-2022 ARM Limited
 // ALL RIGHTS RESERVED
 // The entire notice above must be reproduced on all authorised
 // copies and copies may only be made to the extent permitted
@@ -32,7 +32,11 @@ for_each(index in shape) {
     index2 = apply_broadcast(shape, shape2, index);
     in_t value1 = tensor_read<in_t>(input1, shape1, index1);
     in_t value2 = tensor_read<in_t>(input2, shape2, index2);
-    out_t result = (value1 == value2) ? True : False;
+    out_t result;
+    if (isNaN(value1) || isNaN(value2))
+        result = False;
+    else
+        result = (value1 == value2) ? True : False;
     tensor_write<out_t>(output, shape, index, result);
 }
 ----
@@ -69,7 +73,11 @@ for_each(index in shape) {
     index2 = apply_broadcast(shape, shape2, index);
     in_t value1 = tensor_read<in_t>(input1, shape1, index1);
     in_t value2 = tensor_read<in_t>(input2, shape2, index2);
-    out_t result = (value1 > value2) ? True : False;
+    out_t result;
+    if (isNaN(value1) || isNaN(value2))
+        result = False;
+    else
+        result = (value1 > value2) ? True : False;
     tensor_write<out_t>(output, shape, index, result);
 }
 ----
@@ -105,7 +113,11 @@ for_each(index in shape) {
     index2 = apply_broadcast(shape, shape2, index);
     in_t value1 = tensor_read<in_t>(input1, shape1, index1);
     in_t value2 = tensor_read<in_t>(input2, shape2, index2);
-    out_t result = (value1 >= value2) ? True : False;
+    out_t result;
+    if (isNaN(value1) || isNaN(value2))
+        result = False;
+    else
+        result = (value1 >= value2) ? True : False;
     tensor_write<out_t>(output, shape, index, result);
 }
 ----
diff --git a/chapters/ewise_unary.adoc b/chapters/ewise_unary.adoc
index 326cc3c..ff7aef9 100644
--- a/chapters/ewise_unary.adoc
+++ b/chapters/ewise_unary.adoc
@@ -1,7 +1,7 @@
 //
 // This confidential and proprietary software may be used only as
 // authorised by a licensing agreement from ARM Limited
-// (C) COPYRIGHT 2020-2021 ARM Limited
+// (C) COPYRIGHT 2020-2022 ARM Limited
 // ALL RIGHTS RESERVED
 // The entire notice above must be reproduced on all authorised
 // copies and copies may only be made to the extent permitted
@@ -22,13 +22,23 @@ Elementwise absolute value operation
 |Output|in_out_t*|output|shape|Output tensor of same type, size as the input tensor
 |===
 
+*Floating-point behavior:*
+|===
+|Input|-infinity|+infinity|-0|+0|NaN
+
+|Output|+infinity|+infinity|+0|+0|NaN
+|===
+
 *Operation Function:*
 
 [source,c++]
 ----
 for_each(index in shape) {
     in_out_t value1 = tensor_read<in_out_t>(input1, shape, index);
-    if (value1 < 0)
+    if (in_out_t == float_t && value1 == -0.0) {
+        value1 = 0.0;
+    }
+    if (value1 < 0.0)
         value1 = apply_sub<in_out_t>(0, value1);
     tensor_write<in_out_t>(output, shape, index, value1);
 }
@@ -90,6 +100,13 @@ Elementwise ceiling operation
 |Output|in_out_t*|output|shape|Output tensor of same type, size as the input tensor
 |===
 
+*Floating-point behavior:*
+|===
+|Input|-infinity|+infinity|-0|+0|NaN
+
+|Output|-infinity|+infinity|-0|+0|NaN
+|===
+
 *Operation Function:*
 
 [source,c++]
@@ -153,6 +170,13 @@ Elementwise e to the x operation
 |Output|in_out_t*|output|shape|Output tensor of same type, size as the input tensor
 |===
 
+*Floating-point behavior:*
+|===
+|Input|-infinity|+infinity|-0|+0|NaN
+
+|Output|+0|+infinity|1|1|NaN
+|===
+
 *Operation Function:*
 
 [source,c++]
@@ -185,6 +209,13 @@ Elementwise floor operation
 |Output|in_out_t*|output|shape|Output tensor of same type, size as the input tensor
 |===
 
+*Floating-point behavior:*
+|===
+|Input|-infinity|+infinity|-0|+0|NaN
+
+|Output|-infinity|+infinity|-0|+0|NaN
+|===
+
 *Operation Function:*
 
 [source,c++]
@@ -217,6 +248,13 @@ Elementwise natural logarithm operation
 |Output|in_out_t*|output|shape|Output tensor of same type, size as the input tensor
 |===
 
+*Floating-point behavior:*
+|===
+|Input|-infinity|+infinity|-0|+0|NaN
+
+|Output|NaN|+infinity|-infinity|-infinity|NaN
+|===
+
 *Operation Function:*
 
 [source,c++]
@@ -283,6 +321,13 @@ Elementwise negation operation
 |Output|in_out_t*|output|shape|Output tensor of same type, size as the input tensor
 |===
 
+*Floating-point behavior:*
+|===
+|Input|-infinity|+infinity|-0|+0|NaN
+
+|Output|+infinity|-infinity|+0|-0|NaN
+|===
+
 *Operation Function:*
 
 [source,c++]
@@ -322,6 +367,13 @@ Elementwise reciprocal operation. For integer operation, a TABLE should be used
 |Output|in_out_t*|output|shape|Output tensor of same type, size as the input tensor
 |===
 
+*Floating-point behavior:*
+|===
+|Input|-infinity|+infinity|-0|+0|NaN
+
+|Output|-0|+0|-infinity|+infinity|NaN
+|===
+
 *Operation Function:*
 
 [source,c++]
@@ -354,13 +406,26 @@ Elementwise reciprocal square root operation. For integer operation, a TABLE sho
 |Output|in_out_t*|output|shape|Output tensor of same type, size as the input tensor
 |===
 
+*Floating-point behavior:*
+|===
+|Input|-infinity|+infinity|-0|+0|NaN
+
+|Output|NaN|+0|-infinity|+infinity|NaN
+|===
+
 *Operation Function:*
 
 [source,c++]
 ----
 for_each(index in shape) {
     in_out_t value1 = tensor_read<in_out_t>(input1, shape1, index);
-    in_out_t result = 1.0 / apply_sqrt<in_out_t>(value1);
+    in_out_t result;
+    if (value1 < 0) {
+        result = NaN;
+    }
+    else {
+        result = 1.0 / apply_sqrt<in_out_t>(value1);
+    }
     tensor_write<in_out_t>(output, shape, index, result);
 }
 ----
diff --git a/chapters/introduction.adoc b/chapters/introduction.adoc
index 2827399..fe4f724 100644
--- a/chapters/introduction.adoc
+++ b/chapters/introduction.adoc
@@ -351,7 +351,7 @@ Most operations in TOSA do not contain quantization scaling in the operation, bu
 The apply_scale functions provide a scaling of approximately (multiplier * 2^-shift^).
 The shift and value range is limited to allow a variety of implementations.
 The limit of 62 on shift allows the shift to be decomposed as two right shifts of 31.
-The limit on value allows implementations that left shift the value before the mutliply in the case of shifts of 32 or less.
+The limit on value allows implementations that left shift the value before the multiply in the case of shifts of 32 or less.
 For example, in the case shift=30 an implementation of the form ((value\<<2) * multiplier + round)>>32 can be used.
 A scaling range of 2^+12^ down to 2^-32^ is supported for both functions with a normalized multiplier.
 
@@ -482,12 +482,23 @@ These features ensure that detection of overflow and other exceptional condition
 |===
 |Case|Result
 
-|Any input operand is a NaN | a NaN
+|Operators other than those explicitly mentioned by other rules: any input operand is a NaN | a NaN
 
+|Comparisons (EQUAL, GREATER, GREATER_EQUAL), where either or both operands are NaN | False
+
+|Comparisons ignore the sign of 0|
+
+|RSQRT (reciprocal square root) of negative numbers | a NaN
 |(&#177; 0) &#215; (&#177; infinity), (&#177; infinity) &#215; (&#177; 0) | a NaN
 
+|LOG of negative numbers | a NaN
+
+|nonzero numbers / (&#177; 0) | (&#177; infinity)
+
 |(&#177; 0) / (&#177; 0), (&#177; infinity) / (&#177; infinity) | a NaN
 
+|(&#177; infinity) &#215; 0 | a NaN
+
 | (+infinity) - (+infinity),  (+infinity) + (-infinity) | a NaN
 
 | Any positive overflow | + infinity
diff --git a/chapters/pseudocode.adoc b/chapters/pseudocode.adoc
index 71cc14d..238aa33 100644
--- a/chapters/pseudocode.adoc
+++ b/chapters/pseudocode.adoc
@@ -1,7 +1,7 @@
 //
 // This confidential and proprietary software may be used only as
 // authorised by a licensing agreement from ARM Limited
-// (C) COPYRIGHT 2021 ARM Limited
+// (C) COPYRIGHT 2021-2022 ARM Limited
 // ALL RIGHTS RESERVED
 // The entire notice above must be reproduced on all authorised
 // copies and copies may only be made to the extent permitted
@@ -85,14 +85,30 @@ in_t apply_floor<in_t>(in_t input) {
 }
 
 in_t apply_log<in_t>(in_t input) {
+    if (input == 0) {
+        return -INFINITY;
+    }
+    else if (input < 0) {
+        return NaN;
+    }
     return the natural logarithm of input
 }
 
 in_t apply_max<in_t>(in_t a, in_t b) {
+    if (in_t == float_t) {
+        if (isNaN(a) || isNaN(b)) {
+            return NaN;
+        }
+    }
     if (a >= b) return a; else return b;
 }
 
 in_t apply_min<in_t>(in_t a, in_t b) {
+    if (in_t == float_t) {
+        if (isNaN(a) || isNaN(b)) {
+            return NaN;
+        }
+    }
     if (a < b) return a; else return b;
 }
 
@@ -177,4 +193,7 @@ int rank(in_t input)
 
 int sum(in_t input[])
     return the sum of values of an input list
+
+bool isNaN(float input)
+    return True if floating-point input value is NaN
 ----
author	Eric Kunze <eric.kunze@arm.com>	2022-03-11 15:12:38 -0800
committer	Eric Kunze <eric.kunze@arm.com>	2022-03-15 13:40:39 -0700
commit	ce6e02cec3a06d991e112f0f875123f1d1f928dc (patch)
tree	7f2f6aba746321c9b14af6287157db782d7c6c22
parent	2ff79fe6b31f41685cf2cecfca5410db40440aaf (diff)
download	specification-ce6e02cec3a06d991e112f0f875123f1d1f928dc.tar.gz