diff options
author | Eric Kunze <eric.kunze@arm.com> | 2022-03-11 15:12:38 -0800 |
---|---|---|
committer | Eric Kunze <eric.kunze@arm.com> | 2022-03-15 13:40:39 -0700 |
commit | ce6e02cec3a06d991e112f0f875123f1d1f928dc (patch) | |
tree | 7f2f6aba746321c9b14af6287157db782d7c6c22 | |
parent | 2ff79fe6b31f41685cf2cecfca5410db40440aaf (diff) | |
download | specification-ce6e02cec3a06d991e112f0f875123f1d1f928dc.tar.gz |
Update floating point edge cases
Cover cases where NaN, +/- 0, +/- infinity are involved
Signed-off-by: Eric Kunze <eric.kunze@arm.com>
Change-Id: I1a5a23c7b856ddb997f7cdc00282420294ef3e6d
-rw-r--r-- | chapters/comparison.adoc | 20 | ||||
-rw-r--r-- | chapters/ewise_unary.adoc | 71 | ||||
-rw-r--r-- | chapters/introduction.adoc | 15 | ||||
-rw-r--r-- | chapters/pseudocode.adoc | 21 |
4 files changed, 117 insertions, 10 deletions
diff --git a/chapters/comparison.adoc b/chapters/comparison.adoc index ad574fb..67f3506 100644 --- a/chapters/comparison.adoc +++ b/chapters/comparison.adoc @@ -1,7 +1,7 @@ // // This confidential and proprietary software may be used only as // authorised by a licensing agreement from ARM Limited -// (C) COPYRIGHT 2020-2021 ARM Limited +// (C) COPYRIGHT 2020-2022 ARM Limited // ALL RIGHTS RESERVED // The entire notice above must be reproduced on all authorised // copies and copies may only be made to the extent permitted @@ -32,7 +32,11 @@ for_each(index in shape) { index2 = apply_broadcast(shape, shape2, index); in_t value1 = tensor_read<in_t>(input1, shape1, index1); in_t value2 = tensor_read<in_t>(input2, shape2, index2); - out_t result = (value1 == value2) ? True : False; + out_t result; + if (isNaN(value1) || isNaN(value2)) + result = False; + else + result = (value1 == value2) ? True : False; tensor_write<out_t>(output, shape, index, result); } ---- @@ -69,7 +73,11 @@ for_each(index in shape) { index2 = apply_broadcast(shape, shape2, index); in_t value1 = tensor_read<in_t>(input1, shape1, index1); in_t value2 = tensor_read<in_t>(input2, shape2, index2); - out_t result = (value1 > value2) ? True : False; + out_t result; + if (isNaN(value1) || isNaN(value2)) + result = False; + else + result = (value1 > value2) ? True : False; tensor_write<out_t>(output, shape, index, result); } ---- @@ -105,7 +113,11 @@ for_each(index in shape) { index2 = apply_broadcast(shape, shape2, index); in_t value1 = tensor_read<in_t>(input1, shape1, index1); in_t value2 = tensor_read<in_t>(input2, shape2, index2); - out_t result = (value1 >= value2) ? True : False; + out_t result; + if (isNaN(value1) || isNaN(value2)) + result = False; + else + result = (value1 >= value2) ? 
True : False; tensor_write<out_t>(output, shape, index, result); } ---- diff --git a/chapters/ewise_unary.adoc b/chapters/ewise_unary.adoc index 326cc3c..ff7aef9 100644 --- a/chapters/ewise_unary.adoc +++ b/chapters/ewise_unary.adoc @@ -1,7 +1,7 @@ // // This confidential and proprietary software may be used only as // authorised by a licensing agreement from ARM Limited -// (C) COPYRIGHT 2020-2021 ARM Limited +// (C) COPYRIGHT 2020-2022 ARM Limited // ALL RIGHTS RESERVED // The entire notice above must be reproduced on all authorised // copies and copies may only be made to the extent permitted @@ -22,13 +22,23 @@ Elementwise absolute value operation |Output|in_out_t*|output|shape|Output tensor of same type, size as the input tensor |=== +*Floating-point behavior:* +|=== +|Input|-infinity|+infinity|-0|+0|NaN + +|Output|+infinity|+infinity|+0|+0|NaN +|=== + *Operation Function:* [source,c++] ---- for_each(index in shape) { in_out_t value1 = tensor_read<in_out_t>(input1, shape, index); - if (value1 < 0) + if (in_out_t == float_t && value1 == -0.0) { + value1 = 0.0; + } + if (value1 < 0.0) value1 = apply_sub<in_out_t>(0, value1); tensor_write<in_out_t>(output, shape, index, value1); } @@ -90,6 +100,13 @@ Elementwise ceiling operation |Output|in_out_t*|output|shape|Output tensor of same type, size as the input tensor |=== +*Floating-point behavior:* +|=== +|Input|-infinity|+infinity|-0|+0|NaN + +|Output|-infinity|+infinity|-0|+0|NaN +|=== + *Operation Function:* [source,c++] @@ -153,6 +170,13 @@ Elementwise e to the x operation |Output|in_out_t*|output|shape|Output tensor of same type, size as the input tensor |=== +*Floating-point behavior:* +|=== +|Input|-infinity|+infinity|-0|+0|NaN + +|Output|+0|+infinity|1|1|NaN +|=== + *Operation Function:* [source,c++] @@ -185,6 +209,13 @@ Elementwise floor operation |Output|in_out_t*|output|shape|Output tensor of same type, size as the input tensor |=== +*Floating-point behavior:* +|=== +|Input|-infinity|+infinity|-0|+0|NaN + 
+|Output|-infinity|+infinity|-0|+0|NaN +|=== + *Operation Function:* [source,c++] @@ -217,6 +248,13 @@ Elementwise natural logarithm operation |Output|in_out_t*|output|shape|Output tensor of same type, size as the input tensor |=== +*Floating-point behavior:* +|=== +|Input|-infinity|+infinity|-0|+0|NaN + +|Output|NaN|+infinity|-infinity|-infinity|NaN +|=== + *Operation Function:* [source,c++] @@ -283,6 +321,13 @@ Elementwise negation operation |Output|in_out_t*|output|shape|Output tensor of same type, size as the input tensor |=== +*Floating-point behavior:* +|=== +|Input|-infinity|+infinity|-0|+0|NaN + +|Output|+infinity|-infinity|+0|-0|NaN +|=== + *Operation Function:* [source,c++] @@ -322,6 +367,13 @@ Elementwise reciprocal operation. For integer operation, a TABLE should be used |Output|in_out_t*|output|shape|Output tensor of same type, size as the input tensor |=== +*Floating-point behavior:* +|=== +|Input|-infinity|+infinity|-0|+0|NaN + +|Output|-0|+0|-infinity|+infinity|NaN +|=== + *Operation Function:* [source,c++] @@ -354,13 +406,26 @@ Elementwise reciprocal square root operation. 
For integer operation, a TABLE sho |Output|in_out_t*|output|shape|Output tensor of same type, size as the input tensor |=== +*Floating-point behavior:* +|=== +|Input|-infinity|+infinity|-0|+0|NaN + +|Output|NaN|+0|-infinity|+infinity|NaN +|=== + *Operation Function:* [source,c++] ---- for_each(index in shape) { in_out_t value1 = tensor_read<in_out_t>(input1, shape1, index); - in_out_t result = 1.0 / apply_sqrt<in_out_t>(value1); + in_out_t result; + if (value1 < 0) { + result = NaN; + } + else { + result = 1.0 / apply_sqrt<in_out_t>(value1); + } tensor_write<in_out_t>(output, shape, index, result); } ---- diff --git a/chapters/introduction.adoc b/chapters/introduction.adoc index 2827399..fe4f724 100644 --- a/chapters/introduction.adoc +++ b/chapters/introduction.adoc @@ -351,7 +351,7 @@ Most operations in TOSA do not contain quantization scaling in the operation, bu The apply_scale functions provide a scaling of approximately (multiplier * 2^-shift^). The shift and value range is limited to allow a variety of implementations. The limit of 62 on shift allows the shift to be decomposed as two right shifts of 31. -The limit on value allows implementations that left shift the value before the mutliply in the case of shifts of 32 or less. +The limit on value allows implementations that left shift the value before the multiply in the case of shifts of 32 or less. For example, in the case shift=30 an implementation of the form ((value\<<2) * multiplier + round)>>32 can be used. A scaling range of 2^+12^ down to 2^-32^ is supported for both functions with a normalized multiplier. 
@@ -482,12 +482,23 @@ These features ensure that detection of overflow and other exceptional condition |=== |Case|Result -|Any input operand is a NaN | a NaN +|Operators other than those explicitly covered by other rules: Any input operand is a NaN | a NaN +|Comparisons (EQUAL, GREATER, GREATER_EQUAL), where either or both operands are NaN | False + +|Comparisons ignore the sign of 0| + +|RSQRT (reciprocal square root) of negative numbers | a NaN |(± 0) × (± infinity), (± infinity) × (± 0) | a NaN +|LOG of negative numbers | a NaN + +|nonzero numbers / (± 0) | (± infinity) + |(± 0) / (± 0), (± infinity) / (± infinity) | a NaN +|(± infinity) × 0 | a NaN + | (+infinity) - (+infinity), (+infinity) + (-infinity) | a NaN | Any positive overflow | + infinity diff --git a/chapters/pseudocode.adoc b/chapters/pseudocode.adoc index 71cc14d..238aa33 100644 --- a/chapters/pseudocode.adoc +++ b/chapters/pseudocode.adoc @@ -1,7 +1,7 @@ // // This confidential and proprietary software may be used only as // authorised by a licensing agreement from ARM Limited -// (C) COPYRIGHT 2021 ARM Limited +// (C) COPYRIGHT 2021-2022 ARM Limited // ALL RIGHTS RESERVED // The entire notice above must be reproduced on all authorised // copies and copies may only be made to the extent permitted @@ -85,14 +85,30 @@ in_t apply_floor<in_t>(in_t input) { } in_t apply_log<in_t>(in_t input) { + if (input == 0) { + return -INFINITY + } + else if (input < 0) { + return NaN; + } return the natural logarithm of input } in_t apply_max<in_t>(in_t a, in_t b) { + if (in_t == float_t) { + if (isNaN(a) || isNaN(b)) { + return NaN; + } + } if (a >= b) return a; else return b; } in_t apply_min<in_t>(in_t a, in_t b) { + if (in_t == float_t) { + if (isNaN(a) || isNaN(b)) { + return NaN; + } + } if (a < b) return a; else return b; } @@ -177,4 +193,7 @@ int rank(in_t input) int sum(in_t input[]) return the sum of values of an input list + +bool isNaN(float input) + return True if floating-point input value is NaN ---- |