From ce6e02cec3a06d991e112f0f875123f1d1f928dc Mon Sep 17 00:00:00 2001 From: Eric Kunze Date: Fri, 11 Mar 2022 15:12:38 -0800 Subject: Update floating point edge cases Cover cases where NaN, +/- 0, +/- infinity are involved Signed-off-by: Eric Kunze Change-Id: I1a5a23c7b856ddb997f7cdc00282420294ef3e6d --- chapters/comparison.adoc | 20 ++++++++++--- chapters/ewise_unary.adoc | 71 ++++++++++++++++++++++++++++++++++++++++++++-- chapters/introduction.adoc | 15 ++++++++-- chapters/pseudocode.adoc | 21 +++++++++++++- 4 files changed, 117 insertions(+), 10 deletions(-) diff --git a/chapters/comparison.adoc b/chapters/comparison.adoc index ad574fb..67f3506 100644 --- a/chapters/comparison.adoc +++ b/chapters/comparison.adoc @@ -1,7 +1,7 @@ // // This confidential and proprietary software may be used only as // authorised by a licensing agreement from ARM Limited -// (C) COPYRIGHT 2020-2021 ARM Limited +// (C) COPYRIGHT 2020-2022 ARM Limited // ALL RIGHTS RESERVED // The entire notice above must be reproduced on all authorised // copies and copies may only be made to the extent permitted @@ -32,7 +32,11 @@ for_each(index in shape) { index2 = apply_broadcast(shape, shape2, index); in_t value1 = tensor_read(input1, shape1, index1); in_t value2 = tensor_read(input2, shape2, index2); - out_t result = (value1 == value2) ? True : False; + out_t result; + if (isNaN(value1) || isNaN(value2)) + result = False; + else + result = (value1 == value2) ? True : False; tensor_write(output, shape, index, result); } ---- @@ -69,7 +73,11 @@ for_each(index in shape) { index2 = apply_broadcast(shape, shape2, index); in_t value1 = tensor_read(input1, shape1, index1); in_t value2 = tensor_read(input2, shape2, index2); - out_t result = (value1 > value2) ? True : False; + out_t result; + if (isNaN(value1) || isNaN(value2)) + result = False; + else + result = (value1 > value2) ? 
True : False; tensor_write(output, shape, index, result); } ---- @@ -105,7 +113,11 @@ for_each(index in shape) { index2 = apply_broadcast(shape, shape2, index); in_t value1 = tensor_read(input1, shape1, index1); in_t value2 = tensor_read(input2, shape2, index2); - out_t result = (value1 >= value2) ? True : False; + out_t result; + if (isNaN(value1) || isNaN(value2)) + result = False; + else + result = (value1 >= value2) ? True : False; tensor_write(output, shape, index, result); } ---- diff --git a/chapters/ewise_unary.adoc b/chapters/ewise_unary.adoc index 326cc3c..ff7aef9 100644 --- a/chapters/ewise_unary.adoc +++ b/chapters/ewise_unary.adoc @@ -1,7 +1,7 @@ // // This confidential and proprietary software may be used only as // authorised by a licensing agreement from ARM Limited -// (C) COPYRIGHT 2020-2021 ARM Limited +// (C) COPYRIGHT 2020-2022 ARM Limited // ALL RIGHTS RESERVED // The entire notice above must be reproduced on all authorised // copies and copies may only be made to the extent permitted @@ -22,13 +22,23 @@ Elementwise absolute value operation |Output|in_out_t*|output|shape|Output tensor of same type, size as the input tensor |=== +*Floating-point behavior:* +|=== +|Input|-infinity|+infinity|-0|+0|NaN + +|Output|+infinity|+infinity|+0|+0|NaN +|=== + *Operation Function:* [source,c++] ---- for_each(index in shape) { in_out_t value1 = tensor_read(input1, shape, index); - if (value1 < 0) + if (in_out_t == float_t && value1 == -0.0) { + value1 = 0.0; + } + if (value1 < 0.0) value1 = apply_sub(0, value1); tensor_write(output, shape, index, value1); } @@ -90,6 +100,13 @@ Elementwise ceiling operation |Output|in_out_t*|output|shape|Output tensor of same type, size as the input tensor |=== +*Floating-point behavior:* +|=== +|Input|-infinity|+infinity|-0|+0|NaN + +|Output|-infinity|+infinity|-0|+0|NaN +|=== + *Operation Function:* [source,c++] @@ -153,6 +170,13 @@ Elementwise e to the x operation |Output|in_out_t*|output|shape|Output tensor of same type, 
size as the input tensor |=== +*Floating-point behavior:* +|=== +|Input|-infinity|+infinity|-0|+0|NaN + +|Output|+0|+infinity|1|1|NaN +|=== + *Operation Function:* [source,c++] @@ -185,6 +209,13 @@ Elementwise floor operation |Output|in_out_t*|output|shape|Output tensor of same type, size as the input tensor |=== +*Floating-point behavior:* +|=== +|Input|-infinity|+infinity|-0|+0|NaN + +|Output|-infinity|+infinity|-0|+0|NaN +|=== + *Operation Function:* [source,c++] @@ -217,6 +248,13 @@ Elementwise natural logarithm operation |Output|in_out_t*|output|shape|Output tensor of same type, size as the input tensor |=== +*Floating-point behavior:* +|=== +|Input|-infinity|+infinity|-0|+0|NaN + +|Output|NaN|+infinity|-infinity|-infinity|NaN +|=== + *Operation Function:* [source,c++] @@ -283,6 +321,13 @@ Elementwise negation operation |Output|in_out_t*|output|shape|Output tensor of same type, size as the input tensor |=== +*Floating-point behavior:* +|=== +|Input|-infinity|+infinity|-0|+0|NaN + +|Output|+infinity|-infinity|+0|-0|NaN +|=== + *Operation Function:* [source,c++] @@ -322,6 +367,13 @@ Elementwise reciprocal operation. For integer operation, a TABLE should be used |Output|in_out_t*|output|shape|Output tensor of same type, size as the input tensor |=== +*Floating-point behavior:* +|=== +|Input|-infinity|+infinity|-0|+0|NaN + +|Output|-0|+0|-infinity|+infinity|NaN +|=== + *Operation Function:* [source,c++] @@ -354,13 +406,26 @@ Elementwise reciprocal square root operation. 
For integer operation, a TABLE sho |Output|in_out_t*|output|shape|Output tensor of same type, size as the input tensor |=== +*Floating-point behavior:* +|=== +|Input|-infinity|+infinity|-0|+0|NaN + +|Output|NaN|+0|-infinity|+infinity|NaN +|=== + *Operation Function:* [source,c++] ---- for_each(index in shape) { in_out_t value1 = tensor_read(input1, shape1, index); - in_out_t result = 1.0 / apply_sqrt(value1); + in_out_t result; + if (value1 < 0) { + result = NaN; + } + else { + result = 1.0 / apply_sqrt(value1); + } tensor_write(output, shape, index, result); } ---- diff --git a/chapters/introduction.adoc b/chapters/introduction.adoc index 2827399..fe4f724 100644 --- a/chapters/introduction.adoc +++ b/chapters/introduction.adoc @@ -351,7 +351,7 @@ Most operations in TOSA do not contain quantization scaling in the operation, bu The apply_scale functions provide a scaling of approximately (multiplier * 2^-shift^). The shift and value range is limited to allow a variety of implementations. The limit of 62 on shift allows the shift to be decomposed as two right shifts of 31. -The limit on value allows implementations that left shift the value before the mutliply in the case of shifts of 32 or less. +The limit on value allows implementations that left shift the value before the multiply in the case of shifts of 32 or less. For example, in the case shift=30 an implementation of the form ((value\<<2) * multiplier + round)>>32 can be used. A scaling range of 2^+12^ down to 2^-32^ is supported for both functions with a normalized multiplier. 
@@ -482,12 +482,23 @@ These features ensure that detection of overflow and other exceptional condition |=== |Case|Result -|Any input operand is a NaN | a NaN +|Operators other than those explicitly mentioned by other rules: Any input operand is a NaN | a NaN +|Comparisons (EQUAL, GREATER, GREATER_EQUAL), where either or both operands are NaN | False + +|Comparisons ignore the sign of 0| + +|RSQRT (reciprocal square root) of negative numbers | a NaN |(± 0) × (± infinity), (± infinity) × (± 0) | a NaN +|LOG of negative numbers | a NaN + +|nonzero numbers / (± 0) | (± infinity) + |(± 0) / (± 0), (± infinity) / (± infinity) | a NaN +|(± infinity) * 0 | a NaN + | (+infinity) - (+infinity), (+infinity) + (-infinity) | a NaN | Any positive overflow | + infinity diff --git a/chapters/pseudocode.adoc b/chapters/pseudocode.adoc index 71cc14d..238aa33 100644 --- a/chapters/pseudocode.adoc +++ b/chapters/pseudocode.adoc @@ -1,7 +1,7 @@ // // This confidential and proprietary software may be used only as // authorised by a licensing agreement from ARM Limited -// (C) COPYRIGHT 2021 ARM Limited +// (C) COPYRIGHT 2021-2022 ARM Limited // ALL RIGHTS RESERVED // The entire notice above must be reproduced on all authorised // copies and copies may only be made to the extent permitted @@ -85,14 +85,30 @@ in_t apply_floor(in_t input) { } in_t apply_log(in_t input) { + if (input == 0) { + return -INFINITY + } + else if (input < 0) { + return NaN; + } return the natural logarithm of input } in_t apply_max(in_t a, in_t b) { + if (in_t == float_t) { + if (isNaN(a) || isNaN(b)) { + return NaN; + } + } if (a >= b) return a; else return b; } in_t apply_min(in_t a, in_t b) { + if (in_t == float_t) { + if (isNaN(a) || isNaN(b)) { + return NaN; + } + } if (a < b) return a; else return b; } @@ -177,4 +193,7 @@ int rank(in_t input) int sum(in_t input[]) return the sum of values of an input list + +bool isNaN(float input) + return True if floating-point input value is NaN ---- -- cgit v1.2.1