diff options
Diffstat (limited to 'pseudocode/library')
-rw-r--r-- | pseudocode/library/generic_helpers.tosac | 11 | ||||
-rw-r--r-- | pseudocode/library/numeric_accuracy_helpers.tosac | 6 | ||||
-rw-r--r-- | pseudocode/library/numeric_conversion_helpers.tosac | 8 | ||||
-rw-r--r-- | pseudocode/library/type_conversion_helpers.tosac | 9 |
4 files changed, 26 insertions, 8 deletions
diff --git a/pseudocode/library/generic_helpers.tosac b/pseudocode/library/generic_helpers.tosac index a9d71ec..a2fdbe0 100644 --- a/pseudocode/library/generic_helpers.tosac +++ b/pseudocode/library/generic_helpers.tosac @@ -8,11 +8,20 @@ // by a licensing agreement from ARM Limited. bool_t is_floating_point(type) { - if (type == fp16_t || type == fp32_t || type == bf16_t) + if (type == fp16_t || type == fp32_t || type == bf16_t || type == fp8e4m3_t || type == fp8e5m2_t) return true; return false; } +bool_t is_saturating_float_type(type) { + // Saturate for the fp8 formats, all other floats do not saturate + if (type == fp8e4m3_t || type == fp8e5m2_t) { + return true; + } + return false; +} + + int32_t idiv(int32_t input1, int32_t input2) { return input1 / input2; // Integer divide that truncates towards zero } diff --git a/pseudocode/library/numeric_accuracy_helpers.tosac b/pseudocode/library/numeric_accuracy_helpers.tosac index 4a2b111..b89d898 100644 --- a/pseudocode/library/numeric_accuracy_helpers.tosac +++ b/pseudocode/library/numeric_accuracy_helpers.tosac @@ -31,6 +31,8 @@ fp64_t normal_min<in_t>() { case fp32_t: return exp2(-126); case bf16_t: return exp2(-126); case fp16_t: return exp2( -14); + case fp8e4m3_t: return exp2(-6); + case fp8e5m2_t: return exp2(-14); } } @@ -39,6 +41,8 @@ fp64_t normal_max<in_t>() { case fp32_t: return exp2(128) - exp2(127-23); case bf16_t: return exp2(128) - exp2(127- 7); case fp16_t: return exp2( 16) - exp2( 15-10); + case fp8e4m3_t: return exp2( 9) - exp2( 8-2); + case fp8e5m2_t: return exp2( 16) - exp2( 15-2); } } @@ -48,5 +52,7 @@ int normal_frac<in_t> () { case fp32_t: return 23; case fp16_t: return 10; case bf16_t: return 7; + case fp8e4m3_t: return 3; + case fp8e5m2_t: return 2; } } diff --git a/pseudocode/library/numeric_conversion_helpers.tosac b/pseudocode/library/numeric_conversion_helpers.tosac index fac7078..576351f 100644 --- a/pseudocode/library/numeric_conversion_helpers.tosac +++ b/pseudocode/library/numeric_conversion_helpers.tosac @@ -11,8 +11,14 @@ int round_to_nearest_int(float_t f) Converts the floating-point value to f, with rounding to the nearest integer value. For the required precision see the section: Main inference precision requirements. -float_t round_to_nearest_float(in_t f) +float_t round_to_nearest_float_nonsaturating(in_t f) Converts the input value into floating-point, rounding to the nearest representable value. + Values that are not NaN outside of the representable range of the destination type must be set to infinity of the correct sign. + For the required precision see the section: Main inference precision requirements. + +float_t round_to_nearest_float_saturating(in_t f) + Converts the input value into floating-point, rounding to the nearest representable normal value. + Values that are not NaN outside of the representable range must return the maximum representable normal value of the correct sign. For the required precision see the section: Main inference precision requirements. out_t sign_extend<out_t>(in_t input) diff --git a/pseudocode/library/type_conversion_helpers.tosac b/pseudocode/library/type_conversion_helpers.tosac index f26c589..f2b42a6 100644 --- a/pseudocode/library/type_conversion_helpers.tosac +++ b/pseudocode/library/type_conversion_helpers.tosac @@ -11,6 +11,9 @@ // A no-op for floating-point types Type make_signed(Type in_t) { + if (is_floating_point<in_t>()) { + return in_t; + } switch(in_t) { case bool_t: return bool_t; @@ -22,12 +25,6 @@ Type make_signed(Type in_t) return int32_t; case i48_t: return int48_t; - case fp16_t: - return fp16_t; - case bf16_t: - return bf16_t; - case fp32_t: - return fp32_t; } } |