diff options
author | Eric Kunze <eric.kunze@arm.com> | 2022-04-07 16:54:46 -0700 |
---|---|---|
committer | Eric Kunze <eric.kunze@arm.com> | 2022-06-17 20:38:16 +0000 |
commit | 42229d03fe55c45f0ad2ba68f190f3d68a78ae79 (patch) | |
tree | fde2487db3fe2c4e8257beec9b54044fac9da931 /chapters/type_conversion.adoc | |
parent | f9e5ba94f12a71f088c790f532cd62d33b8d25d0 (diff) | |
download | specification-42229d03fe55c45f0ad2ba68f190f3d68a78ae79.tar.gz |
Initial work on floating-point type definition
Define operations in terms of common floating-point data
types. Definitions for the data types are in the introduction.
Added a section to describe status of the different profiles.
Signed-off-by: Eric Kunze <eric.kunze@arm.com>
Change-Id: Iac57026806acfb7913f40af61176322fb02b7cc1
Diffstat (limited to 'chapters/type_conversion.adoc')
-rw-r--r-- | chapters/type_conversion.adoc | 28 |
1 files changed, 20 insertions, 8 deletions
diff --git a/chapters/type_conversion.adoc b/chapters/type_conversion.adoc
index c19d834..4a5349b 100644
--- a/chapters/type_conversion.adoc
+++ b/chapters/type_conversion.adoc
@@ -33,9 +33,9 @@ for_each(index in shape) {
         out = (in != 0) ? true : false;
     } else if (in_t == bool_t) {
         out = (in) ? 1 : 0;
-    } else if (out_t == float_t) {
+    } else if (out_t == fp16_t || out_t == bf16_t || out_t == fp32_t) {
         out = round_to_nearest_float(in);
-    } else if (in_t == float_t) {
+    } else if (in_t == fp16_t || in_t == bf16_t || in_t == fp32_t) {
         out = apply_clip<out_t>(round_to_nearest_int(in), minimum<out_t>, maximum<out_t>);
     } else if (sizeof(out_t) >= sizeof(in_t)) {
         out = sign_extend(in);
@@ -57,18 +57,30 @@ for_each(index in shape) {
 |Any|signed 8 to bool|int8_t|bool_t
 |Any|signed 8 to signed 16|int8_t|int16_t
 |Any|signed 8 to signed 32|int8_t|int32_t
-|MI, MT|signed 8 to floating-point|int8_t|float_t
+|MI, MT|signed 8 to fp16|int8_t|fp16_t
+|MI, MT|signed 8 to bf16|int8_t|bf16_t
+|MI, MT|signed 8 to fp32|int8_t|fp32_t
 |Any|signed 16 to bool|int16_t|bool_t
 |Any|signed 16 to signed 8|int16_t|int8_t
 |Any|signed 16 to signed 32|int16_t|int32_t
-|MI, MT|signed 16 to floating-point|int16_t|float_t
+|MI, MT|signed 16 to fp16|int16_t|fp16_t
+|MI, MT|signed 16 to bf16|int16_t|bf16_t
+|MI, MT|signed 16 to fp32|int16_t|fp32_t
 |Any|signed 32 to bool|int32_t|bool_t
 |Any|signed 32 to signed 8|int32_t|int8_t
 |Any|signed 32 to signed 16|int32_t|int16_t
-|MI, MT|signed 32 to floating-point|int32_t|float_t
-|MI, MT|floating-point to signed 8|float_t|int8_t
-|MI, MT|floating-point to signed 16|float_t|int16_t
-|MI, MT|floating-point to signed 32|float_t|int32_t
+|MI, MT|signed 32 to fp16|int32_t|fp16_t
+|MI, MT|signed 32 to bf16|int32_t|bf16_t
+|MI, MT|signed 32 to fp32|int32_t|fp32_t
+|MI, MT|fp16 to signed 8|fp16_t|int8_t
+|MI, MT|fp16 to signed 16|fp16_t|int16_t
+|MI, MT|fp16 to signed 32|fp16_t|int32_t
+|MI, MT|bf16 to signed 8|bf16_t|int8_t
+|MI, MT|bf16 to signed 16|bf16_t|int16_t
+|MI, MT|bf16 to signed 32|bf16_t|int32_t
+|MI, MT|fp32 to signed 8|fp32_t|int8_t
+|MI, MT|fp32 to signed 16|fp32_t|int16_t
+|MI, MT|fp32 to signed 32|fp32_t|int32_t
 |===
 
 ==== RESCALE