diff options
author | Eric Kunze <eric.kunze@arm.com> | 2023-10-20 15:58:55 -0700 |
---|---|---|
committer | Eric Kunze <eric.kunze@arm.com> | 2024-02-14 16:36:04 -0800 |
commit | 74e2ceba954ed6111b3e3ce40c5ff88fe79ff043 (patch) | |
tree | 7e1967b073313d7df4885693eda931230d401eb0 /pseudocode/operators/CAST.tosac | |
parent | 9fe5e964e2193f0e345670f7f4098beecd7fd6eb (diff) | |
download | specification-74e2ceba954ed6111b3e3ce40c5ff88fe79ff043.tar.gz |
Initial FP8 support
Adds support for Open Compute Project (OCP) 8-bit floating point operations
to the TOSA specification. Both E4M3 and E5M2 types are supported for profiles
as indicated in the Supported Data Types table for each operator.
FP8 operator list
ARGMAX
AVG_POOL2D
CONV2D
CONV3D
DEPTHWISE_CONV2D
MATMUL
MAX_POOL2D
TRANSPOSE_CONV2D
CONST
CAST
CONCAT
PAD
DIM
RESHAPE
REVERSE
SLICE
TILE
TRANSPOSE
GATHER
SCATTER
Signed-off-by: Eric Kunze <eric.kunze@arm.com>
Change-Id: I3dd83f48afcc3c880c5c88039337ff4f1fd95b1b
Diffstat (limited to 'pseudocode/operators/CAST.tosac')
-rw-r--r-- | pseudocode/operators/CAST.tosac | 31 |
1 file changed, 21 insertions, 10 deletions
```diff
diff --git a/pseudocode/operators/CAST.tosac b/pseudocode/operators/CAST.tosac
index fac73e3..fd3ce72 100644
--- a/pseudocode/operators/CAST.tosac
+++ b/pseudocode/operators/CAST.tosac
@@ -12,16 +12,27 @@ for_each(index in shape) {
     out_t out;
     if (out_t == bool_t) {
         out = (in != 0) ? true : false;
-    } else if (in_t == bool_t) {
-        out = (in) ? 1 : 0;
-    } else if (out_t == fp16_t || out_t == bf16_t || out_t == fp32_t) {
-        out = round_to_nearest_float(in);
-    } else if (in_t == fp16_t || in_t == bf16_t || in_t == fp32_t) {
-        out = truncate<out_t>(apply_clip_s<i32_t>(round_to_nearest_int(in), minimum<out_t>, maximum<out_t>));
-    } else if (sizeof(out_t) >= sizeof(in_t)) {
-        out = sign_extend<out_t>(in);
+    } else if (is_floating_point_type<out_t>()) {
+        // Conversion to float cases
+        if (in_t == bool_t) {
+            out = (in) ? 1.0 : 0.0;
+        }
+        if (is_saturating_float_type<out_t>()) {
+            out = round_to_nearest_float_saturating(in);
+        } else {
+            out = round_to_nearest_float_nonsaturating(in);
+        }
     } else {
-        out = truncate<out_t>(in);
+        // Conversion to integer cases
+        if (in_t == bool_t) {
+            out = (in) ? 1 : 0;
+        } else if (is_floating_point_type<in_t>()) {
+            out = truncate<out_t>(apply_clip_s<i32_t>(round_to_nearest_int(in), minimum<out_t>, maximum<out_t>));
+        } else if (sizeof(out_t) >= sizeof(in_t)) {
+            out = sign_extend<out_t>(in);
+        } else {
+            out = truncate<out_t>(in);
+        }
     }
-    tensor_write<out_t>(output, shape, index, out);
+    tensor_write<out_t>(output, shape, index, out)
 }
```