Initial FP8 support

Adds support for Open Compute Project (OCP) 8-bit floating point (FP8) operations to the TOSA specification. Both the E4M3 and E5M2 types are supported, for the profiles indicated in the Supported Data Types table of each operator.

FP8 operator list: ARGMAX, AVGPOOL, CONV2D, CONV3D, DEPTHWISE_CONV2D, MATMUL, MAX_POOL2D, TRANSPOSE_CONV2D, CONST, CAST, CONCAT, PAD, DIM, RESHAPE, REVERSE, SLICE, TILE, TRANSPOSE, GATHER, SCATTER.

Signed-off-by: Eric Kunze <eric.kunze@arm.com>
Change-Id: I3dd83f48afcc3c880c5c88039337ff4f1fd95b1b
author: Eric Kunze <eric.kunze@arm.com> 2023-10-20 15:58:55 -0700
committer: Eric Kunze <eric.kunze@arm.com> 2024-02-14 16:36:04 -0800
commit: 74e2ceba954ed6111b3e3ce40c5ff88fe79ff043 (patch)
tree: 7e1967b073313d7df4885693eda931230d401eb0 /pseudocode/operators/CAST.tosac
parent: 9fe5e964e2193f0e345670f7f4098beecd7fd6eb (diff)
download: specification-74e2ceba954ed6111b3e3ce40c5ff88fe79ff043.tar.gz
1 files changed, 21 insertions, 10 deletions
diff --git a/pseudocode/operators/CAST.tosac b/pseudocode/operators/CAST.tosac
index fac73e3..fd3ce72 100644
--- a/pseudocode/operators/CAST.tosac
+++ b/pseudocode/operators/CAST.tosac
@@ -12,16 +12,27 @@ for_each(index in shape) {
     out_t out;
     if (out_t == bool_t) {
         out = (in != 0) ? true : false;
-    } else if (in_t == bool_t) {
-        out = (in) ? 1 : 0;
-    } else if (out_t == fp16_t || out_t == bf16_t || out_t == fp32_t) {
-        out = round_to_nearest_float(in);
-    } else if (in_t == fp16_t || in_t == bf16_t || in_t == fp32_t) {
-        out = truncate<out_t>(apply_clip_s<i32_t>(round_to_nearest_int(in), minimum<out_t>, maximum<out_t>));
-    } else if (sizeof(out_t) >= sizeof(in_t)) {
-        out = sign_extend<out_t>(in);
+    } else if (is_floating_point_type<out_t>()) {
+        // Conversion to float cases
+        if (in_t == bool_t) {
+            out = (in) ? 1.0 : 0.0;
+        }
+        if (is_saturating_float_type<out_t>()) {
+            out = round_to_nearest_float_saturating(in);
+        } else {
+            out = round_to_nearest_float_nonsaturating(in);
+        }
     } else {
-        out = truncate<out_t>(in);
+        // Conversion to integer cases
+        if (in_t == bool_t) {
+            out = (in) ? 1 : 0;
+        } else if (is_floating_point_type<in_t>()) {
+            out = truncate<out_t>(apply_clip_s<i32_t>(round_to_nearest_int(in), minimum<out_t>, maximum<out_t>));
+        } else if (sizeof(out_t) >= sizeof(in_t)) {
+            out = sign_extend<out_t>(in);
+        } else {
+            out = truncate<out_t>(in);
+        }
     }
-    tensor_write<out_t>(output, shape, index, out);
+    tensor_write<out_t>(output, shape, index, out)
 }
author	Eric Kunze <eric.kunze@arm.com>	2023-10-20 15:58:55 -0700
committer	Eric Kunze <eric.kunze@arm.com>	2024-02-14 16:36:04 -0800
commit	74e2ceba954ed6111b3e3ce40c5ff88fe79ff043 (patch)
tree	7e1967b073313d7df4885693eda931230d401eb0 /pseudocode/operators/CAST.tosac
parent	9fe5e964e2193f0e345670f7f4098beecd7fd6eb (diff)
download	specification-74e2ceba954ed6111b3e3ce40c5ff88fe79ff043.tar.gz