Initial work on floating-point type definition

Define operations in terms of common floating-point data types. Definitions for the data types are in the introduction. Added a section to describe status of the different profiles. Signed-off-by: Eric Kunze <eric.kunze@arm.com> Change-Id: Iac57026806acfb7913f40af61176322fb02b7cc1
author: Eric Kunze <eric.kunze@arm.com> 2022-04-07 16:54:46 -0700
committer: Eric Kunze <eric.kunze@arm.com> 2022-06-17 20:38:16 +0000
commit: 42229d03fe55c45f0ad2ba68f190f3d68a78ae79 (patch)
tree: fde2487db3fe2c4e8257beec9b54044fac9da931
parent: f9e5ba94f12a71f088c790f532cd62d33b8d25d0 (diff)
download: specification-42229d03fe55c45f0ad2ba68f190f3d68a78ae79.tar.gz
15 files changed, 217 insertions, 77 deletions
diff --git a/chapters/activation_funcs.adoc b/chapters/activation_funcs.adoc
index 87f213c..27ba596 100644
--- a/chapters/activation_funcs.adoc
+++ b/chapters/activation_funcs.adoc
@@ -44,7 +44,9 @@ for_each(index in shape) {
 
 |Any|signed 8|int8_t
 |Any|signed 16|int16_t
-|MI, MT|floating-point|float_t
+|MI, MT|fp16|fp16_t
+|MI, MT|bf16|bf16_t
+|MI, MT|fp32|fp32_t
 |===
 
 ==== SIGMOID
@@ -81,7 +83,9 @@ generate_lookup_table(&sigmoid_table, &sigmoid_reference);
 |===
 |Profile|Mode|in_out_t
 
-|MI, MT|floating-point|float_t
+|MI, MT|fp16|fp16_t
+|MI, MT|bf16|bf16_t
+|MI, MT|fp32|fp32_t
 |===
 
 ==== TANH
@@ -119,5 +123,7 @@ generate_lookup_table(&tanh_table, &tanh_reference);
 |===
 |Profile|Mode|in_out_t
 
-|MI, MT|floating-point|float_t
+|MI, MT|fp16|fp16_t
+|MI, MT|bf16|bf16_t
+|MI, MT|fp32|fp32_t
 |===
diff --git a/chapters/comparison.adoc b/chapters/comparison.adoc
index 67f3506..5c27071 100644
--- a/chapters/comparison.adoc
+++ b/chapters/comparison.adoc
@@ -47,7 +47,9 @@ for_each(index in shape) {
 |Profile|Mode|in_t|out_t
 
 |Any|signed 32|int32_t|bool_t
-|MI, MT|floating-point|float_t|bool_t
+|MI, MT|fp16|fp16_t|bool_t
+|MI, MT|bf16|bf16_t|bool_t
+|MI, MT|fp32|fp32_t|bool_t
 |===
 
 ==== GREATER
@@ -87,7 +89,9 @@ for_each(index in shape) {
 |Profile|Mode|in_t|out_t
 
 |Any|signed 32|int32_t|bool_t
-|MI, MT|floating-point|float_t|bool_t
+|MI, MT|fp16|fp16_t|bool_t
+|MI, MT|bf16|bf16_t|bool_t
+|MI, MT|fp32|fp32_t|bool_t
 |===
 
 ==== GREATER_EQUAL
@@ -128,5 +132,7 @@ for_each(index in shape) {
 |Profile|Mode|in_t|out_t
 
 |Any|signed 32|int32_t|bool_t
-|MI, MT|floating-point|float_t|bool_t
+|MI, MT|fp16|fp16_t|bool_t
+|MI, MT|bf16|bf16_t|bool_t
+|MI, MT|fp32|fp32_t|bool_t
 |===
diff --git a/chapters/data_layout.adoc b/chapters/data_layout.adoc
index 7bc2413..0c5c4d6 100644
--- a/chapters/data_layout.adoc
+++ b/chapters/data_layout.adoc
@@ -61,7 +61,9 @@ for_each(index1 in shape) {
 |Any|signed 8|int8_t
 |Any|signed 16|int16_t
 |Any|signed 32|int32_t
-|MI, MT|floating-point|float_t
+|MI, MT|fp16|fp16_t
+|MI, MT|bf16|bf16_t
+|MI, MT|fp32|fp32_t
 |===
 
 ==== PAD
@@ -112,7 +114,9 @@ for_each(index in shape) {
 |Any|signed 8|int8_t
 |Any|signed 16|int16_t
 |Any|signed 32|int32_t
-|MI, MT|floating-point|float_t
+|MI, MT|fp16|fp16_t
+|MI, MT|bf16|bf16_t
+|MI, MT|fp32|fp32_t
 |===
 
 ==== RESHAPE
@@ -156,7 +160,9 @@ for_each(index in shape) {
 |Any|signed 8|int8_t
 |Any|signed 16|int16_t
 |Any|signed 32|int32_t
-|MI, MT|floating-point|float_t
+|MI, MT|fp16|fp16_t
+|MI, MT|bf16|bf16_t
+|MI, MT|fp32|fp32_t
 |===
 
 ==== REVERSE
@@ -195,7 +201,9 @@ for_each(index in shape) {
 |Any|signed 8|int8_t
 |Any|signed 16|int16_t
 |Any|signed 32|int32_t
-|MI, MT|floating-point|float_t
+|MI, MT|fp16|fp16_t
+|MI, MT|bf16|bf16_t
+|MI, MT|fp32|fp32_t
 |===
 
 ==== SLICE
@@ -247,7 +255,9 @@ for_each(index in shape) {
 |Any|signed 8|int8_t
 |Any|signed 16|int16_t
 |Any|signed 32|int32_t
-|MI, MT|floating-point|float_t
+|MI, MT|fp16|fp16_t
+|MI, MT|bf16|bf16_t
+|MI, MT|fp32|fp32_t
 |===
 
 ==== TILE
@@ -288,7 +298,9 @@ for_each(index in shape) {
 |Any|signed 8|int8_t
 |Any|signed 16|int16_t
 |Any|signed 32|int32_t
-|MI, MT|floating-point|float_t
+|MI, MT|fp16|fp16_t
+|MI, MT|bf16|bf16_t
+|MI, MT|fp32|fp32_t
 |===
 
 ==== TRANSPOSE
@@ -344,5 +356,7 @@ for_each(index in shape) {
 |Any|signed 8|int8_t
 |Any|signed 16|int16_t
 |Any|signed 32|int32_t
-|MI, MT|floating-point|float_t
+|MI, MT|fp16|fp16_t
+|MI, MT|bf16|bf16_t
+|MI, MT|fp32|fp32_t
 |===
diff --git a/chapters/data_nodes.adoc b/chapters/data_nodes.adoc
index 9d32a62..5f45464 100644
--- a/chapters/data_nodes.adoc
+++ b/chapters/data_nodes.adoc
@@ -1,7 +1,7 @@
 //
 // This confidential and proprietary software may be used only as
 // authorised by a licensing agreement from ARM Limited
-// (C) COPYRIGHT 2020-2021 ARM Limited
+// (C) COPYRIGHT 2020-2022 ARM Limited
 // ALL RIGHTS RESERVED
 // The entire notice above must be reproduced on all authorised
 // copies and copies may only be made to the extent permitted
@@ -31,7 +31,9 @@ A node containing constant data for use as the input to an operation. May hold d
 |Any|signed 8|int8_t
 |Any|signed 16|int16_t
 |Any|signed 32|int32_t
-|MI, MT|floating-point|float_t
+|MI, MT|fp16|fp16_t
+|MI, MT|bf16|bf16_t
+|MI, MT|fp32|fp32_t
 |===
 
 ==== IDENTITY
@@ -56,5 +58,7 @@ Returns a tensor with the same shape, type, and contents as the input.
 |Any|signed 8|int8_t
 |Any|signed 16|int16_t
 |Any|signed 32|int32_t
-|MI, MT|floating-point|float_t
+|MI, MT|fp16|fp16_t
+|MI, MT|bf16|bf16_t
+|MI, MT|fp32|fp32_t
 |===
diff --git a/chapters/ewise_binary.adoc b/chapters/ewise_binary.adoc
index e25fb8d..27efb44 100644
--- a/chapters/ewise_binary.adoc
+++ b/chapters/ewise_binary.adoc
@@ -44,7 +44,9 @@ for_each(index in shape) {
 |Profile|Mode|in_out_t
 
 |Any|signed 32|int32_t
-|MI, MT|floating-point|float_t
+|MI, MT|fp16|fp16_t
+|MI, MT|bf16|bf16_t
+|MI, MT|fp32|fp32_t
 |===
 
 ==== ARITHMETIC_RIGHT_SHIFT
@@ -483,7 +485,9 @@ for_each(index in shape) {
 |Profile|Mode|in_out_t
 
 |Any|signed 32|int32_t
-|MI, MT|floating-point|float_t
+|MI, MT|fp16|fp16_t
+|MI, MT|bf16|bf16_t
+|MI, MT|fp32|fp32_t
 |===
 
 ==== MINIMUM
@@ -521,7 +525,9 @@ for_each(index in shape) {
 |Profile|Mode|in_out_t
 
 |Any|signed 32|int32_t
-|MI, MT|floating-point|float_t
+|MI, MT|fp16|fp16_t
+|MI, MT|bf16|bf16_t
+|MI, MT|fp32|fp32_t
 |===
 
 ==== MUL
@@ -571,7 +577,9 @@ for_each(index in shape) {
 |Any|signed 8|int8_t|int32_t
 |Any|signed 16|int16_t|int32_t
 |Any|signed 32|int32_t|int32_t
-|MI, MT|floating-point|float_t|float_t
+|MI, MT|fp16|fp16_t|fp16_t
+|MI, MT|bf16|bf16_t|bf16_t
+|MI, MT|fp32|fp32_t|fp32_t
 |===
 
 ==== POW
@@ -608,7 +616,9 @@ for_each(index in shape) {
 |===
 |Profile|Mode|in_out_t
 
-|MI, MT|floating-point|float_t
+|MI, MT|fp16|fp16_t
+|MI, MT|bf16|bf16_t
+|MI, MT|fp32|fp32_t
 |===
 
 ==== SUB
@@ -646,7 +656,9 @@ for_each(index in shape) {
 |Profile|Mode|in_out_t
 
 |Any|signed 32|int32_t
-|MI, MT|floating-point|float_t
+|MI, MT|fp16|fp16_t
+|MI, MT|bf16|bf16_t
+|MI, MT|fp32|fp32_t
 |===
 
 ====   TABLE
diff --git a/chapters/ewise_ternary.adoc b/chapters/ewise_ternary.adoc
index e61e1c2..84fe14d 100644
--- a/chapters/ewise_ternary.adoc
+++ b/chapters/ewise_ternary.adoc
@@ -53,5 +53,7 @@ for_each(index in shape) {
 |Any|signed 8|bool_t|int8_t
 |Any|signed 16|bool_t|int16_t
 |Any|signed 32|bool_t|int32_t
-|MI, MT|floating-point|bool_t|float_t
+|MI, MT|fp16|bool_t|fp16_t
+|MI, MT|bf16|bool_t|bf16_t
+|MI, MT|fp32|bool_t|fp32_t
 |===
diff --git a/chapters/ewise_unary.adoc b/chapters/ewise_unary.adoc
index ff7aef9..8c88f47 100644
--- a/chapters/ewise_unary.adoc
+++ b/chapters/ewise_unary.adoc
@@ -193,7 +193,9 @@ for_each(index in shape) {
 |===
 |Profile|Mode|in_out_t
 
-|MI, MT|floating-point|float_t
+|MI, MT|fp16|fp16_t
+|MI, MT|bf16|bf16_t
+|MI, MT|fp32|fp32_t
 |===
 
 ==== FLOOR
@@ -232,7 +234,9 @@ for_each(index in shape) {
 |===
 |Profile|Mode|in_out_t
 
-|MI, MT|floating-point|float_t
+|MI, MT|fp16|fp16_t
+|MI, MT|bf16|bf16_t
+|MI, MT|fp32|fp32_t
 |===
 
 ==== LOG
@@ -271,7 +275,9 @@ for_each(index in shape) {
 |===
 |Profile|Mode|in_out_t
 
-|MI, MT|floating-point|float_t
+|MI, MT|fp16|fp16_t
+|MI, MT|bf16|bf16_t
+|MI, MT|fp32|fp32_t
 |===
 
 ==== LOGICAL_NOT
@@ -351,7 +357,9 @@ for_each(index in shape) {
 |Any|signed 8|int8_t|int32_t
 |Any|signed 16|int16_t|int32_t
 |Any|signed 32|int32_t|int32_t
-|MI, MT|floating-point|float_t|float_t
+|MI, MT|fp16|fp16_t|fp16_t
+|MI, MT|bf16|bf16_t|bf16_t
+|MI, MT|fp32|fp32_t|fp32_t
 |===
 
 ==== RECIPROCAL
@@ -390,7 +398,9 @@ for_each(index in shape) {
 |===
 |Profile|Mode|in_out_t
 
-|MI, MT|floating-point|float_t
+|MI, MT|fp16|fp16_t
+|MI, MT|bf16|bf16_t
+|MI, MT|fp32|fp32_t
 |===
 
 ==== RSQRT
@@ -435,5 +445,7 @@ for_each(index in shape) {
 |===
 |Profile|Mode|in_out_t
 
-|MI, MT|floating-point|float_t
+|MI, MT|fp16|fp16_t
+|MI, MT|bf16|bf16_t
+|MI, MT|fp32|fp32_t
 |===
diff --git a/chapters/image.adoc b/chapters/image.adoc
index 0b25369..690480c 100644
--- a/chapters/image.adoc
+++ b/chapters/image.adoc
@@ -81,41 +81,45 @@ ERROR_IF(OH != idiv_check((IH-1)*scale_y_n - offset_y + border_y, scale_y_d) + 1
 ERROR_IF(OW != idiv_check((IW-1)*scale_x_n - offset_x + border_x, scale_x_d) + 1);
 for_each(0 <= n < N, 0 <= oy < OH, 0 <= ox < OW; 0 <= c < C) {
     out_t acc;
-    y = oy * scale_y_d + offset_y;
-    x = ox * scale_x_d + offset_x;
-    iy = floor(y / scale_y_n);
-    ix = floor(x / scale_x_n);
-    if (resize_t == float_t) {
-        dy = ((float_t)y / (float_t)scale_y_n) - iy;
-        dx = ((float_t)x / (float_t)scale_x_n) - ix;
+    resize_t dx, dy;
+
+    int32_t y = oy * scale_y_d + offset_y;
+    int32_t x = ox * scale_x_d + offset_x;
+    int16_t iy = floor(y / scale_y_n);
+    int16_t ix = floor(x / scale_x_n);
+
+    if (is_floating_point(resize_t)) {
+        dy = ((resize_t)y / (resize_t)scale_y_n) - iy;
+        dx = ((resize_t)x / (resize_t)scale_x_n) - ix;
     } else {
         dy = y - iy * scale_y_n;
-        dx = y - ix * scale_x_n;
+        dx = x - ix * scale_x_n;
     }
     // Note that -1 <= iy < IH and -1 <= ix < IW
-    iy0 = apply_max(iy, 0);
-    iy1 = apply_min(iy+1, IH-1);
-    ix0 = apply_max(ix, 0);
-    ix1 = apply_min(ix+1, IW-1);
+    int16_t iy0 = apply_max(iy, 0);
+    int16_t iy1 = apply_min(iy+1, IH-1);
+    int16_t ix0 = apply_max(ix, 0);
+    int16_t ix1 = apply_min(ix+1, IW-1);
     if (mode==BILINEAR) {
-        v00 = tensor_read<in_t>(input, [N,IH,IW,C], [n,iy0,ix0,c]);
-        v01 = tensor_read<in_t>(input, [N,IH,IW,C], [n,iy0,ix1,c]);
-        v10 = tensor_read<in_t>(input, [N,IH,IW,C], [n,iy1,ix0,c]);
-        v11 = tensor_read<in_t>(input, [N,IH,IW,C], [n,iy1,ix1,c]);
+        in_t v00 = tensor_read<in_t>(input, [N,IH,IW,C], [n,iy0,ix0,c]);
+        in_t v01 = tensor_read<in_t>(input, [N,IH,IW,C], [n,iy0,ix1,c]);
+        in_t v10 = tensor_read<in_t>(input, [N,IH,IW,C], [n,iy1,ix0,c]);
+        in_t v11 = tensor_read<in_t>(input, [N,IH,IW,C], [n,iy1,ix1,c]);
         acc  = v00 * (scale_y_n - dy) * (scale_x_n - dx);
         acc += v01 * (scale_y_n - dy) * dx;
         acc += v10 * dy * (scale_x_n - dx);
         acc += v11 * dy * dx;
         tensor_write<out_t>(output, [N,OH,OW,C], [n,oy,ox,c], acc);
     } else if (mode==NEAREST) {
-        if (resize_t == float_t) {
+        int32_t iy, ix;
+        if (is_floating_point(resize_t)) {
             iy = (dy >= 0.5) ? iy1 : iy0;
             ix = (dx >= 0.5) ? ix1 : ix0;
         } else {
             iy = (2*dy >= scale_y_n) ? iy1 : iy0;
             ix = (2*dx >= scale_x_n) ? ix1 : ix0;
         }
-        v = tensor_read<in_t>(input, [N,IH,IW,C], [n,iy,ix,c]);
+        in_t v = tensor_read<in_t>(input, [N,IH,IW,C], [n,iy,ix,c]);
         tensor_write<out_t>(output, [N,OH,OW,C], [n,oy,ox,c], v);
     }
 }
@@ -130,7 +134,9 @@ for_each(0 <= n < N, 0 <= oy < OH, 0 <= ox < OW; 0 <= c < C) {
 |Any|signed 8,  nearest |int16_t|int8_t|int8_t
 |Any|signed 16, bilinear|int16_t|int16_t|int48_t
 |Any|signed 16, nearest |int16_t|int16_t|int16_t
-|MI,MT|floating-point   |float_t|float_t|float_t
+|MI,MT|fp16|fp32_t|fp16_t|fp16_t
+|MI,MT|bf16|fp32_t|bf16_t|bf16_t
+|MI,MT|fp32|fp32_t|fp32_t|fp32_t
 |===
 
 *Resize Modes:*
diff --git a/chapters/introduction.adoc b/chapters/introduction.adoc
index 9b2e0c0..93206ca 100644
--- a/chapters/introduction.adoc
+++ b/chapters/introduction.adoc
@@ -106,6 +106,16 @@ The following table summarizes the three profiles:
 |Main Training|TOSA-MT|Yes|Yes|Yes
 |===
 
+=== Status
+
+The TOSA specification is a work in progress.
+
+* The Base Inference profile should be considered to be near release quality, with conformance tests available.
+* The Main Inference profile has most of the expected operators in place, but is still subject to change.
+* The reference model and conformance tests do not yet support all of the floating-point types that have been defined.
+* There is not currently a conformance test suite available for Main Inference.
+* The Main Training profile is pre-alpha: significant work still needs to be done for the profile, and no conformance tests are available.
+
 === Compliance
 
 This section defines when a TOSA implementation is compliant to a given TOSA specification profile.
@@ -267,10 +277,20 @@ The number formats supported by a given operator are listed in its table of supp
 | (1<<47)-1
 |Signed 48-bit two's-complement value.
 
-|float_t
+|fp16_t
+| -infinity
+| +infinity
+| 16-bit floating-point value.
+
+|bf16_t
+| -infinity
+| +infinity
+| 16-bit brain float value.
+
+|fp32_t
 | -infinity
 | +infinity
-|floating-point number. Must have features defined in the section <<Floating-point>>.
+| 32-bit floating-point value.
 |===
 
 Note: In this specification minimum<type> and maximum<type> will denote the minimum and maximum values of the data as stored in memory (ignoring the zero point).
diff --git a/chapters/pseudocode.adoc b/chapters/pseudocode.adoc
index 0747387..1d6c2f2 100644
--- a/chapters/pseudocode.adoc
+++ b/chapters/pseudocode.adoc
@@ -152,7 +152,7 @@ The following functions provide arithmetic while defining requirements such that
 [source,c++]
 ----
 in_t apply_add<in_t>(in_t a, in_t b) {
-    if (<in_t> == float_t) return a + b;
+    if (is_floating_point(in_t)) return a + b;
     int64_t c = (int64_t)a + (int64_t)b;
     REQUIRE(c >= minimum<in_t> && c <= maximum<in_t>);
     return (in_t)c;
@@ -188,7 +188,7 @@ in_t apply_log<in_t>(in_t input) {
 }
 
 in_t apply_max<in_t>(in_t a, in_t b) {
-    if (in_t == float_t) {
+    if (is_floating_point(in_t)) {
         if (isNaN(a) || isNaN(b)) {
             return NaN;
         }
@@ -197,7 +197,7 @@ in_t apply_max<in_t>(in_t a, in_t b) {
 }
 
 in_t apply_min<in_t>(in_t a, in_t b) {
-    if (in_t == float_t) {
+    if (is_floating_point(in_t)) {
         if (isNaN(a) || isNaN(b)) {
             return NaN;
         }
@@ -214,7 +214,7 @@ in_t apply_sqrt<in_t>(in_t input) {
 }
 
 in_t apply_sub<in_t>(in_t a, in_t b) {
-    if (in_t == float_t) return a - b;
+    if (is_floating_point(in_t)) return a - b;
     int64_t c = (int64_t)a - (int64_t)b;
     REQUIRE(c >= minimum<in_t> && c <= maximum<in_t>);
     return (in_t)c;
@@ -238,6 +238,8 @@ int32_t count_leading_zeros(int32_t a) {
 ==== Numeric Conversion Helpers
 
 The following definitions are used in pseudocode to do numeric conversions.
+Where the *float_t* type is used, it represents all of the floating-point data types supported by the given profile.
+See <<Number formats>> for details on the floating-point formats.
 
 [source,c++]
 ----
@@ -276,6 +278,12 @@ Generic helper functions used to keep the pseudocode concise.
 [source,c++]
 ----
 
+bool_t is_floating_point(type) {
+    if (type == fp16_t || type == fp32_t || type == bf16_t)
+        return true;
+    return false;
+}
+
 int32_t idiv(int32_t input1, int32_t input2) {
     return input1 / input2; // Integer divide that truncates towards zero
 }
diff --git a/chapters/reduction.adoc b/chapters/reduction.adoc
index fdf30df..368d82e 100644
--- a/chapters/reduction.adoc
+++ b/chapters/reduction.adoc
@@ -136,7 +136,9 @@ for_each(index in shape1) {
 |Any|signed 8|int8_t
 |Any|signed 16|int16_t
 |Any|signed 32|int32_t
-|MI, MT|floating-point|float_t
+|MI, MT|fp16|fp16_t
+|MI, MT|bf16|bf16_t
+|MI, MT|fp32|fp32_t
 |===
 
 ==== REDUCE_MIN
@@ -179,7 +181,9 @@ for_each(index in shape1) {
 |Any|signed 8|int8_t
 |Any|signed 16|int16_t
 |Any|signed 32|int32_t
-|MI, MT|floating-point|float_t
+|MI, MT|fp16|fp16_t
+|MI, MT|bf16|bf16_t
+|MI, MT|fp32|fp32_t
 |===
 
 ==== REDUCE_PRODUCT
@@ -220,7 +224,9 @@ for_each(index in shape1) {
 |===
 |Profile|Mode|in_out_t
 
-|MI, MT|floating-point|float_t
+|MI, MT|fp16|fp16_t
+|MI, MT|bf16|bf16_t
+|MI, MT|fp32|fp32_t
 |===
 
 ==== REDUCE_SUM
@@ -262,6 +268,8 @@ for_each(index in shape1) {
 |Profile|Mode|in_out_t
 
 |Any|signed 32|int32_t
-|MI, MT|floating-point|float_t
+|MI, MT|fp16|fp16_t
+|MI, MT|bf16|bf16_t
+|MI, MT|fp32|fp32_t
 |===
 
diff --git a/chapters/scatter_gather.adoc b/chapters/scatter_gather.adoc
index 63f30dc..524bfd3 100644
--- a/chapters/scatter_gather.adoc
+++ b/chapters/scatter_gather.adoc
@@ -105,5 +105,7 @@ for_each(0 <= n < N, 0 <= w < W, 0 <= c < C) {
 |Any|signed 8|int32_t|int8_t
 |Any|signed 16|int32_t|int16_t
 |Any|signed 32|int32_t|int32_t
-|MI,MT|float|int32_t|float
+|MI,MT|fp16|int32_t|fp16_t
+|MI,MT|bf16|int32_t|bf16_t
+|MI,MT|fp32|int32_t|fp32_t
 |===
diff --git a/chapters/tensor_ops.adoc b/chapters/tensor_ops.adoc
index 8bcb115..fb657f7 100644
--- a/chapters/tensor_ops.adoc
+++ b/chapters/tensor_ops.adoc
@@ -61,7 +61,9 @@ for_each(left_index in left_shape) {
 
 |Any|signed 8|int8_t|int32_t
 |Any|signed 16|int16_t|int32_t
-|MI, MT|floating-point|float_t|int32_t
+|MI, MT|fp16|fp16_t|int32_t
+|MI, MT|bf16|bf16_t|int32_t
+|MI, MT|fp32|fp32_t|int32_t
 |===
 
 ==== AVG_POOL2D
@@ -134,7 +136,10 @@ for_each(0 <= n < N, 0 <= oy < OH, 0 <= ox < OW, 0 <= c < C ) {
 
 |Any|signed 8|int8_t|int32_t
 |Any|signed 16|int16_t|int32_t
-|MI, MT|floating-point|float_t|float_t
+|MI, MT|fp16 with fp16 accumulate|fp16_t|fp16_t
+|MI, MT|fp16 with fp32 accumulate|fp16_t|fp32_t
+|MI, MT|bf16 with fp32 accumulate|bf16_t|fp32_t
+|MI, MT|fp32|fp32_t|fp32_t
 |===
 
 ==== CONV2D
@@ -198,7 +203,10 @@ for_each(0 <= n < N, 0 <= oy < OH, 0 <= ox < OW; 0 <= oc < OC) {
 |Any|signed 8x8|int8_t|int8_t|int32_t
 |Any|signed 8x4|int8_t|int4_t|int32_t
 |Any|signed 16x8|int16_t|int8_t|int48_t
-|MI, MT|floating-point|float_t|float_t|float_t
+|MI, MT|fp16 with fp16 accumulate|fp16_t|fp16_t|fp16_t
+|MI, MT|fp16 with fp32 accumulate|fp16_t|fp16_t|fp32_t
+|MI, MT|bf16 with fp32 accumulate|bf16_t|bf16_t|fp32_t
+|MI, MT|fp32|fp32_t|fp32_t|fp32_t
 |===
 
 ==== CONV3D
@@ -265,7 +273,10 @@ for_each(0 <= n < N, 0 <= od < OD, 0 <= oy < OH, 0 <= ox < OW; 0 <= oc < OC) {
 |Any|signed 8x8|int8_t|int8_t|int32_t
 |Any|signed 8x4|int8_t|int4_t|int32_t
 |Any|signed 16x8|int16_t|int8_t|int48_t
-|MI, MT|floating-point|float_t|float_t|float_t
+|MI, MT|fp16 with fp16 accumulate|fp16_t|fp16_t|fp16_t
+|MI, MT|fp16 with fp32 accumulate|fp16_t|fp16_t|fp32_t
+|MI, MT|bf16 with fp32 accumulate|bf16_t|bf16_t|fp32_t
+|MI, MT|fp32|fp32_t|fp32_t|fp32_t
 |===
 
 
@@ -330,7 +341,10 @@ for_each(0 <= n < N, 0 <= oy < OH, 0 <= ox < OW; 0 <= c < C, 0 <= m < M) {
 |Any|signed 8x8|int8_t|int8_t|int32_t
 |Any|signed 8x4|int8_t|int4_t|int32_t
 |Any|signed 16x8|int16_t|int8_t|int48_t
-|MI, MT|floating-point|float_t|float_t|float_t
+|MI, MT|fp16 with fp16 accumulate|fp16_t|fp16_t|fp16_t
+|MI, MT|fp16 with fp32 accumulate|fp16_t|fp16_t|fp32_t
+|MI, MT|bf16 with fp32 accumulate|bf16_t|bf16_t|fp32_t
+|MI, MT|fp32|fp32_t|fp32_t|fp32_t
 |===
 
 ==== FFT2D
@@ -394,7 +408,8 @@ for_each(0 <= n < N, 0 <= oy < H, 0 <= ox < W) {
 
 |===
 |Profile|Mode|in_out_t
-|MI,MT|floating-point|float
+
+|MI,MT|fp32|fp32_t
 |===
 
 ==== FULLY_CONNECTED
@@ -442,7 +457,10 @@ for_each(0 <= n < N, 0 <= oc < OC) {
 |Any|signed 8x8|int8_t|int8_t|int32_t
 |Any|signed 8x4|int8_t|int4_t|int32_t
 |Any|signed 16x8 |int16_t|int8_t|int48_t
-|MI, MT|floating-point|float_t|float_t|float_t
+|MI, MT|fp16 with fp16 accumulate|fp16_t|fp16_t|fp16_t
+|MI, MT|fp16 with fp32 accumulate|fp16_t|fp16_t|fp32_t
+|MI, MT|bf16 with fp32 accumulate|bf16_t|bf16_t|fp32_t
+|MI, MT|fp32|fp32_t|fp32_t|fp32_t
 |===
 
 ==== MATMUL
@@ -485,7 +503,10 @@ for_each(0 <= n < N, 0 <= h < H, 0 <= w < W) {
 
 |Any|signed 8x8|int8_t|int32_t
 |Any|signed 16x16|int16_t|int48_t
-|MI, MT|floating-point|float_t|float_t
+|MI, MT|fp16 with fp16 accumulate|fp16_t|fp16_t
+|MI, MT|fp16 with fp32 accumulate|fp16_t|fp32_t
+|MI, MT|bf16 with fp32 accumulate|bf16_t|fp32_t
+|MI, MT|fp32|fp32_t|fp32_t
 |===
 
 ==== MAX_POOL2D
@@ -540,7 +561,9 @@ for_each(0 <= n < N, 0 <= oy < H, 0 <= ox < W, 0 <= c < C ) {
 
 |Any|signed 8|int8_t
 |Any|16-bit|int16_t
-|MI, MT|floating-point|float_t
+|MI, MT|fp16|fp16_t
+|MI, MT|bf16|bf16_t
+|MI, MT|fp32|fp32_t
 |===
 
 ==== RFFT2D
@@ -587,7 +610,8 @@ for_each(0 <= n < N, 0 <= oy < H/2 + 1, 0 <= ox < W/2 + 1) {
 
 |===
 |Profile|Mode|in_out_t
-|MI,MT|floating-point|float
+
+|MI,MT|fp32|fp32_t
 |===
 
 
@@ -650,5 +674,8 @@ for_each(0 <= n < N, 0 <= iy < IH, 0 <= ix < IW, 0 <= oc < OC,
 |Any|signed 8x8|int8_t|int8_t|int32_t
 |Any|signed 8x4|int8_t|int4_t|int32_t
 |Any|signed 16x8|int16_t|int8_t|int48_t
-|MI, MT|floating-point|float_t|float_t|float_t
+|MI, MT|fp16 with fp16 accumulate|fp16_t|fp16_t|fp16_t
+|MI, MT|fp16 with fp32 accumulate|fp16_t|fp16_t|fp32_t
+|MI, MT|bf16 with fp32 accumulate|bf16_t|bf16_t|fp32_t
+|MI, MT|fp32|fp32_t|fp32_t|fp32_t
 |===
diff --git a/chapters/type_conversion.adoc b/chapters/type_conversion.adoc
index c19d834..4a5349b 100644
--- a/chapters/type_conversion.adoc
+++ b/chapters/type_conversion.adoc
@@ -33,9 +33,9 @@ for_each(index in shape) {
         out = (in != 0) ? true : false;
     } else if (in_t == bool_t) {
         out = (in) ? 1 : 0;
-    } else if (out_t == float_t) {
+    } else if (out_t == fp16_t || out_t == bf16_t || out_t == fp32_t) {
         out = round_to_nearest_float(in);
-    } else if (in_t == float_t) {
+    } else if (in_t == fp16_t || in_t == bf16_t || in_t == fp32_t) {
         out = apply_clip<out_t>(round_to_nearest_int(in), minimum<out_t>, maximum<out_t>);
     } else if (sizeof(out_t) >= sizeof(in_t)) {
         out = sign_extend(in);
@@ -57,18 +57,30 @@ for_each(index in shape) {
 |Any|signed 8 to bool|int8_t|bool_t
 |Any|signed 8 to signed 16|int8_t|int16_t
 |Any|signed 8 to signed 32|int8_t|int32_t
-|MI, MT|signed 8 to floating-point|int8_t|float_t
+|MI, MT|signed 8 to fp16|int8_t|fp16_t
+|MI, MT|signed 8 to bf16|int8_t|bf16_t
+|MI, MT|signed 8 to fp32|int8_t|fp32_t
 |Any|signed 16 to bool|int16_t|bool_t
 |Any|signed 16 to signed 8|int16_t|int8_t
 |Any|signed 16 to signed 32|int16_t|int32_t
-|MI, MT|signed 16 to floating-point|int16_t|float_t
+|MI, MT|signed 16 to fp16|int16_t|fp16_t
+|MI, MT|signed 16 to bf16|int16_t|bf16_t
+|MI, MT|signed 16 to fp32|int16_t|fp32_t
 |Any|signed 32 to bool|int32_t|bool_t
 |Any|signed 32 to signed 8|int32_t|int8_t
 |Any|signed 32 to signed 16|int32_t|int16_t
-|MI, MT|signed 32 to floating-point|int32_t|float_t
-|MI, MT|floating-point to signed 8|float_t|int8_t
-|MI, MT|floating-point to signed 16|float_t|int16_t
-|MI, MT|floating-point to signed 32|float_t|int32_t
+|MI, MT|signed 32 to fp16|int32_t|fp16_t
+|MI, MT|signed 32 to bf16|int32_t|bf16_t
+|MI, MT|signed 32 to fp32|int32_t|fp32_t
+|MI, MT|fp16 to signed 8|fp16_t|int8_t
+|MI, MT|fp16 to signed 16|fp16_t|int16_t
+|MI, MT|fp16 to signed 32|fp16_t|int32_t
+|MI, MT|bf16 to signed 8|bf16_t|int8_t
+|MI, MT|bf16 to signed 16|bf16_t|int16_t
+|MI, MT|bf16 to signed 32|bf16_t|int32_t
+|MI, MT|fp32 to signed 8|fp32_t|int8_t
+|MI, MT|fp32 to signed 16|fp32_t|int16_t
+|MI, MT|fp32 to signed 32|fp32_t|int32_t
 |===
 
 ==== RESCALE
diff --git a/tools/dictionary.dic b/tools/dictionary.dic
index 29be61e..b062ac2 100644
--- a/tools/dictionary.dic
+++ b/tools/dictionary.dic
@@ -41,6 +41,7 @@ MERCHANTABILITY
 MUL
 multipler
 NPUs
+pre
 precisions
 pseudocode
 Pseudocode
author	Eric Kunze <eric.kunze@arm.com>	2022-04-07 16:54:46 -0700
committer	Eric Kunze <eric.kunze@arm.com>	2022-06-17 20:38:16 +0000
commit	42229d03fe55c45f0ad2ba68f190f3d68a78ae79 (patch)
tree	fde2487db3fe2c4e8257beec9b54044fac9da931
parent	f9e5ba94f12a71f088c790f532cd62d33b8d25d0 (diff)
download	specification-42229d03fe55c45f0ad2ba68f190f3d68a78ae79.tar.gz