aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEric Kunze <eric.kunze@arm.com>2022-04-07 16:54:46 -0700
committerEric Kunze <eric.kunze@arm.com>2022-06-17 20:38:16 +0000
commit42229d03fe55c45f0ad2ba68f190f3d68a78ae79 (patch)
treefde2487db3fe2c4e8257beec9b54044fac9da931
parentf9e5ba94f12a71f088c790f532cd62d33b8d25d0 (diff)
downloadspecification-42229d03fe55c45f0ad2ba68f190f3d68a78ae79.tar.gz
Initial work on floating-point type definition
Define operations in terms of common floating-point data types. Definitions for the data types are in the introduction. Added a section to describe status of the different profiles. Signed-off-by: Eric Kunze <eric.kunze@arm.com> Change-Id: Iac57026806acfb7913f40af61176322fb02b7cc1
-rw-r--r--chapters/activation_funcs.adoc12
-rw-r--r--chapters/comparison.adoc12
-rw-r--r--chapters/data_layout.adoc28
-rw-r--r--chapters/data_nodes.adoc10
-rw-r--r--chapters/ewise_binary.adoc24
-rw-r--r--chapters/ewise_ternary.adoc4
-rw-r--r--chapters/ewise_unary.adoc24
-rw-r--r--chapters/image.adoc42
-rw-r--r--chapters/introduction.adoc24
-rw-r--r--chapters/pseudocode.adoc16
-rw-r--r--chapters/reduction.adoc16
-rw-r--r--chapters/scatter_gather.adoc4
-rw-r--r--chapters/tensor_ops.adoc49
-rw-r--r--chapters/type_conversion.adoc28
-rw-r--r--tools/dictionary.dic1
15 files changed, 217 insertions, 77 deletions
diff --git a/chapters/activation_funcs.adoc b/chapters/activation_funcs.adoc
index 87f213c..27ba596 100644
--- a/chapters/activation_funcs.adoc
+++ b/chapters/activation_funcs.adoc
@@ -44,7 +44,9 @@ for_each(index in shape) {
|Any|signed 8|int8_t
|Any|signed 16|int16_t
-|MI, MT|floating-point|float_t
+|MI, MT|fp16|fp16_t
+|MI, MT|bf16|bf16_t
+|MI, MT|fp32|fp32_t
|===
==== SIGMOID
@@ -81,7 +83,9 @@ generate_lookup_table(&sigmoid_table, &sigmoid_reference);
|===
|Profile|Mode|in_out_t
-|MI, MT|floating-point|float_t
+|MI, MT|fp16|fp16_t
+|MI, MT|bf16|bf16_t
+|MI, MT|fp32|fp32_t
|===
==== TANH
@@ -119,5 +123,7 @@ generate_lookup_table(&tanh_table, &tanh_reference);
|===
|Profile|Mode|in_out_t
-|MI, MT|floating-point|float_t
+|MI, MT|fp16|fp16_t
+|MI, MT|bf16|bf16_t
+|MI, MT|fp32|fp32_t
|===
diff --git a/chapters/comparison.adoc b/chapters/comparison.adoc
index 67f3506..5c27071 100644
--- a/chapters/comparison.adoc
+++ b/chapters/comparison.adoc
@@ -47,7 +47,9 @@ for_each(index in shape) {
|Profile|Mode|in_t|out_t
|Any|signed 32|int32_t|bool_t
-|MI, MT|floating-point|float_t|bool_t
+|MI, MT|fp16|fp16_t|bool_t
+|MI, MT|bf16|bf16_t|bool_t
+|MI, MT|fp32|fp32_t|bool_t
|===
==== GREATER
@@ -87,7 +89,9 @@ for_each(index in shape) {
|Profile|Mode|in_t|out_t
|Any|signed 32|int32_t|bool_t
-|MI, MT|floating-point|float_t|bool_t
+|MI, MT|fp16|fp16_t|bool_t
+|MI, MT|bf16|bf16_t|bool_t
+|MI, MT|fp32|fp32_t|bool_t
|===
==== GREATER_EQUAL
@@ -128,5 +132,7 @@ for_each(index in shape) {
|Profile|Mode|in_t|out_t
|Any|signed 32|int32_t|bool_t
-|MI, MT|floating-point|float_t|bool_t
+|MI, MT|fp16|fp16_t|bool_t
+|MI, MT|bf16|bf16_t|bool_t
+|MI, MT|fp32|fp32_t|bool_t
|===
diff --git a/chapters/data_layout.adoc b/chapters/data_layout.adoc
index 7bc2413..0c5c4d6 100644
--- a/chapters/data_layout.adoc
+++ b/chapters/data_layout.adoc
@@ -61,7 +61,9 @@ for_each(index1 in shape) {
|Any|signed 8|int8_t
|Any|signed 16|int16_t
|Any|signed 32|int32_t
-|MI, MT|floating-point|float_t
+|MI, MT|fp16|fp16_t
+|MI, MT|bf16|bf16_t
+|MI, MT|fp32|fp32_t
|===
==== PAD
@@ -112,7 +114,9 @@ for_each(index in shape) {
|Any|signed 8|int8_t
|Any|signed 16|int16_t
|Any|signed 32|int32_t
-|MI, MT|floating-point|float_t
+|MI, MT|fp16|fp16_t
+|MI, MT|bf16|bf16_t
+|MI, MT|fp32|fp32_t
|===
==== RESHAPE
@@ -156,7 +160,9 @@ for_each(index in shape) {
|Any|signed 8|int8_t
|Any|signed 16|int16_t
|Any|signed 32|int32_t
-|MI, MT|floating-point|float_t
+|MI, MT|fp16|fp16_t
+|MI, MT|bf16|bf16_t
+|MI, MT|fp32|fp32_t
|===
==== REVERSE
@@ -195,7 +201,9 @@ for_each(index in shape) {
|Any|signed 8|int8_t
|Any|signed 16|int16_t
|Any|signed 32|int32_t
-|MI, MT|floating-point|float_t
+|MI, MT|fp16|fp16_t
+|MI, MT|bf16|bf16_t
+|MI, MT|fp32|fp32_t
|===
==== SLICE
@@ -247,7 +255,9 @@ for_each(index in shape) {
|Any|signed 8|int8_t
|Any|signed 16|int16_t
|Any|signed 32|int32_t
-|MI, MT|floating-point|float_t
+|MI, MT|fp16|fp16_t
+|MI, MT|bf16|bf16_t
+|MI, MT|fp32|fp32_t
|===
==== TILE
@@ -288,7 +298,9 @@ for_each(index in shape) {
|Any|signed 8|int8_t
|Any|signed 16|int16_t
|Any|signed 32|int32_t
-|MI, MT|floating-point|float_t
+|MI, MT|fp16|fp16_t
+|MI, MT|bf16|bf16_t
+|MI, MT|fp32|fp32_t
|===
==== TRANSPOSE
@@ -344,5 +356,7 @@ for_each(index in shape) {
|Any|signed 8|int8_t
|Any|signed 16|int16_t
|Any|signed 32|int32_t
-|MI, MT|floating-point|float_t
+|MI, MT|fp16|fp16_t
+|MI, MT|bf16|bf16_t
+|MI, MT|fp32|fp32_t
|===
diff --git a/chapters/data_nodes.adoc b/chapters/data_nodes.adoc
index 9d32a62..5f45464 100644
--- a/chapters/data_nodes.adoc
+++ b/chapters/data_nodes.adoc
@@ -1,7 +1,7 @@
//
// This confidential and proprietary software may be used only as
// authorised by a licensing agreement from ARM Limited
-// (C) COPYRIGHT 2020-2021 ARM Limited
+// (C) COPYRIGHT 2020-2022 ARM Limited
// ALL RIGHTS RESERVED
// The entire notice above must be reproduced on all authorised
// copies and copies may only be made to the extent permitted
@@ -31,7 +31,9 @@ A node containing constant data for use as the input to an operation. May hold d
|Any|signed 8|int8_t
|Any|signed 16|int16_t
|Any|signed 32|int32_t
-|MI, MT|floating-point|float_t
+|MI, MT|fp16|fp16_t
+|MI, MT|bf16|bf16_t
+|MI, MT|fp32|fp32_t
|===
==== IDENTITY
@@ -56,5 +58,7 @@ Returns a tensor with the same shape, type, and contents as the input.
|Any|signed 8|int8_t
|Any|signed 16|int16_t
|Any|signed 32|int32_t
-|MI, MT|floating-point|float_t
+|MI, MT|fp16|fp16_t
+|MI, MT|bf16|bf16_t
+|MI, MT|fp32|fp32_t
|===
diff --git a/chapters/ewise_binary.adoc b/chapters/ewise_binary.adoc
index e25fb8d..27efb44 100644
--- a/chapters/ewise_binary.adoc
+++ b/chapters/ewise_binary.adoc
@@ -44,7 +44,9 @@ for_each(index in shape) {
|Profile|Mode|in_out_t
|Any|signed 32|int32_t
-|MI, MT|floating-point|float_t
+|MI, MT|fp16|fp16_t
+|MI, MT|bf16|bf16_t
+|MI, MT|fp32|fp32_t
|===
==== ARITHMETIC_RIGHT_SHIFT
@@ -483,7 +485,9 @@ for_each(index in shape) {
|Profile|Mode|in_out_t
|Any|signed 32|int32_t
-|MI, MT|floating-point|float_t
+|MI, MT|fp16|fp16_t
+|MI, MT|bf16|bf16_t
+|MI, MT|fp32|fp32_t
|===
==== MINIMUM
@@ -521,7 +525,9 @@ for_each(index in shape) {
|Profile|Mode|in_out_t
|Any|signed 32|int32_t
-|MI, MT|floating-point|float_t
+|MI, MT|fp16|fp16_t
+|MI, MT|bf16|bf16_t
+|MI, MT|fp32|fp32_t
|===
==== MUL
@@ -571,7 +577,9 @@ for_each(index in shape) {
|Any|signed 8|int8_t|int32_t
|Any|signed 16|int16_t|int32_t
|Any|signed 32|int32_t|int32_t
-|MI, MT|floating-point|float_t|float_t
+|MI, MT|fp16|fp16_t|fp16_t
+|MI, MT|bf16|bf16_t|bf16_t
+|MI, MT|fp32|fp32_t|fp32_t
|===
==== POW
@@ -608,7 +616,9 @@ for_each(index in shape) {
|===
|Profile|Mode|in_out_t
-|MI, MT|floating-point|float_t
+|MI, MT|fp16|fp16_t
+|MI, MT|bf16|bf16_t
+|MI, MT|fp32|fp32_t
|===
==== SUB
@@ -646,7 +656,9 @@ for_each(index in shape) {
|Profile|Mode|in_out_t
|Any|signed 32|int32_t
-|MI, MT|floating-point|float_t
+|MI, MT|fp16|fp16_t
+|MI, MT|bf16|bf16_t
+|MI, MT|fp32|fp32_t
|===
==== TABLE
diff --git a/chapters/ewise_ternary.adoc b/chapters/ewise_ternary.adoc
index e61e1c2..84fe14d 100644
--- a/chapters/ewise_ternary.adoc
+++ b/chapters/ewise_ternary.adoc
@@ -53,5 +53,7 @@ for_each(index in shape) {
|Any|signed 8|bool_t|int8_t
|Any|signed 16|bool_t|int16_t
|Any|signed 32|bool_t|int32_t
-|MI, MT|floating-point|bool_t|float_t
+|MI, MT|fp16|bool_t|fp16_t
+|MI, MT|bf16|bool_t|bf16_t
+|MI, MT|fp32|bool_t|fp32_t
|===
diff --git a/chapters/ewise_unary.adoc b/chapters/ewise_unary.adoc
index ff7aef9..8c88f47 100644
--- a/chapters/ewise_unary.adoc
+++ b/chapters/ewise_unary.adoc
@@ -193,7 +193,9 @@ for_each(index in shape) {
|===
|Profile|Mode|in_out_t
-|MI, MT|floating-point|float_t
+|MI, MT|fp16|fp16_t
+|MI, MT|bf16|bf16_t
+|MI, MT|fp32|fp32_t
|===
==== FLOOR
@@ -232,7 +234,9 @@ for_each(index in shape) {
|===
|Profile|Mode|in_out_t
-|MI, MT|floating-point|float_t
+|MI, MT|fp16|fp16_t
+|MI, MT|bf16|bf16_t
+|MI, MT|fp32|fp32_t
|===
==== LOG
@@ -271,7 +275,9 @@ for_each(index in shape) {
|===
|Profile|Mode|in_out_t
-|MI, MT|floating-point|float_t
+|MI, MT|fp16|fp16_t
+|MI, MT|bf16|bf16_t
+|MI, MT|fp32|fp32_t
|===
==== LOGICAL_NOT
@@ -351,7 +357,9 @@ for_each(index in shape) {
|Any|signed 8|int8_t|int32_t
|Any|signed 16|int16_t|int32_t
|Any|signed 32|int32_t|int32_t
-|MI, MT|floating-point|float_t|float_t
+|MI, MT|fp16|fp16_t|fp16_t
+|MI, MT|bf16|bf16_t|bf16_t
+|MI, MT|fp32|fp32_t|fp32_t
|===
==== RECIPROCAL
@@ -390,7 +398,9 @@ for_each(index in shape) {
|===
|Profile|Mode|in_out_t
-|MI, MT|floating-point|float_t
+|MI, MT|fp16|fp16_t
+|MI, MT|bf16|bf16_t
+|MI, MT|fp32|fp32_t
|===
==== RSQRT
@@ -435,5 +445,7 @@ for_each(index in shape) {
|===
|Profile|Mode|in_out_t
-|MI, MT|floating-point|float_t
+|MI, MT|fp16|fp16_t
+|MI, MT|bf16|bf16_t
+|MI, MT|fp32|fp32_t
|===
diff --git a/chapters/image.adoc b/chapters/image.adoc
index 0b25369..690480c 100644
--- a/chapters/image.adoc
+++ b/chapters/image.adoc
@@ -81,41 +81,45 @@ ERROR_IF(OH != idiv_check((IH-1)*scale_y_n - offset_y + border_y, scale_y_d) + 1
ERROR_IF(OW != idiv_check((IW-1)*scale_x_n - offset_x + border_x, scale_x_d) + 1);
for_each(0 <= n < N, 0 <= oy < OH, 0 <= ox < OW; 0 <= c < C) {
out_t acc;
- y = oy * scale_y_d + offset_y;
- x = ox * scale_x_d + offset_x;
- iy = floor(y / scale_y_n);
- ix = floor(x / scale_x_n);
- if (resize_t == float_t) {
- dy = ((float_t)y / (float_t)scale_y_n) - iy;
- dx = ((float_t)x / (float_t)scale_x_n) - ix;
+ resize_t dx, dy;
+
+ int32_t y = oy * scale_y_d + offset_y;
+ int32_t x = ox * scale_x_d + offset_x;
+ int16_t iy = floor(y / scale_y_n);
+ int16_t ix = floor(x / scale_x_n);
+
+ if (is_floating_point(resize_t)) {
+ dy = ((resize_t)y / (resize_t)scale_y_n) - iy;
+ dx = ((resize_t)x / (resize_t)scale_x_n) - ix;
} else {
dy = y - iy * scale_y_n;
dx = x - ix * scale_x_n;
}
// Note that -1 <= iy < IH and -1 <= ix < IW
- iy0 = apply_max(iy, 0);
- iy1 = apply_min(iy+1, IH-1);
- ix0 = apply_max(ix, 0);
- ix1 = apply_min(ix+1, IW-1);
+ int16_t iy0 = apply_max(iy, 0);
+ int16_t iy1 = apply_min(iy+1, IH-1);
+ int16_t ix0 = apply_max(ix, 0);
+ int16_t ix1 = apply_min(ix+1, IW-1);
if (mode==BILINEAR) {
- v00 = tensor_read<in_t>(input, [N,IH,IW,C], [n,iy0,ix0,c]);
- v01 = tensor_read<in_t>(input, [N,IH,IW,C], [n,iy0,ix1,c]);
- v10 = tensor_read<in_t>(input, [N,IH,IW,C], [n,iy1,ix0,c]);
- v11 = tensor_read<in_t>(input, [N,IH,IW,C], [n,iy1,ix1,c]);
+ in_t v00 = tensor_read<in_t>(input, [N,IH,IW,C], [n,iy0,ix0,c]);
+ in_t v01 = tensor_read<in_t>(input, [N,IH,IW,C], [n,iy0,ix1,c]);
+ in_t v10 = tensor_read<in_t>(input, [N,IH,IW,C], [n,iy1,ix0,c]);
+ in_t v11 = tensor_read<in_t>(input, [N,IH,IW,C], [n,iy1,ix1,c]);
acc = v00 * (scale_y_n - dy) * (scale_x_n - dx);
acc += v01 * (scale_y_n - dy) * dx;
acc += v10 * dy * (scale_x_n - dx);
acc += v11 * dy * dx;
tensor_write<out_t>(output, [N,OH,OW,C], [n,oy,ox,c], acc);
} else if (mode==NEAREST) {
- if (resize_t == float_t) {
+ int32_t iy, ix;
+ if (is_floating_point(resize_t)) {
iy = (dy >= 0.5) ? iy1 : iy0;
ix = (dx >= 0.5) ? ix1 : ix0;
} else {
iy = (2*dy >= scale_y_n) ? iy1 : iy0;
ix = (2*dx >= scale_x_n) ? ix1 : ix0;
}
- v = tensor_read<in_t>(input, [N,IH,IW,C], [n,iy,ix,c]);
+ in_t v = tensor_read<in_t>(input, [N,IH,IW,C], [n,iy,ix,c]);
tensor_write<out_t>(output, [N,OH,OW,C], [n,oy,ox,c], v);
}
}
@@ -130,7 +134,9 @@ for_each(0 <= n < N, 0 <= oy < OH, 0 <= ox < OW; 0 <= c < C) {
|Any|signed 8, nearest |int16_t|int8_t|int8_t
|Any|signed 16, bilinear|int16_t|int16_t|int48_t
|Any|signed 16, nearest |int16_t|int16_t|int16_t
-|MI,MT|floating-point |float_t|float_t|float_t
+|MI,MT|fp16|fp32_t|fp16_t|fp16_t
+|MI,MT|bf16|fp32_t|bf16_t|bf16_t
+|MI,MT|fp32|fp32_t|fp32_t|fp32_t
|===
*Resize Modes:*
diff --git a/chapters/introduction.adoc b/chapters/introduction.adoc
index 9b2e0c0..93206ca 100644
--- a/chapters/introduction.adoc
+++ b/chapters/introduction.adoc
@@ -106,6 +106,16 @@ The following table summarizes the three profiles:
|Main Training|TOSA-MT|Yes|Yes|Yes
|===
+=== Status
+
+The TOSA specification is a work in progress.
+
+* The Base Inference profile should be considered to be near release quality, with conformance tests available.
+* The Main Inference profile has most of the expected operators in place, but is still subject to change.
+* The reference model and conformance tests do not yet support all of the floating-point types that have been defined.
+* There is not currently a conformance test suite available for Main Inference.
+* The Main Training profile is pre-alpha; significant work still needs to be done for the profile, and no conformance tests are available.
+
=== Compliance
This section defines when a TOSA implementation is compliant to a given TOSA specification profile.
@@ -267,10 +277,20 @@ The number formats supported by a given operator are listed in its table of supp
| (1<<47)-1
|Signed 48-bit two's-complement value.
-|float_t
+|fp16_t
+| -infinity
+| +infinity
+| 16-bit floating-point value.
+
+|bf16_t
+| -infinity
+| +infinity
+| 16-bit brain float value.
+
+|fp32_t
| -infinity
| +infinity
-|floating-point number. Must have features defined in the section <<Floating-point>>.
+| 32-bit floating-point value.
|===
Note: In this specification minimum<type> and maximum<type> will denote the minimum and maximum values of the data as stored in memory (ignoring the zero point).
diff --git a/chapters/pseudocode.adoc b/chapters/pseudocode.adoc
index 0747387..1d6c2f2 100644
--- a/chapters/pseudocode.adoc
+++ b/chapters/pseudocode.adoc
@@ -152,7 +152,7 @@ The following functions provide arithmetic while defining requirements such that
[source,c++]
----
in_t apply_add<in_t>(in_t a, in_t b) {
- if (<in_t> == float_t) return a + b;
+ if (is_floating_point(in_t)) return a + b;
int64_t c = (int64_t)a + (int64_t)b;
REQUIRE(c >= minimum<in_t> && c <= maximum<in_t>);
return (in_t)c;
@@ -188,7 +188,7 @@ in_t apply_log<in_t>(in_t input) {
}
in_t apply_max<in_t>(in_t a, in_t b) {
- if (in_t == float_t) {
+ if (is_floating_point(in_t)) {
if (isNaN(a) || isNaN(b)) {
return NaN;
}
@@ -197,7 +197,7 @@ in_t apply_max<in_t>(in_t a, in_t b) {
}
in_t apply_min<in_t>(in_t a, in_t b) {
- if (in_t == float_t) {
+ if (is_floating_point(in_t)) {
if (isNaN(a) || isNaN(b)) {
return NaN;
}
@@ -214,7 +214,7 @@ in_t apply_sqrt<in_t>(in_t input) {
}
in_t apply_sub<in_t>(in_t a, in_t b) {
- if (in_t == float_t) return a - b;
+ if (is_floating_point(in_t)) return a - b;
int64_t c = (int64_t)a - (int64_t)b;
REQUIRE(c >= minimum<in_t> && c <= maximum<in_t>);
return (in_t)c;
@@ -238,6 +238,8 @@ int32_t count_leading_zeros(int32_t a) {
==== Numeric Conversion Helpers
The following definitions are used in pseudocode to do numeric conversions.
+Where the *float_t* type is used, it represents all of the floating-point data types supported by the given profile.
+See <<Number formats>> for details on the floating-point formats.
[source,c++]
----
@@ -276,6 +278,12 @@ Generic helper functions used to keep the pseudocode concise.
[source,c++]
----
+bool_t is_floating_point(type) {
+ if (type == fp16_t || type == fp32_t || type == bf16_t)
+ return true;
+ return false;
+}
+
int32_t idiv(int32_t input1, int32_t input2) {
return input1 / input2; // Integer divide that truncates towards zero
}
diff --git a/chapters/reduction.adoc b/chapters/reduction.adoc
index fdf30df..368d82e 100644
--- a/chapters/reduction.adoc
+++ b/chapters/reduction.adoc
@@ -136,7 +136,9 @@ for_each(index in shape1) {
|Any|signed 8|int8_t
|Any|signed 16|int16_t
|Any|signed 32|int32_t
-|MI, MT|floating-point|float_t
+|MI, MT|fp16|fp16_t
+|MI, MT|bf16|bf16_t
+|MI, MT|fp32|fp32_t
|===
==== REDUCE_MIN
@@ -179,7 +181,9 @@ for_each(index in shape1) {
|Any|signed 8|int8_t
|Any|signed 16|int16_t
|Any|signed 32|int32_t
-|MI, MT|floating-point|float_t
+|MI, MT|fp16|fp16_t
+|MI, MT|bf16|bf16_t
+|MI, MT|fp32|fp32_t
|===
==== REDUCE_PRODUCT
@@ -220,7 +224,9 @@ for_each(index in shape1) {
|===
|Profile|Mode|in_out_t
-|MI, MT|floating-point|float_t
+|MI, MT|fp16|fp16_t
+|MI, MT|bf16|bf16_t
+|MI, MT|fp32|fp32_t
|===
==== REDUCE_SUM
@@ -262,6 +268,8 @@ for_each(index in shape1) {
|Profile|Mode|in_out_t
|Any|signed 32|int32_t
-|MI, MT|floating-point|float_t
+|MI, MT|fp16|fp16_t
+|MI, MT|bf16|bf16_t
+|MI, MT|fp32|fp32_t
|===
diff --git a/chapters/scatter_gather.adoc b/chapters/scatter_gather.adoc
index 63f30dc..524bfd3 100644
--- a/chapters/scatter_gather.adoc
+++ b/chapters/scatter_gather.adoc
@@ -105,5 +105,7 @@ for_each(0 <= n < N, 0 <= w < W, 0 <= c < C) {
|Any|signed 8|int32_t|int8_t
|Any|signed 16|int32_t|int16_t
|Any|signed 32|int32_t|int32_t
-|MI,MT|float|int32_t|float
+|MI,MT|fp16|int32_t|fp16_t
+|MI,MT|bf16|int32_t|bf16_t
+|MI,MT|fp32|int32_t|fp32_t
|===
diff --git a/chapters/tensor_ops.adoc b/chapters/tensor_ops.adoc
index 8bcb115..fb657f7 100644
--- a/chapters/tensor_ops.adoc
+++ b/chapters/tensor_ops.adoc
@@ -61,7 +61,9 @@ for_each(left_index in left_shape) {
|Any|signed 8|int8_t|int32_t
|Any|signed 16|int16_t|int32_t
-|MI, MT|floating-point|float_t|int32_t
+|MI, MT|fp16|fp16_t|int32_t
+|MI, MT|bf16|bf16_t|int32_t
+|MI, MT|fp32|fp32_t|int32_t
|===
==== AVG_POOL2D
@@ -134,7 +136,10 @@ for_each(0 <= n < N, 0 <= oy < OH, 0 <= ox < OW, 0 <= c < C ) {
|Any|signed 8|int8_t|int32_t
|Any|signed 16|int16_t|int32_t
-|MI, MT|floating-point|float_t|float_t
+|MI, MT|fp16 with fp16 accumulate|fp16_t|fp16_t
+|MI, MT|fp16 with fp32 accumulate|fp16_t|fp32_t
+|MI, MT|bf16 with fp32 accumulate|bf16_t|fp32_t
+|MI, MT|fp32|fp32_t|fp32_t
|===
==== CONV2D
@@ -198,7 +203,10 @@ for_each(0 <= n < N, 0 <= oy < OH, 0 <= ox < OW; 0 <= oc < OC) {
|Any|signed 8x8|int8_t|int8_t|int32_t
|Any|signed 8x4|int8_t|int4_t|int32_t
|Any|signed 16x8|int16_t|int8_t|int48_t
-|MI, MT|floating-point|float_t|float_t|float_t
+|MI, MT|fp16 with fp16 accumulate|fp16_t|fp16_t|fp16_t
+|MI, MT|fp16 with fp32 accumulate|fp16_t|fp16_t|fp32_t
+|MI, MT|bf16 with fp32 accumulate|bf16_t|bf16_t|fp32_t
+|MI, MT|fp32|fp32_t|fp32_t|fp32_t
|===
==== CONV3D
@@ -265,7 +273,10 @@ for_each(0 <= n < N, 0 <= od < OD, 0 <= oy < OH, 0 <= ox < OW; 0 <= oc < OC) {
|Any|signed 8x8|int8_t|int8_t|int32_t
|Any|signed 8x4|int8_t|int4_t|int32_t
|Any|signed 16x8|int16_t|int8_t|int48_t
-|MI, MT|floating-point|float_t|float_t|float_t
+|MI, MT|fp16 with fp16 accumulate|fp16_t|fp16_t|fp16_t
+|MI, MT|fp16 with fp32 accumulate|fp16_t|fp16_t|fp32_t
+|MI, MT|bf16 with fp32 accumulate|bf16_t|bf16_t|fp32_t
+|MI, MT|fp32|fp32_t|fp32_t|fp32_t
|===
@@ -330,7 +341,10 @@ for_each(0 <= n < N, 0 <= oy < OH, 0 <= ox < OW; 0 <= c < C, 0 <= m < M) {
|Any|signed 8x8|int8_t|int8_t|int32_t
|Any|signed 8x4|int8_t|int4_t|int32_t
|Any|signed 16x8|int16_t|int8_t|int48_t
-|MI, MT|floating-point|float_t|float_t|float_t
+|MI, MT|fp16 with fp16 accumulate|fp16_t|fp16_t|fp16_t
+|MI, MT|fp16 with fp32 accumulate|fp16_t|fp16_t|fp32_t
+|MI, MT|bf16 with fp32 accumulate|bf16_t|bf16_t|fp32_t
+|MI, MT|fp32|fp32_t|fp32_t|fp32_t
|===
==== FFT2D
@@ -394,7 +408,8 @@ for_each(0 <= n < N, 0 <= oy < H, 0 <= ox < W) {
|===
|Profile|Mode|in_out_t
-|MI,MT|floating-point|float
+
+|MI,MT|fp32|fp32_t
|===
==== FULLY_CONNECTED
@@ -442,7 +457,10 @@ for_each(0 <= n < N, 0 <= oc < OC) {
|Any|signed 8x8|int8_t|int8_t|int32_t
|Any|signed 8x4|int8_t|int4_t|int32_t
|Any|signed 16x8 |int16_t|int8_t|int48_t
-|MI, MT|floating-point|float_t|float_t|float_t
+|MI, MT|fp16 with fp16 accumulate|fp16_t|fp16_t|fp16_t
+|MI, MT|fp16 with fp32 accumulate|fp16_t|fp16_t|fp32_t
+|MI, MT|bf16 with fp32 accumulate|bf16_t|bf16_t|fp32_t
+|MI, MT|fp32|fp32_t|fp32_t|fp32_t
|===
==== MATMUL
@@ -485,7 +503,10 @@ for_each(0 <= n < N, 0 <= h < H, 0 <= w < W) {
|Any|signed 8x8|int8_t|int32_t
|Any|signed 16x16|int16_t|int48_t
-|MI, MT|floating-point|float_t|float_t
+|MI, MT|fp16 with fp16 accumulate|fp16_t|fp16_t
+|MI, MT|fp16 with fp32 accumulate|fp16_t|fp32_t
+|MI, MT|bf16 with fp32 accumulate|bf16_t|fp32_t
+|MI, MT|fp32|fp32_t|fp32_t
|===
==== MAX_POOL2D
@@ -540,7 +561,9 @@ for_each(0 <= n < N, 0 <= oy < H, 0 <= ox < W, 0 <= c < C ) {
|Any|signed 8|int8_t
|Any|16-bit|int16_t
-|MI, MT|floating-point|float_t
+|MI, MT|fp16|fp16_t
+|MI, MT|bf16|bf16_t
+|MI, MT|fp32|fp32_t
|===
==== RFFT2D
@@ -587,7 +610,8 @@ for_each(0 <= n < N, 0 <= oy < H/2 + 1, 0 <= ox < W/2 + 1) {
|===
|Profile|Mode|in_out_t
-|MI,MT|floating-point|float
+
+|MI,MT|fp32|fp32_t
|===
@@ -650,5 +674,8 @@ for_each(0 <= n < N, 0 <= iy < IH, 0 <= ix < IW, 0 <= oc < OC,
|Any|signed 8x8|int8_t|int8_t|int32_t
|Any|signed 8x4|int8_t|int4_t|int32_t
|Any|signed 16x8|int16_t|int8_t|int48_t
-|MI, MT|floating-point|float_t|float_t|float_t
+|MI, MT|fp16 with fp16 accumulate|fp16_t|fp16_t|fp16_t
+|MI, MT|fp16 with fp32 accumulate|fp16_t|fp16_t|fp32_t
+|MI, MT|bf16 with fp32 accumulate|bf16_t|bf16_t|fp32_t
+|MI, MT|fp32|fp32_t|fp32_t|fp32_t
|===
diff --git a/chapters/type_conversion.adoc b/chapters/type_conversion.adoc
index c19d834..4a5349b 100644
--- a/chapters/type_conversion.adoc
+++ b/chapters/type_conversion.adoc
@@ -33,9 +33,9 @@ for_each(index in shape) {
out = (in != 0) ? true : false;
} else if (in_t == bool_t) {
out = (in) ? 1 : 0;
- } else if (out_t == float_t) {
+ } else if (out_t == fp16_t || out_t == bf16_t || out_t == fp32_t) {
out = round_to_nearest_float(in);
- } else if (in_t == float_t) {
+ } else if (in_t == fp16_t || in_t == bf16_t || in_t == fp32_t) {
out = apply_clip<out_t>(round_to_nearest_int(in), minimum<out_t>, maximum<out_t>);
} else if (sizeof(out_t) >= sizeof(in_t)) {
out = sign_extend(in);
@@ -57,18 +57,30 @@ for_each(index in shape) {
|Any|signed 8 to bool|int8_t|bool_t
|Any|signed 8 to signed 16|int8_t|int16_t
|Any|signed 8 to signed 32|int8_t|int32_t
-|MI, MT|signed 8 to floating-point|int8_t|float_t
+|MI, MT|signed 8 to fp16|int8_t|fp16_t
+|MI, MT|signed 8 to bf16|int8_t|bf16_t
+|MI, MT|signed 8 to fp32|int8_t|fp32_t
|Any|signed 16 to bool|int16_t|bool_t
|Any|signed 16 to signed 8|int16_t|int8_t
|Any|signed 16 to signed 32|int16_t|int32_t
-|MI, MT|signed 16 to floating-point|int16_t|float_t
+|MI, MT|signed 16 to fp16|int16_t|fp16_t
+|MI, MT|signed 16 to bf16|int16_t|bf16_t
+|MI, MT|signed 16 to fp32|int16_t|fp32_t
|Any|signed 32 to bool|int32_t|bool_t
|Any|signed 32 to signed 8|int32_t|int8_t
|Any|signed 32 to signed 16|int32_t|int16_t
-|MI, MT|signed 32 to floating-point|int32_t|float_t
-|MI, MT|floating-point to signed 8|float_t|int8_t
-|MI, MT|floating-point to signed 16|float_t|int16_t
-|MI, MT|floating-point to signed 32|float_t|int32_t
+|MI, MT|signed 32 to fp16|int32_t|fp16_t
+|MI, MT|signed 32 to bf16|int32_t|bf16_t
+|MI, MT|signed 32 to fp32|int32_t|fp32_t
+|MI, MT|fp16 to signed 8|fp16_t|int8_t
+|MI, MT|fp16 to signed 16|fp16_t|int16_t
+|MI, MT|fp16 to signed 32|fp16_t|int32_t
+|MI, MT|bf16 to signed 8|bf16_t|int8_t
+|MI, MT|bf16 to signed 16|bf16_t|int16_t
+|MI, MT|bf16 to signed 32|bf16_t|int32_t
+|MI, MT|fp32 to signed 8|fp32_t|int8_t
+|MI, MT|fp32 to signed 16|fp32_t|int16_t
+|MI, MT|fp32 to signed 32|fp32_t|int32_t
|===
==== RESCALE
diff --git a/tools/dictionary.dic b/tools/dictionary.dic
index 29be61e..b062ac2 100644
--- a/tools/dictionary.dic
+++ b/tools/dictionary.dic
@@ -41,6 +41,7 @@ MERCHANTABILITY
MUL
multipler
NPUs
+pre
precisions
pseudocode
Pseudocode