From 42229d03fe55c45f0ad2ba68f190f3d68a78ae79 Mon Sep 17 00:00:00 2001 From: Eric Kunze Date: Thu, 7 Apr 2022 16:54:46 -0700 Subject: Initial work on floating-point type definition Define operations in terms of common floating-point data types. Definitions for the data types are in the introduction. Added a section to describe status of the different profiles. Signed-off-by: Eric Kunze Change-Id: Iac57026806acfb7913f40af61176322fb02b7cc1 --- chapters/activation_funcs.adoc | 12 ++++++++--- chapters/comparison.adoc | 12 ++++++++--- chapters/data_layout.adoc | 28 ++++++++++++++++++------ chapters/data_nodes.adoc | 10 ++++++--- chapters/ewise_binary.adoc | 24 +++++++++++++++------ chapters/ewise_ternary.adoc | 4 +++- chapters/ewise_unary.adoc | 24 +++++++++++++++------ chapters/image.adoc | 42 ++++++++++++++++++++---------------- chapters/introduction.adoc | 24 +++++++++++++++++++-- chapters/pseudocode.adoc | 16 ++++++++++---- chapters/reduction.adoc | 16 ++++++++++---- chapters/scatter_gather.adoc | 4 +++- chapters/tensor_ops.adoc | 49 ++++++++++++++++++++++++++++++++---------- chapters/type_conversion.adoc | 28 +++++++++++++++++------- tools/dictionary.dic | 1 + 15 files changed, 217 insertions(+), 77 deletions(-) diff --git a/chapters/activation_funcs.adoc b/chapters/activation_funcs.adoc index 87f213c..27ba596 100644 --- a/chapters/activation_funcs.adoc +++ b/chapters/activation_funcs.adoc @@ -44,7 +44,9 @@ for_each(index in shape) { |Any|signed 8|int8_t |Any|signed 16|int16_t -|MI, MT|floating-point|float_t +|MI, MT|fp16|fp16_t +|MI, MT|bf16|bf16_t +|MI, MT|fp32|fp32_t |=== ==== SIGMOID @@ -81,7 +83,9 @@ generate_lookup_table(&sigmoid_table, &sigmoid_reference); |=== |Profile|Mode|in_out_t -|MI, MT|floating-point|float_t +|MI, MT|fp16|fp16_t +|MI, MT|bf16|bf16_t +|MI, MT|fp32|fp32_t |=== ==== TANH @@ -119,5 +123,7 @@ generate_lookup_table(&tanh_table, &tanh_reference); |=== |Profile|Mode|in_out_t -|MI, MT|floating-point|float_t +|MI, MT|fp16|fp16_t +|MI, 
MT|bf16|bf16_t +|MI, MT|fp32|fp32_t |=== diff --git a/chapters/comparison.adoc b/chapters/comparison.adoc index 67f3506..5c27071 100644 --- a/chapters/comparison.adoc +++ b/chapters/comparison.adoc @@ -47,7 +47,9 @@ for_each(index in shape) { |Profile|Mode|in_t|out_t |Any|signed 32|int32_t|bool_t -|MI, MT|floating-point|float_t|bool_t +|MI, MT|fp16|fp16_t|bool_t +|MI, MT|bf16|bf16_t|bool_t +|MI, MT|fp32|fp32_t|bool_t |=== ==== GREATER @@ -87,7 +89,9 @@ for_each(index in shape) { |Profile|Mode|in_t|out_t |Any|signed 32|int32_t|bool_t -|MI, MT|floating-point|float_t|bool_t +|MI, MT|fp16|fp16_t|bool_t +|MI, MT|bf16|bf16_t|bool_t +|MI, MT|fp32|fp32_t|bool_t |=== ==== GREATER_EQUAL @@ -128,5 +132,7 @@ for_each(index in shape) { |Profile|Mode|in_t|out_t |Any|signed 32|int32_t|bool_t -|MI, MT|floating-point|float_t|bool_t +|MI, MT|fp16|fp16_t|bool_t +|MI, MT|bf16|bf16_t|bool_t +|MI, MT|fp32|fp32_t|bool_t |=== diff --git a/chapters/data_layout.adoc b/chapters/data_layout.adoc index 7bc2413..0c5c4d6 100644 --- a/chapters/data_layout.adoc +++ b/chapters/data_layout.adoc @@ -61,7 +61,9 @@ for_each(index1 in shape) { |Any|signed 8|int8_t |Any|signed 16|int16_t |Any|signed 32|int32_t -|MI, MT|floating-point|float_t +|MI, MT|fp16|fp16_t +|MI, MT|bf16|bf16_t +|MI, MT|fp32|fp32_t |=== ==== PAD @@ -112,7 +114,9 @@ for_each(index in shape) { |Any|signed 8|int8_t |Any|signed 16|int16_t |Any|signed 32|int32_t -|MI, MT|floating-point|float_t +|MI, MT|fp16|fp16_t +|MI, MT|bf16|bf16_t +|MI, MT|fp32|fp32_t |=== ==== RESHAPE @@ -156,7 +160,9 @@ for_each(index in shape) { |Any|signed 8|int8_t |Any|signed 16|int16_t |Any|signed 32|int32_t -|MI, MT|floating-point|float_t +|MI, MT|fp16|fp16_t +|MI, MT|bf16|bf16_t +|MI, MT|fp32|fp32_t |=== ==== REVERSE @@ -195,7 +201,9 @@ for_each(index in shape) { |Any|signed 8|int8_t |Any|signed 16|int16_t |Any|signed 32|int32_t -|MI, MT|floating-point|float_t +|MI, MT|fp16|fp16_t +|MI, MT|bf16|bf16_t +|MI, MT|fp32|fp32_t |=== ==== SLICE @@ -247,7 +255,9 @@ 
for_each(index in shape) { |Any|signed 8|int8_t |Any|signed 16|int16_t |Any|signed 32|int32_t -|MI, MT|floating-point|float_t +|MI, MT|fp16|fp16_t +|MI, MT|bf16|bf16_t +|MI, MT|fp32|fp32_t |=== ==== TILE @@ -288,7 +298,9 @@ for_each(index in shape) { |Any|signed 8|int8_t |Any|signed 16|int16_t |Any|signed 32|int32_t -|MI, MT|floating-point|float_t +|MI, MT|fp16|fp16_t +|MI, MT|bf16|bf16_t +|MI, MT|fp32|fp32_t |=== ==== TRANSPOSE @@ -344,5 +356,7 @@ for_each(index in shape) { |Any|signed 8|int8_t |Any|signed 16|int16_t |Any|signed 32|int32_t -|MI, MT|floating-point|float_t +|MI, MT|fp16|fp16_t +|MI, MT|bf16|bf16_t +|MI, MT|fp32|fp32_t |=== diff --git a/chapters/data_nodes.adoc b/chapters/data_nodes.adoc index 9d32a62..5f45464 100644 --- a/chapters/data_nodes.adoc +++ b/chapters/data_nodes.adoc @@ -1,7 +1,7 @@ // // This confidential and proprietary software may be used only as // authorised by a licensing agreement from ARM Limited -// (C) COPYRIGHT 2020-2021 ARM Limited +// (C) COPYRIGHT 2020-2022 ARM Limited // ALL RIGHTS RESERVED // The entire notice above must be reproduced on all authorised // copies and copies may only be made to the extent permitted @@ -31,7 +31,9 @@ A node containing constant data for use as the input to an operation. May hold d |Any|signed 8|int8_t |Any|signed 16|int16_t |Any|signed 32|int32_t -|MI, MT|floating-point|float_t +|MI, MT|fp16|fp16_t +|MI, MT|bf16|bf16_t +|MI, MT|fp32|fp32_t |=== ==== IDENTITY @@ -56,5 +58,7 @@ Returns a tensor with the same shape, type, and contents as the input. 
|Any|signed 8|int8_t |Any|signed 16|int16_t |Any|signed 32|int32_t -|MI, MT|floating-point|float_t +|MI, MT|fp16|fp16_t +|MI, MT|bf16|bf16_t +|MI, MT|fp32|fp32_t |=== diff --git a/chapters/ewise_binary.adoc b/chapters/ewise_binary.adoc index e25fb8d..27efb44 100644 --- a/chapters/ewise_binary.adoc +++ b/chapters/ewise_binary.adoc @@ -44,7 +44,9 @@ for_each(index in shape) { |Profile|Mode|in_out_t |Any|signed 32|int32_t -|MI, MT|floating-point|float_t +|MI, MT|fp16|fp16_t +|MI, MT|bf16|bf16_t +|MI, MT|fp32|fp32_t |=== ==== ARITHMETIC_RIGHT_SHIFT @@ -483,7 +485,9 @@ for_each(index in shape) { |Profile|Mode|in_out_t |Any|signed 32|int32_t -|MI, MT|floating-point|float_t +|MI, MT|fp16|fp16_t +|MI, MT|bf16|bf16_t +|MI, MT|fp32|fp32_t |=== ==== MINIMUM @@ -521,7 +525,9 @@ for_each(index in shape) { |Profile|Mode|in_out_t |Any|signed 32|int32_t -|MI, MT|floating-point|float_t +|MI, MT|fp16|fp16_t +|MI, MT|bf16|bf16_t +|MI, MT|fp32|fp32_t |=== ==== MUL @@ -571,7 +577,9 @@ for_each(index in shape) { |Any|signed 8|int8_t|int32_t |Any|signed 16|int16_t|int32_t |Any|signed 32|int32_t|int32_t -|MI, MT|floating-point|float_t|float_t +|MI, MT|fp16|fp16_t|fp16_t +|MI, MT|bf16|bf16_t|bf16_t +|MI, MT|fp32|fp32_t|fp32_t |=== ==== POW @@ -608,7 +616,9 @@ for_each(index in shape) { |=== |Profile|Mode|in_out_t -|MI, MT|floating-point|float_t +|MI, MT|fp16|fp16_t +|MI, MT|bf16|bf16_t +|MI, MT|fp32|fp32_t |=== ==== SUB @@ -646,7 +656,9 @@ for_each(index in shape) { |Profile|Mode|in_out_t |Any|signed 32|int32_t -|MI, MT|floating-point|float_t +|MI, MT|fp16|fp16_t +|MI, MT|bf16|bf16_t +|MI, MT|fp32|fp32_t |=== ==== TABLE diff --git a/chapters/ewise_ternary.adoc b/chapters/ewise_ternary.adoc index e61e1c2..84fe14d 100644 --- a/chapters/ewise_ternary.adoc +++ b/chapters/ewise_ternary.adoc @@ -53,5 +53,7 @@ for_each(index in shape) { |Any|signed 8|bool_t|int8_t |Any|signed 16|bool_t|int16_t |Any|signed 32|bool_t|int32_t -|MI, MT|floating-point|bool_t|float_t +|MI, MT|fp16|bool_t|fp16_t +|MI, 
MT|bf16|bool_t|bf16_t +|MI, MT|fp32|bool_t|fp32_t |=== diff --git a/chapters/ewise_unary.adoc b/chapters/ewise_unary.adoc index ff7aef9..8c88f47 100644 --- a/chapters/ewise_unary.adoc +++ b/chapters/ewise_unary.adoc @@ -193,7 +193,9 @@ for_each(index in shape) { |=== |Profile|Mode|in_out_t -|MI, MT|floating-point|float_t +|MI, MT|fp16|fp16_t +|MI, MT|bf16|bf16_t +|MI, MT|fp32|fp32_t |=== ==== FLOOR @@ -232,7 +234,9 @@ for_each(index in shape) { |=== |Profile|Mode|in_out_t -|MI, MT|floating-point|float_t +|MI, MT|fp16|fp16_t +|MI, MT|bf16|bf16_t +|MI, MT|fp32|fp32_t |=== ==== LOG @@ -271,7 +275,9 @@ for_each(index in shape) { |=== |Profile|Mode|in_out_t -|MI, MT|floating-point|float_t +|MI, MT|fp16|fp16_t +|MI, MT|bf16|bf16_t +|MI, MT|fp32|fp32_t |=== ==== LOGICAL_NOT @@ -351,7 +357,9 @@ for_each(index in shape) { |Any|signed 8|int8_t|int32_t |Any|signed 16|int16_t|int32_t |Any|signed 32|int32_t|int32_t -|MI, MT|floating-point|float_t|float_t +|MI, MT|fp16|fp16_t|fp16_t +|MI, MT|bf16|bf16_t|bf16_t +|MI, MT|fp32|fp32_t|fp32_t |=== ==== RECIPROCAL @@ -390,7 +398,9 @@ for_each(index in shape) { |=== |Profile|Mode|in_out_t -|MI, MT|floating-point|float_t +|MI, MT|fp16|fp16_t +|MI, MT|bf16|bf16_t +|MI, MT|fp32|fp32_t |=== ==== RSQRT @@ -435,5 +445,7 @@ for_each(index in shape) { |=== |Profile|Mode|in_out_t -|MI, MT|floating-point|float_t +|MI, MT|fp16|fp16_t +|MI, MT|bf16|bf16_t +|MI, MT|fp32|fp32_t |=== diff --git a/chapters/image.adoc b/chapters/image.adoc index 0b25369..690480c 100644 --- a/chapters/image.adoc +++ b/chapters/image.adoc @@ -81,41 +81,45 @@ ERROR_IF(OH != idiv_check((IH-1)*scale_y_n - offset_y + border_y, scale_y_d) + 1 ERROR_IF(OW != idiv_check((IW-1)*scale_x_n - offset_x + border_x, scale_x_d) + 1); for_each(0 <= n < N, 0 <= oy < OH, 0 <= ox < OW; 0 <= c < C) { out_t acc; - y = oy * scale_y_d + offset_y; - x = ox * scale_x_d + offset_x; - iy = floor(y / scale_y_n); - ix = floor(x / scale_x_n); - if (resize_t == float_t) { - dy = ((float_t)y / 
(float_t)scale_y_n) - iy; - dx = ((float_t)x / (float_t)scale_x_n) - ix; + resize_t dx, dy; + + int32_t y = oy * scale_y_d + offset_y; + int32_t x = ox * scale_x_d + offset_x; + int16_t iy = floor(y / scale_y_n); + int16_t ix = floor(x / scale_x_n); + + if (is_floating_point(resize_t)) { + dy = ((resize_t)y / (resize_t)scale_y_n) - iy; + dx = ((resize_t)x / (resize_t)scale_x_n) - ix; } else { dy = y - iy * scale_y_n; dx = y - ix * scale_x_n; } // Note that -1 <= iy < IH and -1 <= ix < IW - iy0 = apply_max(iy, 0); - iy1 = apply_min(iy+1, IH-1); - ix0 = apply_max(ix, 0); - ix1 = apply_min(ix+1, IW-1); + int16_t iy0 = apply_max(iy, 0); + int16_t iy1 = apply_min(iy+1, IH-1); + int16_t ix0 = apply_max(ix, 0); + int16_t ix1 = apply_min(ix+1, IW-1); if (mode==BILINEAR) { - v00 = tensor_read(input, [N,IH,IW,C], [n,iy0,ix0,c]); - v01 = tensor_read(input, [N,IH,IW,C], [n,iy0,ix1,c]); - v10 = tensor_read(input, [N,IH,IW,C], [n,iy1,ix0,c]); - v11 = tensor_read(input, [N,IH,IW,C], [n,iy1,ix1,c]); + in_t v00 = tensor_read(input, [N,IH,IW,C], [n,iy0,ix0,c]); + in_t v01 = tensor_read(input, [N,IH,IW,C], [n,iy0,ix1,c]); + in_t v10 = tensor_read(input, [N,IH,IW,C], [n,iy1,ix0,c]); + in_t v11 = tensor_read(input, [N,IH,IW,C], [n,iy1,ix1,c]); acc = v00 * (scale_y_n - dy) * (scale_x_n - dx); acc += v01 * (scale_y_n - dy) * dx; acc += v10 * dy * (scale_x_n - dx); acc += v11 * dy * dx; tensor_write(output, [N,OH,OW,C], [n,oy,ox,c], acc); } else if (mode==NEAREST) { - if (resize_t == float_t) { + int32_t iy, ix; + if (is_floating_point(resize_t)) { iy = (dy >= 0.5) ? iy1 : iy0; ix = (dx >= 0.5) ? ix1 : ix0; } else { iy = (2*dy >= scale_y_n) ? iy1 : iy0; ix = (2*dx >= scale_x_n) ? 
ix1 : ix0; } - v = tensor_read(input, [N,IH,IW,C], [n,iy,ix,c]); + in_t v = tensor_read(input, [N,IH,IW,C], [n,iy,ix,c]); tensor_write(output, [N,OH,OW,C], [n,oy,ox,c], v); } } @@ -130,7 +134,9 @@ for_each(0 <= n < N, 0 <= oy < OH, 0 <= ox < OW; 0 <= c < C) { |Any|signed 8, nearest |int16_t|int8_t|int8_t |Any|signed 16, bilinear|int16_t|int16_t|int48_t |Any|signed 16, nearest |int16_t|int16_t|int16_t -|MI,MT|floating-point |float_t|float_t|float_t +|MI,MT|fp16|fp32_t|fp16_t|fp16_t +|MI,MT|bf16|fp32_t|bf16_t|bf16_t +|MI,MT|fp32|fp32_t|fp32_t|fp32_t |=== *Resize Modes:* diff --git a/chapters/introduction.adoc b/chapters/introduction.adoc index 9b2e0c0..93206ca 100644 --- a/chapters/introduction.adoc +++ b/chapters/introduction.adoc @@ -106,6 +106,16 @@ The following table summarizes the three profiles: |Main Training|TOSA-MT|Yes|Yes|Yes |=== +=== Status + +The TOSA specification is a work in progress. + +* The Base Inference profile should be considered to be near release quality, with conformance tests available. +* The Main Inference profile has most of the expected operators in place, but is still subject to change. +* The reference model and conformance tests do not yet support all of the floating-point types that have been defined. +* There is not currently a conformance test suite available for Main Inference. +* The Main Training profile is pre-alpha: significant work still needs to be done for the profile, and no conformance tests are available. + === Compliance This section defines when a TOSA implementation is compliant to a given TOSA specification profile. @@ -267,10 +277,20 @@ The number formats supported by a given operator are listed in its table of supp | (1<<47)-1 |Signed 48-bit two's-complement value. -|float_t +|fp16_t +| -infinity +| +infinity +| 16-bit floating-point value. + +|bf16_t +| -infinity +| +infinity +| 16-bit brain float value. + +|fp32_t | -infinity | +infinity -|floating-point number. Must have features defined in the section <<Floating-point>>. 
+| 32-bit floating-point value. |=== Note: In this specification minimum and maximum will denote the minimum and maximum values of the data as stored in memory (ignoring the zero point). diff --git a/chapters/pseudocode.adoc b/chapters/pseudocode.adoc index 0747387..1d6c2f2 100644 --- a/chapters/pseudocode.adoc +++ b/chapters/pseudocode.adoc @@ -152,7 +152,7 @@ The following functions provide arithmetic while defining requirements such that [source,c++] ---- in_t apply_add(in_t a, in_t b) { - if ( == float_t) return a + b; + if (is_floating_point(in_t)) return a + b; int64_t c = (int64_t)a + (int64_t)b; REQUIRE(c >= minimum && c <= maximum); return (in_t)c; @@ -188,7 +188,7 @@ in_t apply_log(in_t input) { } in_t apply_max(in_t a, in_t b) { - if (in_t == float_t) { + if (is_floating_point(in_t)) { if (isNaN(a) || isNaN(b)) { return NaN; } @@ -197,7 +197,7 @@ in_t apply_max(in_t a, in_t b) { } in_t apply_min(in_t a, in_t b) { - if (in_t == float_t) { + if (is_floating_point(in_t)) { if (isNaN(a) || isNaN(b)) { return NaN; } @@ -214,7 +214,7 @@ in_t apply_sqrt(in_t input) { } in_t apply_sub(in_t a, in_t b) { - if (in_t == float_t) return a - b; + if (is_floating_point(in_t)) return a - b; int64_t c = (int64_t)a - (int64_t)b; REQUIRE(c >= minimum && c <= maximum); return (in_t)c; @@ -238,6 +238,8 @@ int32_t count_leading_zeros(int32_t a) { ==== Numeric Conversion Helpers The following definitions are used in pseudocode to do numeric conversions. +Where the *float_t* type is used, it represents all of the floating-point data types supported by the given profile. +See <<Number formats>> for details on the floating-point formats. [source,c++] ---- @@ -276,6 +278,12 @@ Generic helper functions used to keep the pseudocode concise. 
[source,c++] ---- +bool_t is_floating_point(type) { + if (type == fp16_t || type == fp32_t || type == bf16_t) + return true; + return false; +} + int32_t idiv(int32_t input1, int32_t input2) { return input1 / input2; // Integer divide that truncates towards zero } diff --git a/chapters/reduction.adoc b/chapters/reduction.adoc index fdf30df..368d82e 100644 --- a/chapters/reduction.adoc +++ b/chapters/reduction.adoc @@ -136,7 +136,9 @@ for_each(index in shape1) { |Any|signed 8|int8_t |Any|signed 16|int16_t |Any|signed 32|int32_t -|MI, MT|floating-point|float_t +|MI, MT|fp16|fp16_t +|MI, MT|bf16|bf16_t +|MI, MT|fp32|fp32_t |=== ==== REDUCE_MIN @@ -179,7 +181,9 @@ for_each(index in shape1) { |Any|signed 8|int8_t |Any|signed 16|int16_t |Any|signed 32|int32_t -|MI, MT|floating-point|float_t +|MI, MT|fp16|fp16_t +|MI, MT|bf16|bf16_t +|MI, MT|fp32|fp32_t |=== ==== REDUCE_PRODUCT @@ -220,7 +224,9 @@ for_each(index in shape1) { |=== |Profile|Mode|in_out_t -|MI, MT|floating-point|float_t +|MI, MT|fp16|fp16_t +|MI, MT|bf16|bf16_t +|MI, MT|fp32|fp32_t |=== ==== REDUCE_SUM @@ -262,6 +268,8 @@ for_each(index in shape1) { |Profile|Mode|in_out_t |Any|signed 32|int32_t -|MI, MT|floating-point|float_t +|MI, MT|fp16|fp16_t +|MI, MT|bf16|bf16_t +|MI, MT|fp32|fp32_t |=== diff --git a/chapters/scatter_gather.adoc b/chapters/scatter_gather.adoc index 63f30dc..524bfd3 100644 --- a/chapters/scatter_gather.adoc +++ b/chapters/scatter_gather.adoc @@ -105,5 +105,7 @@ for_each(0 <= n < N, 0 <= w < W, 0 <= c < C) { |Any|signed 8|int32_t|int8_t |Any|signed 16|int32_t|int16_t |Any|signed 32|int32_t|int32_t -|MI,MT|float|int32_t|float +|MI,MT|fp16|int32_t|fp16_t +|MI,MT|bf16|int32_t|bf16_t +|MI,MT|fp32|int32_t|fp32_t |=== diff --git a/chapters/tensor_ops.adoc b/chapters/tensor_ops.adoc index 8bcb115..fb657f7 100644 --- a/chapters/tensor_ops.adoc +++ b/chapters/tensor_ops.adoc @@ -61,7 +61,9 @@ for_each(left_index in left_shape) { |Any|signed 8|int8_t|int32_t |Any|signed 16|int16_t|int32_t -|MI, 
MT|floating-point|float_t|int32_t +|MI, MT|fp16|fp16_t|int32_t +|MI, MT|bf16|bf16_t|int32_t +|MI, MT|fp32|fp32_t|int32_t |=== ==== AVG_POOL2D @@ -134,7 +136,10 @@ for_each(0 <= n < N, 0 <= oy < OH, 0 <= ox < OW, 0 <= c < C ) { |Any|signed 8|int8_t|int32_t |Any|signed 16|int16_t|int32_t -|MI, MT|floating-point|float_t|float_t +|MI, MT|fp16 with fp16 accumulate|fp16_t|fp16_t +|MI, MT|fp16 with fp32 accumulate|fp16_t|fp32_t +|MI, MT|bf16 with fp32 accumulate|bf16_t|fp32_t +|MI, MT|fp32|fp32_t|fp32_t |=== ==== CONV2D @@ -198,7 +203,10 @@ for_each(0 <= n < N, 0 <= oy < OH, 0 <= ox < OW; 0 <= oc < OC) { |Any|signed 8x8|int8_t|int8_t|int32_t |Any|signed 8x4|int8_t|int4_t|int32_t |Any|signed 16x8|int16_t|int8_t|int48_t -|MI, MT|floating-point|float_t|float_t|float_t +|MI, MT|fp16 with fp16 accumulate|fp16_t|fp16_t|fp16_t +|MI, MT|fp16 with fp32 accumulate|fp16_t|fp16_t|fp32_t +|MI, MT|bf16 with fp32 accumulate|bf16_t|bf16_t|fp32_t +|MI, MT|fp32|fp32_t|fp32_t|fp32_t |=== ==== CONV3D @@ -265,7 +273,10 @@ for_each(0 <= n < N, 0 <= od < OD, 0 <= oy < OH, 0 <= ox < OW; 0 <= oc < OC) { |Any|signed 8x8|int8_t|int8_t|int32_t |Any|signed 8x4|int8_t|int4_t|int32_t |Any|signed 16x8|int16_t|int8_t|int48_t -|MI, MT|floating-point|float_t|float_t|float_t +|MI, MT|fp16 with fp16 accumulate|fp16_t|fp16_t|fp16_t +|MI, MT|fp16 with fp32 accumulate|fp16_t|fp16_t|fp32_t +|MI, MT|bf16 with fp32 accumulate|bf16_t|bf16_t|fp32_t +|MI, MT|fp32|fp32_t|fp32_t|fp32_t |=== @@ -330,7 +341,10 @@ for_each(0 <= n < N, 0 <= oy < OH, 0 <= ox < OW; 0 <= c < C, 0 <= m < M) { |Any|signed 8x8|int8_t|int8_t|int32_t |Any|signed 8x4|int8_t|int4_t|int32_t |Any|signed 16x8|int16_t|int8_t|int48_t -|MI, MT|floating-point|float_t|float_t|float_t +|MI, MT|fp16 with fp16 accumulate|fp16_t|fp16_t|fp16_t +|MI, MT|fp16 with fp32 accumulate|fp16_t|fp16_t|fp32_t +|MI, MT|bf16 with fp32 accumulate|bf16_t|bf16_t|fp32_t +|MI, MT|fp32|fp32_t|fp32_t|fp32_t |=== ==== FFT2D @@ -394,7 +408,8 @@ for_each(0 <= n < N, 0 <= oy < H, 0 <= 
ox < W) { |=== |Profile|Mode|in_out_t -|MI,MT|floating-point|float + +|MI,MT|fp32|fp32_t |=== ==== FULLY_CONNECTED @@ -442,7 +457,10 @@ for_each(0 <= n < N, 0 <= oc < OC) { |Any|signed 8x8|int8_t|int8_t|int32_t |Any|signed 8x4|int8_t|int4_t|int32_t |Any|signed 16x8 |int16_t|int8_t|int48_t -|MI, MT|floating-point|float_t|float_t|float_t +|MI, MT|fp16 with fp16 accumulate|fp16_t|fp16_t|fp16_t +|MI, MT|fp16 with fp32 accumulate|fp16_t|fp16_t|fp32_t +|MI, MT|bf16 with fp32 accumulate|bf16_t|bf16_t|fp32_t +|MI, MT|fp32|fp32_t|fp32_t|fp32_t |=== ==== MATMUL @@ -485,7 +503,10 @@ for_each(0 <= n < N, 0 <= h < H, 0 <= w < W) { |Any|signed 8x8|int8_t|int32_t |Any|signed 16x16|int16_t|int48_t -|MI, MT|floating-point|float_t|float_t +|MI, MT|fp16 with fp16 accumulate|fp16_t|fp16_t +|MI, MT|fp16 with fp32 accumulate|fp16_t|fp32_t +|MI, MT|bf16 with fp32 accumulate|bf16_t|fp32_t +|MI, MT|fp32|fp32_t|fp32_t |=== ==== MAX_POOL2D @@ -540,7 +561,9 @@ for_each(0 <= n < N, 0 <= oy < H, 0 <= ox < W, 0 <= c < C ) { |Any|signed 8|int8_t |Any|16-bit|int16_t -|MI, MT|floating-point|float_t +|MI, MT|fp16|fp16_t +|MI, MT|bf16|bf16_t +|MI, MT|fp32|fp32_t |=== ==== RFFT2D @@ -587,7 +610,8 @@ for_each(0 <= n < N, 0 <= oy < H/2 + 1, 0 <= ox < W/2 + 1) { |=== |Profile|Mode|in_out_t -|MI,MT|floating-point|float + +|MI,MT|fp32|fp32_t |=== @@ -650,5 +674,8 @@ for_each(0 <= n < N, 0 <= iy < IH, 0 <= ix < IW, 0 <= oc < OC, |Any|signed 8x8|int8_t|int8_t|int32_t |Any|signed 8x4|int8_t|int4_t|int32_t |Any|signed 16x8|int16_t|int8_t|int48_t -|MI, MT|floating-point|float_t|float_t|float_t +|MI, MT|fp16 with fp16 accumulate|fp16_t|fp16_t|fp16_t +|MI, MT|fp16 with fp32 accumulate|fp16_t|fp16_t|fp32_t +|MI, MT|bf16 with fp32 accumulate|bf16_t|bf16_t|fp32_t +|MI, MT|fp32|fp32_t|fp32_t|fp32_t |=== diff --git a/chapters/type_conversion.adoc b/chapters/type_conversion.adoc index c19d834..4a5349b 100644 --- a/chapters/type_conversion.adoc +++ b/chapters/type_conversion.adoc @@ -33,9 +33,9 @@ for_each(index in 
shape) { out = (in != 0) ? true : false; } else if (in_t == bool_t) { out = (in) ? 1 : 0; - } else if (out_t == float_t) { + } else if (out_t == fp16_t || out_t == bf16_t || out_t == fp32_t) { out = round_to_nearest_float(in); - } else if (in_t == float_t) { + } else if (in_t == fp16_t || in_t == bf16_t || in_t == fp32_t) { out = apply_clip(round_to_nearest_int(in), minimum, maximum); } else if (sizeof(out_t) >= sizeof(in_t)) { out = sign_extend(in); @@ -57,18 +57,30 @@ for_each(index in shape) { |Any|signed 8 to bool|int8_t|bool_t |Any|signed 8 to signed 16|int8_t|int16_t |Any|signed 8 to signed 32|int8_t|int32_t -|MI, MT|signed 8 to floating-point|int8_t|float_t +|MI, MT|signed 8 to fp16|int8_t|fp16_t +|MI, MT|signed 8 to bf16|int8_t|bf16_t +|MI, MT|signed 8 to fp32|int8_t|fp32_t |Any|signed 16 to bool|int16_t|bool_t |Any|signed 16 to signed 8|int16_t|int8_t |Any|signed 16 to signed 32|int16_t|int32_t -|MI, MT|signed 16 to floating-point|int16_t|float_t +|MI, MT|signed 16 to fp16|int16_t|fp16_t +|MI, MT|signed 16 to bf16|int16_t|bf16_t +|MI, MT|signed 16 to fp32|int16_t|fp32_t |Any|signed 32 to bool|int32_t|bool_t |Any|signed 32 to signed 8|int32_t|int8_t |Any|signed 32 to signed 16|int32_t|int16_t -|MI, MT|signed 32 to floating-point|int32_t|float_t -|MI, MT|floating-point to signed 8|float_t|int8_t -|MI, MT|floating-point to signed 16|float_t|int16_t -|MI, MT|floating-point to signed 32|float_t|int32_t +|MI, MT|signed 32 to fp16|int32_t|fp16_t +|MI, MT|signed 32 to bf16|int32_t|bf16_t +|MI, MT|signed 32 to fp32|int32_t|fp32_t +|MI, MT|fp16 to signed 8|fp16_t|int8_t +|MI, MT|fp16 to signed 16|fp16_t|int16_t +|MI, MT|fp16 to signed 32|fp16_t|int32_t +|MI, MT|bf16 to signed 8|bf16_t|int8_t +|MI, MT|bf16 to signed 16|bf16_t|int16_t +|MI, MT|bf16 to signed 32|bf16_t|int32_t +|MI, MT|fp32 to signed 8|fp32_t|int8_t +|MI, MT|fp32 to signed 16|fp32_t|int16_t +|MI, MT|fp32 to signed 32|fp32_t|int32_t |=== ==== RESCALE diff --git a/tools/dictionary.dic 
b/tools/dictionary.dic index 29be61e..b062ac2 100644 --- a/tools/dictionary.dic +++ b/tools/dictionary.dic @@ -41,6 +41,7 @@ MERCHANTABILITY MUL multipler NPUs +pre precisions pseudocode Pseudocode -- cgit v1.2.1