aboutsummaryrefslogtreecommitdiff
path: root/chapters/type_conversion.adoc
blob: 4a5349b930820e8914dfc1259c69730b7269512e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
//
// This confidential and proprietary software may be used only as
// authorised by a licensing agreement from ARM Limited
// (C) COPYRIGHT 2020-2022 ARM Limited
// ALL RIGHTS RESERVED
// The entire notice above must be reproduced on all authorised
// copies and copies may only be made to the extent permitted
// by a licensing agreement from ARM Limited.

=== Type Conversion

==== CAST

Casts a tensor from one data type to another.

*Arguments:*

|===
|Argument|Type|Name|Shape|Description

|Input|in_t*|input|shape|Input tensor
|Output|out_t*|output|shape|Output tensor
|===

*Operation Function:*

[source,c++]
----
for_each(index in shape) {
    in_t in = tensor_read<in_t>(input, shape, index);
    out_t out;
    if (out_t == bool_t) {
        out = (in != 0) ? true : false;
    } else if (in_t == bool_t) {
        out = (in) ? 1 : 0;
    } else if (out_t == fp16_t || out_t == bf16_t || out_t == fp32_t) {
        out = round_to_nearest_float(in);
    } else if (in_t == fp16_t || in_t == bf16_t || in_t == fp32_t) {
        out = apply_clip<out_t>(round_to_nearest_int(in), minimum<out_t>, maximum<out_t>);
    } else if (sizeof(out_t) >= sizeof(in_t)) {
        out = sign_extend(in);
    } else {
        out = truncate(in);
    }
    tensor_write<out_t>(output, shape, index, out);
}
----

*Supported Data Types:*

|===
|Profile|Mode|in_t|out_t

|Any|bool to signed 8|bool_t|int8_t
|Any|bool to signed 16|bool_t|int16_t
|Any|bool to signed 32|bool_t|int32_t
|Any|signed 8 to bool|int8_t|bool_t
|Any|signed 8 to signed 16|int8_t|int16_t
|Any|signed 8 to signed 32|int8_t|int32_t
|MI, MT|signed 8 to fp16|int8_t|fp16_t
|MI, MT|signed 8 to bf16|int8_t|bf16_t
|MI, MT|signed 8 to fp32|int8_t|fp32_t
|Any|signed 16 to bool|int16_t|bool_t
|Any|signed 16 to signed 8|int16_t|int8_t
|Any|signed 16 to signed 32|int16_t|int32_t
|MI, MT|signed 16 to fp16|int16_t|fp16_t
|MI, MT|signed 16 to bf16|int16_t|bf16_t
|MI, MT|signed 16 to fp32|int16_t|fp32_t
|Any|signed 32 to bool|int32_t|bool_t
|Any|signed 32 to signed 8|int32_t|int8_t
|Any|signed 32 to signed 16|int32_t|int16_t
|MI, MT|signed 32 to fp16|int32_t|fp16_t
|MI, MT|signed 32 to bf16|int32_t|bf16_t
|MI, MT|signed 32 to fp32|int32_t|fp32_t
|MI, MT|fp16 to signed 8|fp16_t|int8_t
|MI, MT|fp16 to signed 16|fp16_t|int16_t
|MI, MT|fp16 to signed 32|fp16_t|int32_t
|MI, MT|bf16 to signed 8|bf16_t|int8_t
|MI, MT|bf16 to signed 16|bf16_t|int16_t
|MI, MT|bf16 to signed 32|bf16_t|int32_t
|MI, MT|fp32 to signed 8|fp32_t|int8_t
|MI, MT|fp32 to signed 16|fp32_t|int16_t
|MI, MT|fp32 to signed 32|fp32_t|int32_t
|===

==== RESCALE

Rescales quantized values into a new domain. The scaling factor applied is multiplier * 2^-shift^.

*Arguments:*

|===
|Argument|Type|Name|Shape|Description

|Input|in_t*|input|shape|Input tensor from 1 to 4 dims
|Output|out_t*|output|shape|Output tensor with the same shape as input
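|Attribute|in_t|input_zp|-|Input tensor zero point. Must be zero unless in_t is int8_t or uint8_t (any value in the type's range) or uint16_t (0 or 32768).
|Attribute|out_t|output_zp|-|Output tensor zero point. Must be zero unless out_t is int8_t or uint8_t (any value in the type's range) or uint16_t (0 or 32768).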
|Input (MT profile) Attribute (BI/MI profiles)|mul_t*|multiplier|[NC]|Scaling multiplier array
|Input (MT profile) Attribute (BI/MI profiles)|uint6_t*|shift|[NC]|Scaling shift array
|Attribute|bool_t|scale32|-|if (scale32) mul_t=int32_t else mul_t=int16_t
|Attribute|bool_t|double_round|-|Select double round mode
|Attribute|bool_t|per_channel|-|if (per_channel) NC=shape[dims-1] else NC=1
|===

*Operation Function:*

[source,c++]
----
for_each(index in shape) {
    // uint16 values can have zero_point 0 or 32768
    // int8/uint8 can have zero point within their valid range
    // No other types can have zero point != 0
    ERROR_IF(in_t != int8_t &&
             in_t != uint8_t &&
             in_t != uint16_t && input_zp != 0);
    ERROR_IF(out_t != int8_t &&
             out_t != uint8_t &&
             out_t != uint16_t && output_zp != 0);
    ERROR_IF(in_t == uint16_t && input_zp != 0 && input_zp != 32768);
    ERROR_IF(out_t == uint16_t && output_zp != 0 && output_zp != 32768);
    ERROR_IF(scale32 && in_t == int48_t);
    ERROR_IF(!scale32 && double_round);
    int48_t value = tensor_read<in_t>(input, shape, index);
    value = value - input_zp;
    int c = (per_channel) ? index[dims-1] : 0;
    int32_t result = (scale32) ?
        apply_scale_32(value, multiplier[c], shift[c], double_round) :
        apply_scale_16(value, multiplier[c], shift[c]);
    result = (out_t)apply_clip<int32_t>(result + output_zp, minimum<out_t>, maximum<out_t>);
    tensor_write<out_t>(output, shape, index, result);
}
----

*Supported Data Types:*

|===
|Profile|Mode|in_t|out_t

|Any|signed 8 to signed 8|int8_t|int8_t
|Any|signed 8 to signed 16|int8_t|int16_t
|Any|signed 8 to signed 32|int8_t|int32_t
|Any|signed 8 to unsigned 8|int8_t|uint8_t
|Any|signed 16 to signed 8|int16_t|int8_t
|Any|signed 16 to signed 16|int16_t|int16_t
|Any|signed 16 to signed 32|int16_t|int32_t
|Any|signed 16 to unsigned 8|int16_t|uint8_t
|Any|signed 16 to unsigned 16|int16_t|uint16_t
|Any|signed 32 to signed 8|int32_t|int8_t
|Any|signed 32 to signed 16|int32_t|int16_t
|Any|signed 32 to signed 32|int32_t|int32_t
|Any|signed 48 to signed 8|int48_t|int8_t
|Any|signed 48 to signed 16|int48_t|int16_t
|Any|signed 48 to signed 32|int48_t|int32_t
|Any|unsigned 8 to signed 8|uint8_t|int8_t
|Any|unsigned 8 to signed 16|uint8_t|int16_t
|Any|unsigned 16 to signed 16|uint16_t|int16_t
|===