chapters/type_conversion.adoc


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75

//
// This confidential and proprietary software may be used only as
// authorised by a licensing agreement from ARM Limited
// (C) COPYRIGHT 2020-2022 ARM Limited
// ALL RIGHTS RESERVED
// The entire notice above must be reproduced on all authorised
// copies and copies may only be made to the extent permitted
// by a licensing agreement from ARM Limited.

=== Type Conversion

==== CAST

Casts a tensor from one data type to another.

include::{generated}/operators/CAST.adoc[]

*Operation Function:*

[source,c++]
----
// Element-wise conversion of every input value from in_t to out_t.
// The first matching branch selects the conversion rule, so the bool
// cases take priority over the floating-point and remaining cases.
for_each(index in shape) {
    in_t in = tensor_read<in_t>(input, shape, index);
    out_t out;
    if (out_t == bool_t) {
        // Any non-zero input becomes true
        out = (in != 0) ? true : false;
    } else if (in_t == bool_t) {
        // true maps to 1, false maps to 0
        out = (in) ? 1 : 0;
    } else if (out_t == fp16_t || out_t == bf16_t || out_t == fp32_t) {
        // Conversion to a floating-point output type
        out = round_to_nearest_float(in);
    } else if (in_t == fp16_t || in_t == bf16_t || in_t == fp32_t) {
        // Floating-point input to a non-floating-point output: round first,
        // then clip to [minimum<out_t>, maximum<out_t>]
        out = apply_clip<out_t>(round_to_nearest_int(in), minimum<out_t>, maximum<out_t>);
    } else if (sizeof(out_t) >= sizeof(in_t)) {
        // Remaining (non-bool, non-floating-point) types, output at least as wide as input
        out = sign_extend(in);
    } else {
        // Remaining types, output narrower than input
        out = truncate(in);
    }
    tensor_write<out_t>(output, shape, index, out);
}
----

==== RESCALE

Rescales quantized values into a new domain. Each value is scaled by the factor multiplier * 2^-shift^.

include::{generated}/operators/RESCALE.adoc[]

*Operation Function:*

[source,c++]
----
// Element-wise rescale: subtract the input zero point, apply the
// multiplier/shift scaling, add the output zero point and clip to out_t.
for_each(index in shape) {
    // uint16 values can have zero_point 0 or 32768
    // int8/uint8 can have zero point within their valid range
    // No other types can have zero point != 0
    ERROR_IF(in_t != int8_t &&
             in_t != uint8_t &&
             in_t != uint16_t && input_zp != 0);
    ERROR_IF(out_t != int8_t &&
             out_t != uint8_t &&
             out_t != uint16_t && output_zp != 0);
    // A uint16 zero point is an error only when it is neither 0 nor 32768;
    // the two inequalities must be combined with && (with || the condition
    // would hold for every value, including the two permitted ones).
    ERROR_IF(in_t == uint16_t && (input_zp != 0 && input_zp != 32768));
    ERROR_IF(out_t == uint16_t && (output_zp != 0 && output_zp != 32768));
    // The scale32 path is not permitted for int48 input
    ERROR_IF(scale32 && in_t == int48_t);
    // double_round is only passed to apply_scale_32, so it requires scale32
    ERROR_IF(!scale32 && double_round);
    // Hold the input in int48_t before removing the input zero point
    int48_t value = tensor_read<in_t>(input, shape, index);
    value = value - input_zp;
    // Per-channel mode picks multiplier/shift by the index along the last
    // dimension of input; otherwise entry 0 is used for every element
    int c = (per_channel) ? index[rank(input) - 1] : 0;
    int32_t result = (scale32) ?
        apply_scale_32(value, multiplier[c], shift[c], double_round) :
        apply_scale_16(value, multiplier[c], shift[c]);
    // Add the output zero point, then clip to [minimum<out_t>, maximum<out_t>]
    result = (out_t)apply_clip<int32_t>(result + output_zp, minimum<out_t>, maximum<out_t>);
    tensor_write<out_t>(output, shape, index, result);
}
----