aboutsummaryrefslogtreecommitdiff
path: root/chapters/image.adoc
blob: 6f1d3cc7e36d4cb4a72bbac6651ff58a13b7a001 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
//
// This confidential and proprietary software may be used only as
// authorised by a licensing agreement from ARM Limited
// (C) COPYRIGHT 2020-2021 ARM Limited
// ALL RIGHTS RESERVED
// The entire notice above must be reproduced on all authorised
// copies and copies may only be made to the extent permitted
// by a licensing agreement from ARM Limited.

=== Image Operators

==== RESIZE

Resizes a tensor. Resize is only allowed in the H and W dimensions.

The NEAREST mode returns the input tensor value closest to the calculated
sample position for both floating-point and integer data formats.

Floating-point BILINEAR mode returns a bilinearly interpolated output value
based on the four closest input sample positions.

For integer BILINEAR interpolation mode, the shift value is used along with the
other parameters to create a fixed-point scaling factor for each of the four
input samples. The scaled input values are then summed to create the output
value, which has 2 * shift fractional bits. To convert back to the original
integer size, the output value must be rescaled.

The following examples show practical uses of the parameters:

* For approximate uniform input sampling between (0, 0) and (IH-1, IW-1) set
stride_y = ( (IH-1) * (1<<shift) ) / (OH-1),
stride_x = ( (IW-1) * (1<<shift) ) / (OW-1),
offset_x=0, offset_y=0, border_x=0, border_y=0.

* For power of two upscale by factor (1<<k) the following parameters can
be used for fixed point upscales:
** For upscale [OH-1,OW-1] = (1<<k) * [IH-1, IW-1] set
shift=k, stride_y=1, stride_x=1, offset_x=0, offset_y=0,
border_x=0, border_y=0.
** For upscale [OH,OW] = (1<<k) * [IH,IW] set
shift=(k+1), stride_y=2, stride_x=2, offset_x=-(1<<k)+1, offset_y=-(1<<k)+1,
border_x=1<<(k-1), border_y=1<<(k-1). This samples approximately
the input area (-0.5, -0.5) to (IH-0.5, IW-0.5).

*Arguments:*

|===
|Argument|Type|Name|Shape|Description

|Input|in_t*|input|[N,IH,IW,C]|Input tensor
|Attribute|int32_t* |output_size|[2]|[OH,OW]
|Attribute|resize_t*|stride|[2]|[stride_y, stride_x]
|Attribute|resize_t*|offset|[2]|[offset_y, offset_x]
|Attribute|int32_t* |border|[2]|[border_y, border_x]
|Attribute|int32_t  |shift|-|Shift value (must be zero if resize_t is float)
|Attribute|mode_t|mode|-|BILINEAR or NEAREST
|Output|out_t*|output|[N,OH,OW,C]|Output tensor
|===

*Operation Function*

[source,c++]
----
// Derive the output dimensions from the input dimensions
OH = idiv((IH-1)*(1<<shift) - offset_y, stride_y) + 1 + border_y;
OW = idiv((IW-1)*(1<<shift) - offset_x, stride_x) + 1 + border_x;
// Ensure the image size is supported by GPU APIs and that for integer
// implementations, position * stride does not overflow int32_t.
ERROR_IF(max(OH,OW,IH,IW) >= 16384);
ERROR_IF(stride_x <= 0 || stride_y <= 0);
if (is_floating_point(resize_t)) {
    // The shift attribute is not used for floating point
    ERROR_IF(shift != 0);
    ERROR_IF(stride_x > IW || stride_y > IH);
} else {
    // if in_t=int8_t ensure that an int32_t accumulator can be used
    ERROR_IF(shift < 1 || shift > 11);
    // set a consistent lower limit of 1/16 downscale
    // independent of the shift value to simplify implementations
    ERROR_IF(stride_x >= (16 << shift));
    ERROR_IF(stride_y >= (16 << shift));
    // offset range is similarly limited to maximum 16 pixels irrespective
    // of shift. Both stride and offset fit in int16_t when shift=11.
    ERROR_IF(offset_x <= (-16 << shift) || offset_x >= (16 << shift));
    ERROR_IF(offset_y <= (-16 << shift) || offset_y >= (16 << shift));
}
for_each(0 <= n < N, 0 <= oy < OH, 0 <= ox < OW, 0 <= c < C) {
    unit = (is_floating_point(resize_t)) ? 1.0 : (1 << shift);
    y = oy * stride_y + offset_y;
    x = ox * stride_x + offset_x;
    if (is_floating_point(resize_t)) {
        iy = (int32_t)apply_floor(y); dy = y - (resize_t)iy;
        ix = (int32_t)apply_floor(x); dx = x - (resize_t)ix;
    } else {
        iy = y >> shift; dy = y - (iy<<shift);
        ix = x >> shift; dx = x - (ix<<shift);
    }
    iy0 = apply_max(iy, 0);
    iy1 = apply_min(iy+1, IH-1);
    ix0 = apply_max(ix, 0);
    ix1 = apply_min(ix+1, IW-1);
    REQUIRE(ix0 <= ix1 && iy0 <= iy1);
    if (mode==BILINEAR) {
        v00 = tensor_read<in_t>(input, [N,IH,IW,C], [n,iy0,ix0,c]);
        v01 = tensor_read<in_t>(input, [N,IH,IW,C], [n,iy0,ix1,c]);
        v10 = tensor_read<in_t>(input, [N,IH,IW,C], [n,iy1,ix0,c]);
        v11 = tensor_read<in_t>(input, [N,IH,IW,C], [n,iy1,ix1,c]);
        out_t acc = v00 * (unit - dy) * (unit - dx) + v01 * (unit - dy) * dx;
        acc = acc + v10 * dy * (unit-dx) + v11 * dy * dx;
        tensor_write<out_t>(output, [N,OH,OW,C], [n,oy,ox,c], acc);
    } else if (mode==NEAREST) {
        iy = (dy >= unit/2) ? iy1 : iy0;
        ix = (dx >= unit/2) ? ix1 : ix0;
        v = tensor_read<in_t>(input, [N,IH,IW,C], [n,iy,ix,c]);
        tensor_write<out_t>(output, [N,OH,OW,C], [n,oy,ox,c], v);
    }
}
----

*Supported Data Types:*

|===
|Profile|Mode|resize_t|in_t|out_t

|Any|signed 8,  bilinear|int16_t|int8_t|int32_t
|Any|signed 8,  nearest |int16_t|int8_t|int8_t
|Any|signed 16, bilinear|int16_t|int16_t|int48_t
|Any|signed 16, nearest |int16_t|int16_t|int16_t
|MI,MT|fp16|fp32_t|fp16_t|fp16_t
|MI,MT|bf16|fp32_t|bf16_t|bf16_t
|MI,MT|fp32|fp32_t|fp32_t|fp32_t
|===

*Resize Modes:*

|===
|Mode|Description

|NEAREST|Nearest Neighbor
|BILINEAR|Bilinear interpolation
|===