aboutsummaryrefslogtreecommitdiff
path: root/chapters/image.adoc
blob: 7476d8ad431d5536f81b3b0381d7ef3ab2308dc6 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
//
// This confidential and proprietary software may be used only as
// authorised by a licensing agreement from ARM Limited
// (C) COPYRIGHT 2020-2021 ARM Limited
// ALL RIGHTS RESERVED
// The entire notice above must be reproduced on all authorised
// copies and copies may only be made to the extent permitted
// by a licensing agreement from ARM Limited.

=== Image Operators

==== RESIZE

Resizes a tensor. Resize is only allowed in the H and W dimensions.

The NEAREST (nearest neighbor) mode returns the value of the input tensor closest to the
calculated sample position for both floating-point and integer data formats.

Floating-point BILINEAR mode returns a bilinearly interpolated output value
based on the four closest input sample positions.

For integer BILINEAR interpolation mode, the shift value is used along with the
other parameters to create a fixed-point scaling factor for each of the four
input samples. The scaled input values are then summed to create the output
value, which has 2 * shift fractional bits. To convert back to the original
integer size, the output value must be rescaled.

For floating-point stride, stride_y should be set to IH/OH and stride_x should
be set to IW/OW. When using integer stride, stride_y is approximately
(IH<<shift)/OH and stride_x is approximately (IW<<shift)/OW. OH and OW are also
supplied as inputs since there may be off-by-one errors if OH and OW are
calculated from the strides.

*Arguments:*

|===
|Argument|Type|Name|Shape|Description

|Input|in_t*|input|[N,IH,IW,C]|Input tensor
|Attribute|int*|output_size|[2]|[OH,OW]
|Attribute|resize_t*|stride|[2]|[stride_y, stride_x]
|Attribute|resize_t*|offset|[2]|[offset_y, offset_x]
|Attribute|int      |shift|-|Shift value (must be zero if resize_t is float)
|Attribute|mode_t|mode|-|BILINEAR or NEAREST
|Output|out_t*|output|[N,OH,OW,C]|Output tensor
|===

*Operation Function*

[source,c++]
----
// Ensure image size is supported by GPU APIs and that for integer
// implementations, position * stride does not overflow int32_t.
ERROR_IF(max(OH,OW,IH,IW) >= 16384);
ERROR_IF(stride_x <= 0 || stride_y <= 0);
if (resize_t == float_t) {
    // The shift attribute is not used for floating point
    ERROR_IF(shift != 0);
    ERROR_IF(stride_x > IW || stride_y > IH);
} else {
    // if in_t=int8_t ensure that an int32_t accumulator can be used
    ERROR_IF(shift < 1 || shift > 11);
    // set a consistent lower limit of 1/16 downscale
    // independent of the shift value to simplify implementations
    ERROR_IF(stride_x >= (16 << shift));
    ERROR_IF(stride_y >= (16 << shift));
    // offset range is similarly limited to maximum 16 pixels irrespective
    // of shift. Both stride and offset fit in int16_t when shift=11.
    ERROR_IF(offset_x <= (-16 << shift) || offset_x >= (16 << shift));
    ERROR_IF(offset_y <= (-16 << shift) || offset_y >= (16 << shift));
}
for_each(0 <= n < N, 0 <= oy < OH, 0 <= ox < OW, 0 <= c < C) {
    unit = (resize_t == float_t) ? 1.0 : (1 << shift);
    y = oy * stride_y + offset_y;
    x = ox * stride_x + offset_x;
    if (resize_t == float_t) {
        iy = (int)apply_floor(y); dy = y - (float_t)iy;
        ix = (int)apply_floor(x); dx = x - (float_t)ix;
    } else {
        iy = y >> shift; dy = y - (iy<<shift);
        ix = x >> shift; dx = x - (ix<<shift);
    }
    iy0 = apply_max(iy, 0);
    iy1 = apply_min(iy+1, IH-1);
    ix0 = apply_max(ix, 0);
    ix1 = apply_min(ix+1, IW-1);
    REQUIRE(ix0 <= ix1 && iy0 <= iy1);
    if (mode==BILINEAR) {
        v00 = tensor_read<in_t>(input, [N,IH,IW,C], [n,iy0,ix0,c]);
        v01 = tensor_read<in_t>(input, [N,IH,IW,C], [n,iy0,ix1,c]);
        v10 = tensor_read<in_t>(input, [N,IH,IW,C], [n,iy1,ix0,c]);
        v11 = tensor_read<in_t>(input, [N,IH,IW,C], [n,iy1,ix1,c]);
        out_t acc = v00 * (unit - dy) * (unit - dx) + v01 * (unit - dy) * dx;
        acc = acc + v10 * dy * (unit-dx) + v11 * dy * dx;
        tensor_write<out_t>(output, [N,OH,OW,C], [n,oy,ox,c], acc);
    } else if (mode==NEAREST) {
        iy = (dy >= unit/2) ? iy1 : iy0;
        ix = (dx >= unit/2) ? ix1 : ix0;
        v = tensor_read<in_t>(input, [N,IH,IW,C], [n,iy,ix,c]);
        tensor_write<out_t>(output, [N,OH,OW,C], [n,oy,ox,c], v);
    }
}
----

*Supported Data Types:*

|===
|Profile|Mode|resize_t|in_t|out_t

|Any|signed 8,  bilinear|int16_t|int8_t|int32_t
|Any|signed 8,  nearest |int16_t|int8_t|int8_t
|Any|signed 16, bilinear|int16_t|int16_t|int48_t
|Any|signed 16, nearest |int16_t|int16_t|int16_t
|MI,MT|floating-point   |float_t|float_t|float_t
|===

*Resize Modes:*

|===
|Mode|Description

|NEAREST|Nearest Neighbor
|BILINEAR|Bilinear interpolation
|===