aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Eric Kunze <eric.kunze@arm.com> 2022-06-16 12:21:31 -0700
committer: Eric Kunze <eric.kunze@arm.com> 2022-06-17 20:38:16 +0000
commit: 4c6b3d8058af46b930f882686e776f9e72c3f6db (patch)
tree: 78da6947c9da4eea50b2b5218482aad67b03c0bc
parent: 42229d03fe55c45f0ad2ba68f190f3d68a78ae79 (diff)
download: specification-4c6b3d8058af46b930f882686e776f9e72c3f6db.tar.gz
Revert RESIZE behavior to the 0.23 version
The current version does not match the reference model or serialization library. Revert to the old behavior until the model has been updated and tested to confirm that it works correctly. Signed-off-by: Eric Kunze <eric.kunze@arm.com> Change-Id: I237dc3e94e6c31337073524527da75084ba7b578
-rw-r--r-- chapters/image.adoc 134
1 files changed, 63 insertions, 71 deletions
diff --git a/chapters/image.adoc b/chapters/image.adoc
index 690480c..6f1d3cc 100644
--- a/chapters/image.adoc
+++ b/chapters/image.adoc
@@ -13,40 +13,34 @@
Resizes a tensor. Resize is only allowed in the H and W dimensions.
-The height dimension is scaled by factor (scale_y_n/scale_y_d).
-The width dimension is scaled by factor (scale_x_n/scale_x_d).
-
The NEAREST_NEIGHBOR mode returns the value of the input tensor closest to the
calculated sample position for both floating-point and integer data formats.
Floating-point BILINEAR mode returns a bilinearly interpolated output value
based on the four closest input sample positions.
-For integer BILINEAR interpolation mode, the output value must
-be scaled by 1/(scale_y_n * scale_x_n) in a following operation to
-complete the interpolation (for example with a RESCALE operator).
+For integer BILINEAR interpolation mode, the output value is calculated by using
+the shift value along with the other parameters to create a fixed point scaling
+factor for each input. These values are then summed to create the value for
+output, which has 2 * shift fractional bits. To convert back to the original
+integer size, the output value must be rescaled.
The following examples show practical uses of the parameters:
* For approximate uniform input sampling between (0, 0) and (IH-1, IW-1) set
-** scale_y_n/scale_y_d = (OH-1)/(IH-1) as integer ratios
-** scale_x_n/scale_x_d = (OW-1)/(IW-1) as integer ratios
-** offset_x = 0, offset_y = 0, border_x = 0, border_y = 0
-
-* For power of two upscale [OH-1,OW-1] = (1<<k) * [IH-1, IW-1],
-sampling between (0,0) and (IH-1,IW-1), set:
-** scale_y_n = (1<<k), scale_y_d = 1, offset_y = 0, border_y = 0
-** scale_x_n = (1<<k), scale_x_d = 1, offset_x = 0, border_x = 0
-
-* For power of two upscale [OH,OW] = (1<<k) * [IH,IW],
-sampling range approximately (-0.5, -0.5) to (IH-0.5, IW-0.5), set:
-** scale_y_n = 2<<k, scale_y_d = 2, offset_y = -(1<<k)+1, border_y = (1<<k)-1
-** scale_x_n = 2<<k, scale_x_d = 2, offset_x = -(1<<k)+1, border_x = (1<<k)-1
-
-The output dimensions can be derived from the input dimensions by inverting
-the scale as described in the pseudocode. The [border_y, border_x] values
-adjust the output size to allow fractional sampling beyond integer
-input position (IH-1,IW-1).
+stride_y = ( (IH-1) * (1<<shift) ) / (OH-1),
+stride_x = ( (IW-1) * (1<<shift) ) / (OW-1),
+offset_x=0, offset_y=0, border_x=0, border_y=0.
+
+* For power of two upscale by factor (1<<k) the following parameters can
+be used for fixed point upscales:
+** For upscale [OH-1,OW-1] = (1<<k) * [IH-1, IW-1] set
+shift=k, stride_y=1, stride_x=1, offset_x=0, offset_y=0,
+border_x=0, border_y=0.
+** For upscale [OH,OW] = (1<<k) * [IH,IW] set
+shift=(k+1), stride_y=2, stride_x=2, offset_x=-(1<<k)+1, offset_y=-(1<<k)+1,
+border_x=1<<(k-1), border_y=1<<(k-1). This samples approximately
+the input area (-0.5, -0.5) to (IH-0.5, IW-0.5).
*Arguments:*
@@ -54,9 +48,11 @@ input position (IH-1,IW-1).
|Argument|Type|Name|Shape|Description
|Input|in_t*|input|[N,IH,IW,C]|Input tensor
-|Attribute|int16_t *|scale|[4]|[scale_y_n, scale_y_d, scale_x_n, scale_x_d]
-|Attribute|int16_t *|offset|[2]|[offset_y, offset_x]
+|Attribute|int32_t* |output_size|[2]|[OH,OW]
+|Attribute|resize_t*|stride|[2]|[stride_y, stride_x]
+|Attribute|resize_t*|offset|[2]|[offset_y, offset_x]
|Attribute|int32_t* |border|[2]|[border_y, border_x]
+|Attribute|int32_t |shift|-|Shift value (must be zero if resize_t is float)
|Attribute|mode_t|mode|-|BILINEAR or NEAREST
|Output|out_t*|output|[N,OH,OW,C]|Output tensor
|===
@@ -65,61 +61,57 @@ input position (IH-1,IW-1).
[source,c++]
----
+// Derive the output dimensions from the input dimensions
+OH = idiv((IH-1)*(1<<shift) - offset_y, stride_y) + 1 + border_y;
+OW = idiv((IW-1)*(1<<shift) - offset_x, stride_x) + 1 + border_x;
// Ensure the image size is supported by GPU APIs and that for integer
// implementations, position * stride does not overflow int32_t.
ERROR_IF(max(OH,OW,IH,IW) >= 16384);
-ERROR_IF(scale_y_n <= 0 || scale_y_d <=0 || scale_x_n <=0 || scale_x_d <=0);
-// if in_t=int8_t ensure that an int32_t accumulator can be used
-ERROR_IF(scale_y_n > (1<<11) || scale_x_n > (1<<11));
-// set a consistent lower limit of 1/16 downscale to simplify implementations
-ERROR_IF(scale_y_d >= 16 * scale_y_n || scale_x_d >= 16 * scale_x_n);
-ERROR_IF(offset_y < -scale_y_n || offset_y >= 16*scale_y_n);
-ERROR_IF(offset_x < -scale_x_n || offset_x >= 16*scale_x_n);
-ERROR_IF(border_y < -16*scale_y_n || border_y >= scale_y_n);
-ERROR_IF(border_x < -16*scale_x_n || border_x >= scale_x_n);
-ERROR_IF(OH != idiv_check((IH-1)*scale_y_n - offset_y + border_y, scale_y_d) + 1);
-ERROR_IF(OW != idiv_check((IW-1)*scale_x_n - offset_x + border_x, scale_x_d) + 1);
+ERROR_IF(stride_x <= 0 || stride_y <= 0);
+if (is_floating_point(resize_t)) {
+ // The shift attribute is not used for floating point
+ ERROR_IF(shift != 0);
+ ERROR_IF(stride_x > IW || stride_y > IH);
+} else {
+ // if in_t=int8_t ensure that an int32_t accumulator can be used
+ ERROR_IF(shift < 1 || shift > 11);
+ // set a consistent lower limit of 1/16 downscale
+ // independent of the shift value to simplify implementations
+ ERROR_IF(stride_x >= (16 << shift));
+ ERROR_IF(stride_y >= (16 << shift));
+ // offset range is similarly limited to maximum 16 pixels irrespective
+ // of shift. Both stride and offset fit in int16_t when shift=11.
+ ERROR_IF(offset_x <= (-16 << shift) || offset_x >= (16 << shift));
+ ERROR_IF(offset_y <= (-16 << shift) || offset_y >= (16 << shift));
+}
for_each(0 <= n < N, 0 <= oy < OH, 0 <= ox < OW; 0 <= c < C) {
- out_t acc;
- resize_t dx, dy;
-
- int32_t y = oy * scale_y_d + offset_y;
- int32_t x = ox * scale_x_d + offset_x;
- int16_t iy = floor(y / scale_y_n);
- int16_t ix = floor(x / scale_x_n);
-
+ unit = (is_floating_point(resize_t)) ? 1.0 : (1 << shift);
+ y = oy * stride_y + offset_y;
+ x = ox * stride_x + offset_x;
if (is_floating_point(resize_t)) {
- dy = ((resize_t)y / (resize_t)scale_y_n) - iy;
- dx = ((resize_t)x / (resize_t)scale_x_n) - ix;
+ iy = (int32_t)apply_floor(y); dy = y - (resize_t)iy;
+ ix = (int32_t)apply_floor(x); dx = x - (resize_t)ix;
} else {
- dy = y - iy * scale_y_n;
- dx = y - ix * scale_x_n;
+ iy = y >> shift; dy = y - (iy<<shift);
+ ix = x >> shift; dx = x - (ix<<shift);
}
- // Note that -1 <= iy < IH and -1 <= ix < IW
- int16_t iy0 = apply_max(iy, 0);
- int16_t iy1 = apply_min(iy+1, IH-1);
- int16_t ix0 = apply_max(ix, 0);
- int16_t ix1 = apply_min(ix+1, IW-1);
+ iy0 = apply_max(iy, 0);
+ iy1 = apply_min(iy+1, IH-1);
+ ix0 = apply_max(ix, 0);
+ ix1 = apply_min(ix+1, IW-1);
+ REQUIRE(ix0 <= ix1 && iy0 <= iy1);
if (mode==BILINEAR) {
- in_t v00 = tensor_read<in_t>(input, [N,IH,IW,C], [n,iy0,ix0,c]);
- in_t v01 = tensor_read<in_t>(input, [N,IH,IW,C], [n,iy0,ix1,c]);
- in_t v10 = tensor_read<in_t>(input, [N,IH,IW,C], [n,iy1,ix0,c]);
- in_t v11 = tensor_read<in_t>(input, [N,IH,IW,C], [n,iy1,ix1,c]);
- acc = v00 * (scale_y_n - dy) * (scale_x_n - dx);
- acc += v01 * (scale_y_n - dy) * dx;
- acc += v10 * dy * (scale_x_n - dx);
- acc += v11 * dy * dx;
+ v00 = tensor_read<in_t>(input, [N,IH,IW,C], [n,iy0,ix0,c]);
+ v01 = tensor_read<in_t>(input, [N,IH,IW,C], [n,iy0,ix1,c]);
+ v10 = tensor_read<in_t>(input, [N,IH,IW,C], [n,iy1,ix0,c]);
+ v11 = tensor_read<in_t>(input, [N,IH,IW,C], [n,iy1,ix1,c]);
+ out_t acc = v00 * (unit - dy) * (unit - dx) + v01 * (unit - dy) * dx;
+ acc = acc + v10 * dy * (unit-dx) + v11 * dy * dx;
tensor_write<out_t>(output, [N,OH,OW,C], [n,oy,ox,c], acc);
} else if (mode==NEAREST) {
- int32_t iy, ix;
- if (is_floating_point(resize_t)) {
- iy = (dy >= 0.5) ? iy1 : iy0;
- ix = (dx >= 0.5) ? ix1 : ix0;
- } else {
- iy = (2*dy >= scale_y_n) ? iy1 : iy0;
- ix = (2*dx >= scale_x_n) ? ix1 : ix0;
- }
- in_t v = tensor_read<in_t>(input, [N,IH,IW,C], [n,iy,ix,c]);
+ iy = (dy >= unit/2) ? iy1 : iy0;
+ ix = (dx >= unit/2) ? ix1 : ix0;
+ v = tensor_read<in_t>(input, [N,IH,IW,C], [n,iy,ix,c]);
tensor_write<out_t>(output, [N,OH,OW,C], [n,oy,ox,c], v);
}
}