diff options
author | Dominic Symes <dominic.symes@arm.com> | 2022-04-11 10:55:16 +0100 |
---|---|---|
committer | Dominic Symes <dominic.symes@arm.com> | 2022-04-14 09:49:19 +0100 |
commit | a6ac716b995cbfe31a2549d41c6abb58f4fdce39 (patch) | |
tree | e2bd92dcdc46e18ccc03ec97854aba62f27352ad /chapters | |
parent | a1ba656173cf49979f0139ebf91581a6ddd81c7a (diff) | |
download | specification-a6ac716b995cbfe31a2549d41c6abb58f4fdce39.tar.gz |
Specify output dimension formulae for tensor ops
For each tensor op, clarify how the output dimensions,
output height (OH) and output width (OW), relate
to the input height (IH) and input width (IW).
Signed-off-by: Dominic Symes <dominic.symes@arm.com>
Change-Id: Iad36041f54e439c944cacf08afac5567dd2bad3f
Diffstat (limited to 'chapters')
-rw-r--r-- | chapters/image.adoc | 4 | ||||
-rw-r--r-- | chapters/pseudocode.adoc | 7 | ||||
-rw-r--r-- | chapters/tensor_ops.adoc | 62 |
3 files changed, 46 insertions, 27 deletions
diff --git a/chapters/image.adoc b/chapters/image.adoc index 16e83b5..039595e 100644 --- a/chapters/image.adoc +++ b/chapters/image.adoc @@ -67,8 +67,8 @@ input position (IH-1,IW-1). [source,c++] ---- // Derive the output dimensions from the input dimensions -OH = floor(((IH-1)*(1<<shift) - offset_y)/stride_y)) + 1 + border_y -OW = floor(((IW-1)*(1<<shift) - offset_x)/stride_x)) + 1 + border_x +OH = idiv((IH-1)*(1<<shift) - offset_y, stride_y) + 1 + border_y; +OW = idiv((IW-1)*(1<<shift) - offset_x, stride_x) + 1 + border_x; // Ensure the image size is supported by GPU APIs and that for integer // implementations, position * stride does not overflow int32_t. ERROR_IF(max(OH,OW,IH,IW) >= 16384); diff --git a/chapters/pseudocode.adoc b/chapters/pseudocode.adoc index 238aa33..3f885c7 100644 --- a/chapters/pseudocode.adoc +++ b/chapters/pseudocode.adoc @@ -185,6 +185,13 @@ int idiv(int input1, int input2) { return input1 / input2; // Integer divide that truncates towards zero } +// Integer division that checks input1 is a multiple of input2 + +int idiv_check(int input1, int input2) { + ERROR_IF(input1 % input2 != 0); // input1 must be a multiple of input2 + return input1 / input2; // exact quotient without rounding +} + int length(in_t input) return number of elements in input list diff --git a/chapters/tensor_ops.adoc b/chapters/tensor_ops.adoc index 9a4ab88..7f39e81 100644 --- a/chapters/tensor_ops.adoc +++ b/chapters/tensor_ops.adoc @@ -80,13 +80,16 @@ When calculating the average, only the number of valid input tensor values, but |Attribute|int*|pad|[4]|[pad_top, pad_bottom, pad_left, pad_right] |Attribute|in_out_t|input_zp|-|Input tensor zero point. Must be zero for non-int8 types. |Attribute|in_out_t|output_zp|-|Output tensor zero point. Must be zero for non-int8 types. 
-|Output|in_out_t*|output|[N,H,W,C]|Output tensor 4D +|Output|in_out_t*|output|[N,OH,OW,C]|Output tensor 4D |=== *Operation Function:* [source,c++] ---- +// Derive output dimensions from input dimensions and padding +OH = idiv_check(IH + pad_top + pad_bottom - kernel_y, stride_y) + 1; +OW = idiv_check(IW + pad_left + pad_right - kernel_x, stride_x) + 1; ERROR_IF(in_out_t != int8_t && input_zp != 0); // Zero point only for int8_t ERROR_IF(in_out_t != int8_t && output_zp != 0); // Zero point only for int8_t ERROR_IF(kernel_y < 1 || kernel_x < 1); // kernel size must be >= 1 @@ -96,12 +99,8 @@ ERROR_IF(pad_top < 0 || pad_bottom < 0 || pad_left < 0 || pad_right < 0); // a divide-by-zero. ERROR_IF(pad_right >= kernel_x || pad_left >= kernel_x); ERROR_IF(pad_top >= kernel_y || pad_bottom >= kernel_y); -// Output shape must match expected shape given the input shape -// and arguments provided -ERROR_IF(H != idiv((IH + pad_top + pad_bottom + stride_y - kernel_y), stride_y)); -ERROR_IF(W != idiv((IW + pad_left + pad_right + stride_x - kernel_x), stride_x)); -for_each(0 <= n < N, 0 <= oy < H, 0 <= ox < W, 0 <= c < C ) { +for_each(0 <= n < N, 0 <= oy < OH, 0 <= ox < OW, 0 <= c < C ) { in_out_t output_val; acc_t acc = 0; int count = 0; @@ -126,7 +125,7 @@ for_each(0 <= n < N, 0 <= oy < H, 0 <= ox < W, 0 <= c < C ) { acc = apply_scale_32(acc, scale.multiplier, scale.shift, false); output_val = (in_out_t)apply_clip<acc_t>(acc + output_zp, minimum<in_out_t>, maximum<in_out_t>) } - tensor_write<in_out_t>(output, [N,H,W,C], [n,oy,ox,c], output_val); + tensor_write<in_out_t>(output, [N,OH,OW,C], [n,oy,ox,c], output_val); } ---- @@ -156,20 +155,23 @@ Performs a 2D convolution over the given tensor input, using the weight tensor. |Attribute|int*|dilation|[2]|[dilation_y, dilation_x] |Attribute|in_t|input_zp|-|Input tensor zero point. Must be zero for non-int8 types. |Attribute|weight_t|weight_zp|-|Weight zero point. Must be zero for non-int8 types. 
-|Output|out_t*|output|[N,H,W,OC]|Output tensor +|Output|out_t*|output|[N,OH,OW,OC]|Output tensor |=== *Operation Function* [source,c++] ---- +// Derive output dimensions from input dimensions and padding +OH = idiv_check(IH-1 + pad_top + pad_bottom - (KH-1)*dilation_y, stride_y) + 1; +OW = idiv_check(IW-1 + pad_left + pad_right - (KW-1)*dilation_x, stride_x) + 1; ERROR_IF(in_t != int8_t && input_zp != 0); // Zero point only for int8_t ERROR_IF(weight_t != int8_t && weight_zp != 0); ERROR_IF(pad_top < 0 || pad_bottom < 0 || pad_left < 0 || pad_right < 0); ERROR_IF(stride_y < 1 || stride_x < 1); ERROR_IF(dilation_y < 1 || dilation_x < 1); pad = flatten([0,0], pad, [0,0]); -for_each(0 <= n < N, 0 <= oy < H, 0 <= ox < W; 0 <= oc < OC) { +for_each(0 <= n < N, 0 <= oy < OH, 0 <= ox < OW; 0 <= oc < OC) { out_t acc = 0; iy = oy * stride_y - pad_top; ix = ox * stride_x - pad_left; @@ -185,7 +187,7 @@ for_each(0 <= n < N, 0 <= oy < H, 0 <= ox < W; 0 <= oc < OC) { } } acc = apply_add<out_t>(acc, bias[oc]); - tensor_write<out_t>(output, [N,H,W,OC], [n,oy,ox,oc], acc); + tensor_write<out_t>(output, [N,OH,OW,OC], [n,oy,ox,oc], acc); } ---- @@ -217,20 +219,24 @@ Performs a 3D convolution over the given input tensor. |Attribute|int*|dilation|[3]|[dilation_d, dilation_y, dilation_x] |Attribute|in_t|input_zp|-|Input tensor zero point. Must be zero for non-int8 types. |Attribute|weight_t|weight_zp|-|Weight zero point. Must be zero for non-int8 types. 
-|Output|out_t*|output|[N,D,H,W,OC]|Output tensor +|Output|out_t*|output|[N,OD,OH,OW,OC]|Output tensor |=== *Operation Function* [source,c++] ---- +// Derive output dimensions from input dimensions and padding +OD = idiv_check(ID-1 + pad_d0 + pad_d1 - (KD-1)*dilation_d, stride_d) + 1; +OH = idiv_check(IH-1 + pad_top + pad_bottom - (KH-1)*dilation_y, stride_y) + 1; +OW = idiv_check(IW-1 + pad_left + pad_right - (KW-1)*dilation_x, stride_x) + 1; ERROR_IF(in_t != int8_t && input_zp != 0); // Zero point only for int8_t ERROR_IF(weight_t != int8_t && weight_zp != 0); ERROR_IF(pad_d0 < 0 || pad_d1 < 0 || pad_top < 0 || pad_bottom < 0 || pad_left < 0 || pad_right < 0); ERROR_IF(stride_d < 1 || stride_y < 1 || stride_x < 1); ERROR_IF(dilation_d < 1 || dilation_y < 1 || dilation_x < 1); pad = flatten([0,0], pad, [0,0]); -for_each(0 <= n < N, 0 <= od < D, 0 <= oy < H, 0 <= ox < W; 0 <= oc < OC) { +for_each(0 <= n < N, 0 <= od < OD, 0 <= oy < OH, 0 <= ox < OW; 0 <= oc < OC) { out_t acc = 0; id = od * stride_d - pad_d0; iy = oy * stride_y - pad_top; @@ -248,7 +254,7 @@ for_each(0 <= n < N, 0 <= od < D, 0 <= oy < H, 0 <= ox < W; 0 <= oc < OC) { } } acc = apply_add<out_t>(acc, bias[oc]); - tensor_write<out_t>(output, [N,D,H,W,OC], [n,od,oy,ox,oc], acc); + tensor_write<out_t>(output, [N,OD,OH,OW,OC], [n,od,oy,ox,oc], acc); } ---- @@ -281,20 +287,23 @@ Performs 2D convolutions separately over each channel of the given tensor input, |Attribute|int*|dilation|[2]|[dilation_y, dilation_x] |Attribute|in_t|input_zp|-|Input tensor zero point. Must be zero for non-int8 types. |Attribute|weight_t|weight_zp|-|Weight zero point. Must be zero for non-int8 types. 
-|Output|out_t*|output|[N,H,W,C*M]|Output tensor +|Output|out_t*|output|[N,OH,OW,C*M]|Output tensor |=== *Operation Function* [source,c++] ---- +// Derive output dimensions from input dimensions and padding +OH = idiv_check(IH-1 + pad_top + pad_bottom - (KH-1)*dilation_y, stride_y) + 1; +OW = idiv_check(IW-1 + pad_left + pad_right - (KW-1)*dilation_x, stride_x) + 1; ERROR_IF(in_t != int8_t && input_zp != 0); // Zero point only for int8_t ERROR_IF(weight_t != int8_t && weight_zp != 0); ERROR_IF(pad_top < 0 || pad_bottom < 0 || pad_left < 0 || pad_right < 0); ERROR_IF(stride_y < 1 || stride_x < 1); ERROR_IF(dilation_y < 1 || dilation_x < 1); pad = flatten([0,0], pad, [0,0]); -for_each(0 <= n<N, 0 <= oy < H, 0 <= ox < W; 0 <= c < C, 0 <= m < M) { +for_each(0 <= n<N, 0 <= oy < OH, 0 <= ox < OW; 0 <= c < C, 0 <= m < M) { out_t acc = 0; iy = oy * stride_y - pad_top; ix = ox * stride_x - pad_left; @@ -302,7 +311,7 @@ for_each(0 <= n<N, 0 <= oy < H, 0 <= ox < W; 0 <= c < C, 0 <= m < M) { y = iy + ky * dilation_y; x = ix + kx * dilation_x; if (0 <= y < IH && 0 <= x < IW) { - out_t value = tensor_read<in_t>(input, [N,H,W,C], [n,y,x,c]); + out_t value = tensor_read<in_t>(input, [N,IH,IW,C], [n,y,x,c]); out_t weight = tensor_read<weight_t>(weight, [KH,KW,C,M], [ky,kx,c,m]); value = value - input_zp; weight = weight - weight_zp; @@ -310,7 +319,7 @@ for_each(0 <= n<N, 0 <= oy < H, 0 <= ox < W; 0 <= c < C, 0 <= m < M) { } } acc = apply_add<out_t>(acc, bias[(c * M) + m]); - tensor_write<out_t>(output, [N,H,W,C * M], [n,oy,ox,c * M + m], acc); + tensor_write<out_t>(output, [N,OH,OW,C * M], [n,oy,ox,c * M + m], acc); } ---- @@ -428,13 +437,16 @@ This performs a max pooling over the given input tensor. 
A sliding window of siz |Attribute|int*|kernel|[2]|[kernel_y, kernel_x] |Attribute|int*|stride|[2]|[stride_y, stride_x] |Attribute|int*|pad|[4]|[pad_top, pad_bottom, pad_left, pad_right] -|Output|in_out_t*|output|[N,H,W,C]|Output tensor 4D +|Output|in_out_t*|output|[N,OH,OW,C]|Output tensor 4D |=== *Operation Function:* [source,c++] ---- +// Derive output dimensions from input dimensions and padding +OH = idiv_check(IH + pad_top + pad_bottom - kernel_y, stride_y) + 1; +OW = idiv_check(IW + pad_left + pad_right - kernel_x, stride_x) + 1; ERROR_IF(kernel_y < 1 || kernel_x < 1); // kernel size must be >= 1 ERROR_IF(stride_y < 1 || stride_x < 1); ERROR_IF(pad_top < 0 || pad_bottom < 0 || pad_left < 0 || pad_right < 0); @@ -442,10 +454,6 @@ ERROR_IF(pad_top < 0 || pad_bottom < 0 || pad_left < 0 || pad_right < 0); // input values will be used. ERROR_IF(pad_right >= kernel_x || pad_left >= kernel_x); ERROR_IF(pad_top >= kernel_y || pad_bottom >= kernel_y); -// Output shape must match expected shape given the input shape -// and arguments provided -ERROR_IF(H != idiv((IH + pad_top + pad_bottom + stride_y - kernel_y), stride_y)); -ERROR_IF(W != idiv((IW + pad_left + pad_right + stride_x - kernel_x), stride_x)); for_each(0 <= n < N, 0 <= oy < H, 0 <= ox < W, 0 <= c < C ) { in_out_t acc = minimum_value<in_out_t>; @@ -459,7 +467,7 @@ for_each(0 <= n < N, 0 <= oy < H, 0 <= ox < W, 0 <= c < C ) { acc = apply_max(acc, value); } } - tensor_write<in_out_t>(output, [N,H,W,C], [n,oy,ox,c], acc); + tensor_write<in_out_t>(output, [N,OH,OW,C], [n,oy,ox,c], acc); } ---- @@ -485,7 +493,7 @@ Performs a 2D transposed convolution over the given tensor input, using the weig |Input|in_t*|input|[N,IH,IW,IC]|Input tensor |Input (MT profile) Attribute (BI/MI profiles)|weight_t*|weight|[OC,KH,KW,IC]|Weight kernel size KH x KW |Input (MT profile) Attribute (BI/MI profiles)|out_t*|bias|[OC]|Per output channel bias data. 
-|Attribute|int*|out_pad|[2]|[out_pad_top, out_pad_left] +|Attribute|int*|out_pad|[4]|[out_pad_top, out_pad_bottom, out_pad_left, out_pad_right] |Attribute|int*|stride|[2]|[stride_y, stride_x] |Attribute|int*|out_shape|[4]|[N,OH,OW,OC] |Attribute|in_t|input_zp|-|Input tensor zero point. Must be zero for non-int8 types. @@ -497,9 +505,13 @@ Performs a 2D transposed convolution over the given tensor input, using the weig [source,c++] ---- +// Derive output dimensions from input dimensions and padding +OH = (IH-1)*stride_y - out_pad_top - out_pad_bottom + KH; +OW = (IW-1)*stride_x - out_pad_left - out_pad_right + KW; ERROR_IF(in_t != int8_t && input_zp != 0); // Zero point only allowed for int8_t ERROR_IF(weight_t != int8_t && weight_zp != 0); -ERROR_IF(out_pad_top < 0 || out_pad_left < 0); +ERROR_IF(out_pad_top < 0 || out_pad_bottom < 0); +ERROR_IF(out_pad_left < 0 || out_pad_right < 0); ERROR_IF(stride_y < 1 || stride_x < 1); for_each(index in out_shape) { tensor_write<out_t>(output, [N,OH,OW,OC], index, bias[index[3]]) |