diff options
author | Eric Kunze <eric.kunze@arm.com> | 2023-07-18 15:20:53 -0700 |
---|---|---|
committer | Eric Kunze <eric.kunze@arm.com> | 2023-08-17 09:32:28 -0700 |
commit | fb0284e2912bd5fd73bf6f476901490e04c330a2 (patch) | |
tree | 1784e40ad84a91e751679a4cbdf6cd33be1eefdb /chapters/tensor_ops.adoc | |
parent | b5b067819e5de11153b41cf3d26da4f3f9dd23e8 (diff) | |
download | specification-fb0284e2912bd5fd73bf6f476901490e04c330a2.tar.gz |
Change TOSA specification to signless types
Integer inputs and outputs to TOSA operators are now defined as signless
values. In most instances the operator will use signed arithmetic, as
indicated in previous versions of the specification, resulting in little
functional change to the specification.
New attributes have been added to the RESCALE operator to indicate
whether the input and output values should be treated as signed or unsigned.
Explicit uses of static_cast, sign_extend, zero_extend and truncate are added
to the pseudocode to avoid ambiguity.
Change-Id: I71c67d3e5aeaabc418c768f821fce6ee3eebb65b
Diffstat (limited to 'chapters/tensor_ops.adoc')
-rw-r--r-- | chapters/tensor_ops.adoc | 129 |
1 files changed, 73 insertions, 56 deletions
diff --git a/chapters/tensor_ops.adoc b/chapters/tensor_ops.adoc index 6387790..b9d54c1 100644 --- a/chapters/tensor_ops.adoc +++ b/chapters/tensor_ops.adoc @@ -1,7 +1,7 @@ // // This confidential and proprietary software may be used only as // authorised by a licensing agreement from ARM Limited -// (C) COPYRIGHT 2020-2022 ARM Limited +// (C) COPYRIGHT 2020-2023 ARM Limited // ALL RIGHTS RESERVED // The entire notice above must be reproduced on all authorised // copies and copies may only be made to the extent permitted @@ -31,12 +31,15 @@ if (axis == rank(shape1)-1) { ERROR_IF(flatten(left_shape, right_shape) != shape); for_each(left_index in left_shape) { for_each(right_index in right_shape) { - in_t max_value = minimum_value<in_t>; + in_t max_value = minimum_s<in_t>; out_t max_index = 0; for (i = 0; i < shape[axis]; i++) { dim_t index = flatten(left_index, [i], right_index); in_t value = tensor_read<in_t>(input, shape1, index); - if (value > max_value) { max_value = value; max_index = i; } + if (apply_max_s<in_t>(value, max_value) != max_value) { + max_value = value; + max_index = i; + } } dim_t index = flatten(left_index, right_index); tensor_write<out_t>(output, shape, index, max_index); @@ -54,8 +57,8 @@ include::{generated}/operators/AVG_POOL2D.adoc[] [source,c++] ---- -ERROR_IF(in_out_t != int8_t && input_zp != 0); // Zero point only for int8_t -ERROR_IF(in_out_t != int8_t && output_zp != 0); // Zero point only for int8_t +ERROR_IF(in_out_t != i8_t && input_zp != 0); // Zero point only for int8_t +ERROR_IF(in_out_t != i8_t && output_zp != 0); // Zero point only for int8_t ERROR_IF(kernel_y < 1 || kernel_x < 1); // kernel size must be >= 1 ERROR_IF(stride_y < 1 || stride_x < 1); ERROR_IF(pad_top < 0 || pad_bottom < 0 || pad_left < 0 || pad_right < 0); @@ -79,17 +82,19 @@ for_each(0 <= n < N, 0 <= oy < OH, 0 <= ox < OW, 0 <= c < C ) { // average, padding does not count if (0 <= y < IH and 0 <= x < IW) { count++; - acc_t value = tensor_read<in_out_t>(input, 
[N,IH,IW,C], [n,y,x,c]); - value = value - input_zp; - acc = apply_add<acc_t>(acc, value); + acc_t value = sign_extend<acc_t>(tensor_read<in_out_t>(input, [N,IH,IW,C], [n,y,x,c])); + value = apply_sub_s<acc_t>(value, sign_extend<acc_t>(input_zp)); + acc = apply_add_s<acc_t>(acc, value); } } if (is_float(in_out_t)) { - output_val = acc / (float)count; + output_val = acc / static_cast<in_out_t>(count); } else { scale_t scale = reciprocal_scale(count); acc = apply_scale_32(acc, scale.multiplier, scale.shift, false); - output_val = (in_out_t)apply_clip<acc_t>(acc + output_zp, minimum<in_out_t>, maximum<in_out_t>) + acc = apply_add_s<acc_t>(acc, sign_extend<acc_t>(output_zp)); + acc = apply_clip_s<acc_t>(acc, minimum_s<in_out_t>, maximum_s<in_out_t>); + output_val = static_cast<in_out_t>(acc); } tensor_write<in_out_t>(output, [N,OH,OW,C], [n,oy,ox,c], output_val); } @@ -103,7 +108,7 @@ include::{generated}/operators/CONV2D.adoc[] [source,c++] ---- -ERROR_IF(in_t != int8_t && input_zp != 0); // Zero point only for int8_t +ERROR_IF(in_t != i8_t && input_zp != 0); // Zero point only for int8_t ERROR_IF(weight_t != int8_t && weight_zp != 0); ERROR_IF(pad_top < 0 || pad_bottom < 0 || pad_left < 0 || pad_right < 0); ERROR_IF(stride_y < 1 || stride_x < 1); @@ -120,14 +125,18 @@ for_each(0 <= n < N, 0 <= oy < OH, 0 <= ox < OW; 0 <= oc < OC) { index_t y = iy + ky * dilation_y; index_t x = ix + kx * dilation_x; if (0 <= y < IH && 0 <= x < IW) { - out_t value = tensor_read<in_t>(input, [N,IH,IW,IC], [n,y,x,ic]); - out_t weight = tensor_read<weight_t>(weight, [OC,KH,KW,IC], [oc,ky,kx,ic]); - value = value - input_zp; - weight = weight - weight_zp; - acc = apply_add<out_t>(acc, value * weight); + out_t value = static_cast<out_t>(tensor_read<in_t>(input, + [N,IH,IW,IC], + [n,y,x,ic])); + out_t weight = static_cast<out_t>(tensor_read<weight_t>(weight, + [OC,KH,KW,IC], + [oc,ky,kx,ic])); + value = apply_sub_s<out_t>(value, static_cast<out_t>(input_zp)); + weight = 
apply_sub_s<out_t>(weight, static_cast<out_t>(weight_zp)); + acc = apply_add_s<out_t>(acc, apply_mul_s<out_t>(value, weight)); } } - acc = apply_add<out_t>(acc, bias[(BC == 1) ? 0 : oc]); + acc = apply_add_s<out_t>(acc, bias[(BC == 1) ? 0 : oc]); tensor_write<out_t>(output, [N,OH,OW,OC], [n,oy,ox,oc], acc); } ---- @@ -140,8 +149,8 @@ include::{generated}/operators/CONV3D.adoc[] [source,c++] ---- -ERROR_IF(in_t != int8_t && input_zp != 0); // Zero point only for int8_t -ERROR_IF(weight_t != int8_t && weight_zp != 0); +ERROR_IF(in_t != i8_t && input_zp != 0); // Zero point only for int8_t +ERROR_IF(weight_t != i8_t && weight_zp != 0); ERROR_IF(pad_d0 < 0 || pad_d1 < 0 || pad_top < 0 || pad_bottom < 0 || pad_left < 0 || pad_right < 0); ERROR_IF(stride_d < 1 || stride_y < 1 || stride_x < 1); ERROR_IF(dilation_d < 1 || dilation_y < 1 || dilation_x < 1); @@ -160,14 +169,18 @@ for_each(0 <= n < N, 0 <= od < OD, 0 <= oy < OH, 0 <= ox < OW; 0 <= oc < OC) { index_t y = iy + ky * dilation_y; index_t x = ix + kx * dilation_x; if (0 <= x < IW && 0 <= y < IH && 0 <= d < ID) { - out_t value = tensor_read<in_t>(input, [N,ID,IH,IW,IC], [n,d,y,x,ic]); - out_t weight = tensor_read<weight_t>(weight,[OC,KD,KH,KW,IC],[oc,kd,ky,kx,ic]); - value = value - input_zp; - weight = weight - weight_zp; - acc = apply_add<out_t>(acc, value * weight); + out_t value = static_cast<out_t>(tensor_read<in_t>(input, + [N,ID,IH,IW,IC], + [n,d,y,x,ic])); + out_t weight = static_cast<out_t>(tensor_read<weight_t>(weight, + [OC,KD,KH,KW,IC], + [oc,kd,ky,kx,ic])); + value = apply_sub_s<out_t>(value, static_cast<out_t>(input_zp)); + weight = apply_sub_s<out_t>(weight, static_cast<out_t>(weight_zp)); + acc = apply_add_s<out_t>(acc, apply_mul_s<out_t>(value, weight)); } } - acc = apply_add<out_t>(acc, bias[(BC == 1) ? 0 : oc]); + acc = apply_add_s<out_t>(acc, bias[(BC == 1) ? 
0 : oc]); tensor_write<out_t>(output, [N,OD,OH,OW,OC], [n,od,oy,ox,oc], acc); } ---- @@ -180,8 +193,8 @@ include::{generated}/operators/DEPTHWISE_CONV2D.adoc[] [source,c++] ---- -ERROR_IF(in_t != int8_t && input_zp != 0); // Zero point only for int8_t -ERROR_IF(weight_t != int8_t && weight_zp != 0); +ERROR_IF(in_t != i8_t && input_zp != 0); // Zero point only for int8_t +ERROR_IF(weight_t != i8_t && weight_zp != 0); ERROR_IF(pad_top < 0 || pad_bottom < 0 || pad_left < 0 || pad_right < 0); ERROR_IF(stride_y < 1 || stride_x < 1); ERROR_IF(dilation_y < 1 || dilation_x < 1); @@ -197,14 +210,18 @@ for_each(0 <= n < N, 0 <= oy < OH, 0 <= ox < OW; 0 <= c < C, 0 <= m < M) { index_t y = iy + ky * dilation_y; index_t x = ix + kx * dilation_x; if (0 <= y < IH && 0 <= x < IW) { - out_t value = tensor_read<in_t>(input, [N,IH,IW,C], [n,y,x,c]); - out_t weight = tensor_read<weight_t>(weight, [KH,KW,C,M], [ky,kx,c,m]); - value = value - input_zp; - weight = weight - weight_zp; - acc = apply_add<out_t>(acc, value * weight); + out_t value = static_cast<out_t>(tensor_read<in_t>(input, + [N,IH,IW,C], + [n,y,x,c])); + out_t weight = static_cast<out_t>(tensor_read<weight_t>(weight, + [KH,KW,C,M], + [ky,kx,c,m])); + value = apply_sub_s<out_t>(value, static_cast<out_t>input_zp); + weight = apply_sub_s<out_t>(weight, static_cast<out_t>weight_zp); + acc = apply_add_s<out_t>(acc, apply_mul_s<out_t>(value, weight)); } } - acc = apply_add<out_t>(acc, bias[(BC == 1) ? 0 : (c * M) + m]); + acc = apply_add_s<out_t>(acc, bias[(BC == 1) ? 
0 : (c * M) + m]); tensor_write<out_t>(output, [N,OH,OW,C * M], [n,oy,ox,c * M + m], acc); } ---- @@ -262,20 +279,20 @@ include::{generated}/operators/FULLY_CONNECTED.adoc[] [source,c++] ---- -ERROR_IF(in_t != int8_t && input_zp != 0); // Zero point only for int8_t -ERROR_IF(weight_t != int8_t && weight_zp != 0); +ERROR_IF(in_t != i8_t && input_zp != 0); // Zero point only for int8_t +ERROR_IF(weight_t != i8_t && weight_zp != 0); ERROR_IF(BC != OC && BC != 1); for_each(0 <= n < N, 0 <= oc < OC) { out_t acc = 0; for_each(0 <= ic < IC) { - out_t value = tensor_read<in_t>(input, [N,IC], [n,ic]); - out_t weight = tensor_read<weight_t>(weight, [OC,IC], [oc,ic]); - value = value - input_zp; - weight = weight - weight_zp; - acc = apply_add<out_t>(acc, value * weight); + out_t value = static_cast<out_t>(tensor_read<in_t>(input, [N,IC], [n,ic])); + out_t weight = static_cast<out_t>(tensor_read<weight_t>(weight, [OC,IC], [oc,ic])); + value = apply_sub_s<out_t>(value, static_cast<out_t>(input_zp)); + weight = apply_sub_s<out_t>(weight, static_cast<out_t>(weight_zp)); + acc = apply_add_s<out_t>(acc, apply_mul_s<out_t>(value, weight)); } - acc = apply_add<out_t>(acc, bias[(BC == 1) ? 0 : oc]); + acc = apply_add_s<out_t>(acc, bias[(BC == 1) ? 
0 : oc]); tensor_write<out_t>(output, [N,OC], [n,oc], acc); } ---- @@ -288,15 +305,15 @@ include::{generated}/operators/MATMUL.adoc[] [source,c++] ---- -ERROR_IF(in_t != int8_t && (A_zp != 0 || B_zp != 0)); // Zero point only for int8_t +ERROR_IF(in_t != i8_t && (A_zp != 0 || B_zp != 0)); // Zero point only for int8_t for_each(0 <= n < N, 0 <= h < H, 0 <= w < W) { out_t acc = 0; for_each(0 <= c < C) { - out_t value1 = tensor_read<in_t>(A, [N,H,C], [n,h,c]); - out_t value2 = tensor_read<in_t>(B, [N,C,W], [n,c,w]); - value1 = value1 - A_zp; - value2 = value2 - B_zp; - acc = apply_add<out_t>(acc, value1 * value2); + out_t value1 = static_cast<out_t>(tensor_read<in_t>(A, [N,H,C], [n,h,c])); + out_t value2 = static_cast<out_t>(tensor_read<in_t>(B, [N,C,W], [n,c,w])); + value1 = apply_sub_s<out_t>(value1, static_cast<out_t>(A_zp)); + value2 = apply_sub_s<out_t>(value2, static_cast<out_t>(B_zp)); + acc = apply_add_s<out_t>(acc, apply_mul_s<out_t>(value1 * value2)); } tensor_write<out_t>(output, [N,H,W], [n,h,w], acc); } @@ -329,7 +346,7 @@ for_each(0 <= n < N, 0 <= oy < H, 0 <= ox < W, 0 <= c < C ) { index_t x = ix + kx; if (y >= 0 && y < IH && x >= 0 && x < IW) { in_out_t value = tensor_read<in_out_t>(input, [N,IH,IW,C], [n,y,x,c]); - acc = apply_max(acc, value); + acc = apply_max_s<in_out_t>(acc, value); } } tensor_write<in_out_t>(output, [N,OH,OW,C], [n,oy,ox,c], acc); @@ -376,8 +393,8 @@ include::{generated}/operators/TRANSPOSE_CONV2D.adoc[] [source,c++] ---- -ERROR_IF(in_t != int8_t && input_zp != 0); // Zero point only allowed for int8_t -ERROR_IF(weight_t != int8_t && weight_zp != 0); +ERROR_IF(in_t != i8_t && input_zp != 0); // Zero point only allowed for int8_t +ERROR_IF(weight_t != i8_t && weight_zp != 0); ERROR_IF(out_pad_top <= -KH || out_pad_bottom <= -KH); ERROR_IF(out_pad_left <= -KW || out_pad_right <= -KW); ERROR_IF(stride_y < 1 || stride_x < 1); @@ -393,12 +410,12 @@ for_each(0 <= n < N, 0 <= iy < IH, 0 <= ix < IW, 0 <= oc < OC, index_t oy = iy * 
stride_y + out_pad_top + ky; index_t ox = ix * stride_x + out_pad_left + kx; if (oy >= 0 && oy < OH && ox >= 0 && ox < OW) { - out_t acc = tensor_read<out_t>(output, [N,OH,OW,OC], [n,oy,ox,oc]); - out_t value = tensor_read<in_t>(input, [N,IH,IW,IC], [n,iy,ix,ic]); - out_t weight = tensor_read<weight_t>(weight, [OC,KH,KW,IC], [oc,ky,kx,ic]); - value = value - input_zp; - weight = weight - weight_zp; - acc = apply_add<out_t>(acc, value * weight); + out_t acc = static_cast<out_t>(tensor_read<out_t>(output, [N,OH,OW,OC], [n,oy,ox,oc])); + out_t value = static_cast<out_t>(tensor_read<in_t>(input, [N,IH,IW,IC], [n,iy,ix,ic])); + out_t weight = static_cast<out_t>(tensor_read<weight_t>(weight, [OC,KH,KW,IC], [oc,ky,kx,ic])); + value = apply_sub_s<out_t>(value, static_cast<out_t>(input_zp)); + weight = apply_sub_s<out_t>(weight, static_cast<out_t>(weight_zp)); + acc = apply_add_s<out_t>(acc, apply_mul_s<out_t>(value, weight)); tensor_write<out_t>(output, [N,OH,OW,OC], [n,oy,ox,oc], acc); } } |