diff options
Diffstat (limited to 'chapters/tensor_ops.adoc')
-rw-r--r-- | chapters/tensor_ops.adoc | 129 |
1 files changed, 73 insertions, 56 deletions
diff --git a/chapters/tensor_ops.adoc b/chapters/tensor_ops.adoc index 6387790..b9d54c1 100644 --- a/chapters/tensor_ops.adoc +++ b/chapters/tensor_ops.adoc @@ -1,7 +1,7 @@ // // This confidential and proprietary software may be used only as // authorised by a licensing agreement from ARM Limited -// (C) COPYRIGHT 2020-2022 ARM Limited +// (C) COPYRIGHT 2020-2023 ARM Limited // ALL RIGHTS RESERVED // The entire notice above must be reproduced on all authorised // copies and copies may only be made to the extent permitted @@ -31,12 +31,15 @@ if (axis == rank(shape1)-1) { ERROR_IF(flatten(left_shape, right_shape) != shape); for_each(left_index in left_shape) { for_each(right_index in right_shape) { - in_t max_value = minimum_value<in_t>; + in_t max_value = minimum_s<in_t>; out_t max_index = 0; for (i = 0; i < shape[axis]; i++) { dim_t index = flatten(left_index, [i], right_index); in_t value = tensor_read<in_t>(input, shape1, index); - if (value > max_value) { max_value = value; max_index = i; } + if (apply_max_s<in_t>(value, max_value) != max_value) { + max_value = value; + max_index = i; + } } dim_t index = flatten(left_index, right_index); tensor_write<out_t>(output, shape, index, max_index); @@ -54,8 +57,8 @@ include::{generated}/operators/AVG_POOL2D.adoc[] [source,c++] ---- -ERROR_IF(in_out_t != int8_t && input_zp != 0); // Zero point only for int8_t -ERROR_IF(in_out_t != int8_t && output_zp != 0); // Zero point only for int8_t +ERROR_IF(in_out_t != i8_t && input_zp != 0); // Zero point only for int8_t +ERROR_IF(in_out_t != i8_t && output_zp != 0); // Zero point only for int8_t ERROR_IF(kernel_y < 1 || kernel_x < 1); // kernel size must be >= 1 ERROR_IF(stride_y < 1 || stride_x < 1); ERROR_IF(pad_top < 0 || pad_bottom < 0 || pad_left < 0 || pad_right < 0); @@ -79,17 +82,19 @@ for_each(0 <= n < N, 0 <= oy < OH, 0 <= ox < OW, 0 <= c < C ) { // average, padding does not count if (0 <= y < IH and 0 <= x < IW) { count++; - acc_t value = tensor_read<in_out_t>(input, 
[N,IH,IW,C], [n,y,x,c]); - value = value - input_zp; - acc = apply_add<acc_t>(acc, value); + acc_t value = sign_extend<acc_t>(tensor_read<in_out_t>(input, [N,IH,IW,C], [n,y,x,c])); + value = apply_sub_s<acc_t>(value, sign_extend<acc_t>(input_zp)); + acc = apply_add_s<acc_t>(acc, value); } } if (is_float(in_out_t)) { - output_val = acc / (float)count; + output_val = acc / static_cast<in_out_t>(count); } else { scale_t scale = reciprocal_scale(count); acc = apply_scale_32(acc, scale.multiplier, scale.shift, false); - output_val = (in_out_t)apply_clip<acc_t>(acc + output_zp, minimum<in_out_t>, maximum<in_out_t>) + acc = apply_add_s<acc_t>(acc, sign_extend<acc_t>(output_zp)); + acc = apply_clip_s<acc_t>(acc, minimum_s<in_out_t>, maximum_s<in_out_t>); + output_val = static_cast<in_out_t>(acc); } tensor_write<in_out_t>(output, [N,OH,OW,C], [n,oy,ox,c], output_val); } @@ -103,7 +108,7 @@ include::{generated}/operators/CONV2D.adoc[] [source,c++] ---- -ERROR_IF(in_t != int8_t && input_zp != 0); // Zero point only for int8_t +ERROR_IF(in_t != i8_t && input_zp != 0); // Zero point only for int8_t ERROR_IF(weight_t != int8_t && weight_zp != 0); ERROR_IF(pad_top < 0 || pad_bottom < 0 || pad_left < 0 || pad_right < 0); ERROR_IF(stride_y < 1 || stride_x < 1); @@ -120,14 +125,18 @@ for_each(0 <= n < N, 0 <= oy < OH, 0 <= ox < OW; 0 <= oc < OC) { index_t y = iy + ky * dilation_y; index_t x = ix + kx * dilation_x; if (0 <= y < IH && 0 <= x < IW) { - out_t value = tensor_read<in_t>(input, [N,IH,IW,IC], [n,y,x,ic]); - out_t weight = tensor_read<weight_t>(weight, [OC,KH,KW,IC], [oc,ky,kx,ic]); - value = value - input_zp; - weight = weight - weight_zp; - acc = apply_add<out_t>(acc, value * weight); + out_t value = static_cast<out_t>(tensor_read<in_t>(input, + [N,IH,IW,IC], + [n,y,x,ic])); + out_t weight = static_cast<out_t>(tensor_read<weight_t>(weight, + [OC,KH,KW,IC], + [oc,ky,kx,ic])); + value = apply_sub_s<out_t>(value, static_cast<out_t>(input_zp)); + weight = 
apply_sub_s<out_t>(weight, static_cast<out_t>(weight_zp)); + acc = apply_add_s<out_t>(acc, apply_mul_s<out_t>(value, weight)); } } - acc = apply_add<out_t>(acc, bias[(BC == 1) ? 0 : oc]); + acc = apply_add_s<out_t>(acc, bias[(BC == 1) ? 0 : oc]); tensor_write<out_t>(output, [N,OH,OW,OC], [n,oy,ox,oc], acc); } ---- @@ -140,8 +149,8 @@ include::{generated}/operators/CONV3D.adoc[] [source,c++] ---- -ERROR_IF(in_t != int8_t && input_zp != 0); // Zero point only for int8_t -ERROR_IF(weight_t != int8_t && weight_zp != 0); +ERROR_IF(in_t != i8_t && input_zp != 0); // Zero point only for int8_t +ERROR_IF(weight_t != i8_t && weight_zp != 0); ERROR_IF(pad_d0 < 0 || pad_d1 < 0 || pad_top < 0 || pad_bottom < 0 || pad_left < 0 || pad_right < 0); ERROR_IF(stride_d < 1 || stride_y < 1 || stride_x < 1); ERROR_IF(dilation_d < 1 || dilation_y < 1 || dilation_x < 1); @@ -160,14 +169,18 @@ for_each(0 <= n < N, 0 <= od < OD, 0 <= oy < OH, 0 <= ox < OW; 0 <= oc < OC) { index_t y = iy + ky * dilation_y; index_t x = ix + kx * dilation_x; if (0 <= x < IW && 0 <= y < IH && 0 <= d < ID) { - out_t value = tensor_read<in_t>(input, [N,ID,IH,IW,IC], [n,d,y,x,ic]); - out_t weight = tensor_read<weight_t>(weight,[OC,KD,KH,KW,IC],[oc,kd,ky,kx,ic]); - value = value - input_zp; - weight = weight - weight_zp; - acc = apply_add<out_t>(acc, value * weight); + out_t value = static_cast<out_t>(tensor_read<in_t>(input, + [N,ID,IH,IW,IC], + [n,d,y,x,ic])); + out_t weight = static_cast<out_t>(tensor_read<weight_t>(weight, + [OC,KD,KH,KW,IC], + [oc,kd,ky,kx,ic])); + value = apply_sub_s<out_t>(value, static_cast<out_t>(input_zp)); + weight = apply_sub_s<out_t>(weight, static_cast<out_t>(weight_zp)); + acc = apply_add_s<out_t>(acc, apply_mul_s<out_t>(value, weight)); } } - acc = apply_add<out_t>(acc, bias[(BC == 1) ? 0 : oc]); + acc = apply_add_s<out_t>(acc, bias[(BC == 1) ? 
0 : oc]); tensor_write<out_t>(output, [N,OD,OH,OW,OC], [n,od,oy,ox,oc], acc); } ---- @@ -180,8 +193,8 @@ include::{generated}/operators/DEPTHWISE_CONV2D.adoc[] [source,c++] ---- -ERROR_IF(in_t != int8_t && input_zp != 0); // Zero point only for int8_t -ERROR_IF(weight_t != int8_t && weight_zp != 0); +ERROR_IF(in_t != i8_t && input_zp != 0); // Zero point only for int8_t +ERROR_IF(weight_t != i8_t && weight_zp != 0); ERROR_IF(pad_top < 0 || pad_bottom < 0 || pad_left < 0 || pad_right < 0); ERROR_IF(stride_y < 1 || stride_x < 1); ERROR_IF(dilation_y < 1 || dilation_x < 1); @@ -197,14 +210,18 @@ for_each(0 <= n < N, 0 <= oy < OH, 0 <= ox < OW; 0 <= c < C, 0 <= m < M) { index_t y = iy + ky * dilation_y; index_t x = ix + kx * dilation_x; if (0 <= y < IH && 0 <= x < IW) { - out_t value = tensor_read<in_t>(input, [N,IH,IW,C], [n,y,x,c]); - out_t weight = tensor_read<weight_t>(weight, [KH,KW,C,M], [ky,kx,c,m]); - value = value - input_zp; - weight = weight - weight_zp; - acc = apply_add<out_t>(acc, value * weight); + out_t value = static_cast<out_t>(tensor_read<in_t>(input, + [N,IH,IW,C], + [n,y,x,c])); + out_t weight = static_cast<out_t>(tensor_read<weight_t>(weight, + [KH,KW,C,M], + [ky,kx,c,m])); + value = apply_sub_s<out_t>(value, static_cast<out_t>(input_zp)); + weight = apply_sub_s<out_t>(weight, static_cast<out_t>(weight_zp)); + acc = apply_add_s<out_t>(acc, apply_mul_s<out_t>(value, weight)); } } - acc = apply_add<out_t>(acc, bias[(BC == 1) ? 0 : (c * M) + m]); + acc = apply_add_s<out_t>(acc, bias[(BC == 1) ? 
0 : (c * M) + m]); tensor_write<out_t>(output, [N,OH,OW,C * M], [n,oy,ox,c * M + m], acc); } ---- @@ -262,20 +279,20 @@ include::{generated}/operators/FULLY_CONNECTED.adoc[] [source,c++] ---- -ERROR_IF(in_t != int8_t && input_zp != 0); // Zero point only for int8_t -ERROR_IF(weight_t != int8_t && weight_zp != 0); +ERROR_IF(in_t != i8_t && input_zp != 0); // Zero point only for int8_t +ERROR_IF(weight_t != i8_t && weight_zp != 0); ERROR_IF(BC != OC && BC != 1); for_each(0 <= n < N, 0 <= oc < OC) { out_t acc = 0; for_each(0 <= ic < IC) { - out_t value = tensor_read<in_t>(input, [N,IC], [n,ic]); - out_t weight = tensor_read<weight_t>(weight, [OC,IC], [oc,ic]); - value = value - input_zp; - weight = weight - weight_zp; - acc = apply_add<out_t>(acc, value * weight); + out_t value = static_cast<out_t>(tensor_read<in_t>(input, [N,IC], [n,ic])); + out_t weight = static_cast<out_t>(tensor_read<weight_t>(weight, [OC,IC], [oc,ic])); + value = apply_sub_s<out_t>(value, static_cast<out_t>(input_zp)); + weight = apply_sub_s<out_t>(weight, static_cast<out_t>(weight_zp)); + acc = apply_add_s<out_t>(acc, apply_mul_s<out_t>(value, weight)); } - acc = apply_add<out_t>(acc, bias[(BC == 1) ? 0 : oc]); + acc = apply_add_s<out_t>(acc, bias[(BC == 1) ? 
0 : oc]); tensor_write<out_t>(output, [N,OC], [n,oc], acc); } ---- @@ -288,15 +305,15 @@ include::{generated}/operators/MATMUL.adoc[] [source,c++] ---- -ERROR_IF(in_t != int8_t && (A_zp != 0 || B_zp != 0)); // Zero point only for int8_t +ERROR_IF(in_t != i8_t && (A_zp != 0 || B_zp != 0)); // Zero point only for int8_t for_each(0 <= n < N, 0 <= h < H, 0 <= w < W) { out_t acc = 0; for_each(0 <= c < C) { - out_t value1 = tensor_read<in_t>(A, [N,H,C], [n,h,c]); - out_t value2 = tensor_read<in_t>(B, [N,C,W], [n,c,w]); - value1 = value1 - A_zp; - value2 = value2 - B_zp; - acc = apply_add<out_t>(acc, value1 * value2); + out_t value1 = static_cast<out_t>(tensor_read<in_t>(A, [N,H,C], [n,h,c])); + out_t value2 = static_cast<out_t>(tensor_read<in_t>(B, [N,C,W], [n,c,w])); + value1 = apply_sub_s<out_t>(value1, static_cast<out_t>(A_zp)); + value2 = apply_sub_s<out_t>(value2, static_cast<out_t>(B_zp)); + acc = apply_add_s<out_t>(acc, apply_mul_s<out_t>(value1, value2)); } tensor_write<out_t>(output, [N,H,W], [n,h,w], acc); } @@ -329,7 +346,7 @@ for_each(0 <= n < N, 0 <= oy < H, 0 <= ox < W, 0 <= c < C ) { index_t x = ix + kx; if (y >= 0 && y < IH && x >= 0 && x < IW) { in_out_t value = tensor_read<in_out_t>(input, [N,IH,IW,C], [n,y,x,c]); - acc = apply_max(acc, value); + acc = apply_max_s<in_out_t>(acc, value); } } tensor_write<in_out_t>(output, [N,OH,OW,C], [n,oy,ox,c], acc); @@ -376,8 +393,8 @@ include::{generated}/operators/TRANSPOSE_CONV2D.adoc[] [source,c++] ---- -ERROR_IF(in_t != int8_t && input_zp != 0); // Zero point only allowed for int8_t -ERROR_IF(weight_t != int8_t && weight_zp != 0); +ERROR_IF(in_t != i8_t && input_zp != 0); // Zero point only allowed for int8_t +ERROR_IF(weight_t != i8_t && weight_zp != 0); ERROR_IF(out_pad_top <= -KH || out_pad_bottom <= -KH); ERROR_IF(out_pad_left <= -KW || out_pad_right <= -KW); ERROR_IF(stride_y < 1 || stride_x < 1); @@ -393,12 +410,12 @@ for_each(0 <= n < N, 0 <= iy < IH, 0 <= ix < IW, 0 <= oc < OC, index_t oy = iy * 
stride_y + out_pad_top + ky; index_t ox = ix * stride_x + out_pad_left + kx; if (oy >= 0 && oy < OH && ox >= 0 && ox < OW) { - out_t acc = tensor_read<out_t>(output, [N,OH,OW,OC], [n,oy,ox,oc]); - out_t value = tensor_read<in_t>(input, [N,IH,IW,IC], [n,iy,ix,ic]); - out_t weight = tensor_read<weight_t>(weight, [OC,KH,KW,IC], [oc,ky,kx,ic]); - value = value - input_zp; - weight = weight - weight_zp; - acc = apply_add<out_t>(acc, value * weight); + out_t acc = static_cast<out_t>(tensor_read<out_t>(output, [N,OH,OW,OC], [n,oy,ox,oc])); + out_t value = static_cast<out_t>(tensor_read<in_t>(input, [N,IH,IW,IC], [n,iy,ix,ic])); + out_t weight = static_cast<out_t>(tensor_read<weight_t>(weight, [OC,KH,KW,IC], [oc,ky,kx,ic])); + value = apply_sub_s<out_t>(value, static_cast<out_t>(input_zp)); + weight = apply_sub_s<out_t>(weight, static_cast<out_t>(weight_zp)); + acc = apply_add_s<out_t>(acc, apply_mul_s<out_t>(value, weight)); tensor_write<out_t>(output, [N,OH,OW,OC], [n,oy,ox,oc], acc); } } |