diff options
Diffstat (limited to 'chapters/tensor_ops.adoc')
-rw-r--r-- | chapters/tensor_ops.adoc | 293 |
1 files changed, 12 insertions, 281 deletions
diff --git a/chapters/tensor_ops.adoc b/chapters/tensor_ops.adoc index b3de433..3de5150 100644 --- a/chapters/tensor_ops.adoc +++ b/chapters/tensor_ops.adoc @@ -1,7 +1,7 @@ // // This confidential and proprietary software may be used only as // authorised by a licensing agreement from ARM Limited -// (C) COPYRIGHT 2020-2023 ARM Limited +// (C) COPYRIGHT 2020-2024 ARM Limited // ALL RIGHTS RESERVED // The entire notice above must be reproduced on all authorised // copies and copies may only be made to the extent permitted @@ -17,34 +17,7 @@ include::{generated}/operators/ARGMAX.adoc[] [source,c++] ---- -ERROR_IF(axis < 0 || axis >= rank(shape1)); -if (axis == 0) { - left_shape = []; -} else { - left_shape = shape1[0:axis - 1]; -} -if (axis == rank(shape1)-1) { - right_shape = []; -} else { - right_shape = shape1[axis+1:rank(shape1) - 1]; -} -ERROR_IF(flatten(left_shape, right_shape) != shape); -for_each(left_index in left_shape) { - for_each(right_index in right_shape) { - in_t max_value = minimum_s<in_t>; - out_t max_index = 0; - for (i = 0; i < shape[axis]; i++) { - dim_t index = flatten(left_index, [i], right_index); - in_t value = tensor_read<in_t>(input, shape1, index); - if (apply_max_s<in_t>(value, max_value) != max_value) { - max_value = value; - max_index = i; - } - } - dim_t index = flatten(left_index, right_index); - tensor_write<out_t>(output, shape, index, max_index); - } -} +include::{pseudocode}/operators/ARGMAX.tosac[lines=10..-1] ---- ==== AVG_POOL2D @@ -57,47 +30,7 @@ include::{generated}/operators/AVG_POOL2D.adoc[] [source,c++] ---- -ERROR_IF(in_out_t != i8_t && input_zp != 0); // Zero point only for int8_t -ERROR_IF(in_out_t != i8_t && output_zp != 0); // Zero point only for int8_t -ERROR_IF(kernel_y < 1 || kernel_x < 1); // kernel size must be >= 1 -ERROR_IF(stride_y < 1 || stride_x < 1); -ERROR_IF(pad_top < 0 || pad_bottom < 0 || pad_left < 0 || pad_right < 0); -// Padding must be less than kernel size to avoid -// a divide-by-zero. -ERROR_IF(pad_right >= kernel_x || pad_left >= kernel_x); -ERROR_IF(pad_top >= kernel_y || pad_bottom >= kernel_y); -ERROR_IF(OH != idiv_check(IH + pad_top + pad_bottom - kernel_y, stride_y) + 1); -ERROR_IF(OW != idiv_check(IW + pad_left + pad_right - kernel_x, stride_x) + 1); - -for_each(0 <= n < N, 0 <= oy < OH, 0 <= ox < OW, 0 <= c < C ) { - in_out_t output_val; - acc_t acc = 0; - int count = 0; - index_t iy = oy * stride_y - pad_top; - index_t ix = ox * stride_x - pad_left; - for_each(0 <= ky < kernel_y, 0 <= kx < kernel_x) { - index_t y = iy + ky; - index_t x = ix + kx; - // Only values from the input tensor are used to calculate the - // average, padding does not count - if (0 <= y < IH and 0 <= x < IW) { - count++; - acc_t value = sign_extend<acc_t>(tensor_read<in_out_t>(input, [N,IH,IW,C], [n,y,x,c])); - value = apply_sub_s<acc_t>(value, sign_extend<acc_t>(input_zp)); - acc = apply_add_s<acc_t>(acc, value); - } - } - if (is_float(in_out_t)) { - output_val = acc / static_cast<in_out_t>(count); - } else { - scale_t scale = reciprocal_scale(count); - acc = apply_scale_32(acc, scale.multiplier, scale.shift, false); - acc = apply_add_s<acc_t>(acc, sign_extend<acc_t>(output_zp)); - acc = apply_clip_s<acc_t>(acc, minimum_s<in_out_t>, maximum_s<in_out_t>); - output_val = static_cast<in_out_t>(acc); - } - tensor_write<in_out_t>(output, [N,OH,OW,C], [n,oy,ox,c], output_val); -} +include::{pseudocode}/operators/AVG_POOL2D.tosac[lines=10..-1] ---- ==== CONV2D @@ -108,37 +41,7 @@ include::{generated}/operators/CONV2D.adoc[] [source,c++] ---- -ERROR_IF(in_t != i8_t && input_zp != 0); // Zero point only for int8_t -ERROR_IF(weight_t != int8_t && weight_zp != 0); -ERROR_IF(pad_top < 0 || pad_bottom < 0 || pad_left < 0 || pad_right < 0); -ERROR_IF(stride_y < 1 || stride_x < 1); -ERROR_IF(dilation_y < 1 || dilation_x < 1); -ERROR_IF(OH != idiv_check(IH - 1 + pad_top + pad_bottom - (KH - 1) * dilation_y, stride_y) + 1); -ERROR_IF(OW != idiv_check(IW - 1 + pad_left + pad_right - (KW - 1) * dilation_x, stride_x) + 1); -ERROR_IF(BC != OC && BC != 1); - -for_each(0 <= n < N, 0 <= oy < OH, 0 <= ox < OW; 0 <= oc < OC) { - out_t acc = 0; - index_t iy = oy * stride_y - pad_top; - index_t ix = ox * stride_x - pad_left; - for_each(0 <= ky < KH, 0 <= kx < KW, 0 <= ic < IC) { - index_t y = iy + ky * dilation_y; - index_t x = ix + kx * dilation_x; - if (0 <= y < IH && 0 <= x < IW) { - out_t value = static_cast<out_t>(tensor_read<in_t>(input, - [N,IH,IW,IC], - [n,y,x,ic])); - out_t weight = static_cast<out_t>(tensor_read<weight_t>(weight, - [OC,KH,KW,IC], - [oc,ky,kx,ic])); - value = apply_sub_s<out_t>(value, static_cast<out_t>(input_zp)); - weight = apply_sub_s<out_t>(weight, static_cast<out_t>(weight_zp)); - acc = apply_add_s<out_t>(acc, apply_mul_s<out_t>(value, weight)); - } - } - acc = apply_add_s<out_t>(acc, bias[(BC == 1) ? 0 : oc]); - tensor_write<out_t>(output, [N,OH,OW,OC], [n,oy,ox,oc], acc); -} +include::{pseudocode}/operators/CONV2D.tosac[lines=10..-1] ---- ==== CONV3D @@ -149,40 +52,7 @@ include::{generated}/operators/CONV3D.adoc[] [source,c++] ---- -ERROR_IF(in_t != i8_t && input_zp != 0); // Zero point only for int8_t -ERROR_IF(weight_t != i8_t && weight_zp != 0); -ERROR_IF(pad_d0 < 0 || pad_d1 < 0 || pad_top < 0 || pad_bottom < 0 || pad_left < 0 || pad_right < 0); -ERROR_IF(stride_d < 1 || stride_y < 1 || stride_x < 1); -ERROR_IF(dilation_d < 1 || dilation_y < 1 || dilation_x < 1); -ERROR_IF(OD != idiv_check(ID - 1 + pad_d0 + pad_d1 - (KD - 1) * dilation_d, stride_d) + 1); -ERROR_IF(OH != idiv_check(IH - 1 + pad_top + pad_bottom - (KH - 1) * dilation_y, stride_y) + 1); -ERROR_IF(OW != idiv_check(IW - 1 + pad_left + pad_right - (KW - 1) * dilation_x, stride_x) + 1); -ERROR_IF(BC != OC && BC != 1); - -for_each(0 <= n < N, 0 <= od < OD, 0 <= oy < OH, 0 <= ox < OW; 0 <= oc < OC) { - out_t acc = 0; - index_t id = od * stride_d - pad_d0; - index_t iy = oy * stride_y - pad_top; - index_t ix = ox * stride_x - pad_left; - for_each(0 <= kd < KD, 0 <= ky < KH, 0 <= kx < KW, 0 <= ic < IC) { - index_t d = id + kd * dilation_d; - index_t y = iy + ky * dilation_y; - index_t x = ix + kx * dilation_x; - if (0 <= x < IW && 0 <= y < IH && 0 <= d < ID) { - out_t value = static_cast<out_t>(tensor_read<in_t>(input, - [N,ID,IH,IW,IC], - [n,d,y,x,ic])); - out_t weight = static_cast<out_t>(tensor_read<weight_t>(weight, - [OC,KD,KH,KW,IC], - [oc,kd,ky,kx,ic])); - value = apply_sub_s<out_t>(value, static_cast<out_t>(input_zp)); - weight = apply_sub_s<out_t>(weight, static_cast<out_t>(weight_zp)); - acc = apply_add_s<out_t>(acc, apply_mul_s<out_t>(value, weight)); - } - } - acc = apply_add_s<out_t>(acc, bias[(BC == 1) ? 0 : oc]); - tensor_write<out_t>(output, [N,OD,OH,OW,OC], [n,od,oy,ox,oc], acc); -} +include::{pseudocode}/operators/CONV3D.tosac[lines=10..-1] ---- ==== DEPTHWISE_CONV2D @@ -193,37 +63,7 @@ include::{generated}/operators/DEPTHWISE_CONV2D.adoc[] [source,c++] ---- -ERROR_IF(in_t != i8_t && input_zp != 0); // Zero point only for int8_t -ERROR_IF(weight_t != i8_t && weight_zp != 0); -ERROR_IF(pad_top < 0 || pad_bottom < 0 || pad_left < 0 || pad_right < 0); -ERROR_IF(stride_y < 1 || stride_x < 1); -ERROR_IF(dilation_y < 1 || dilation_x < 1); -ERROR_IF(OH != idiv_check(IH - 1 + pad_top + pad_bottom - (KH - 1) * dilation_y, stride_y) + 1); -ERROR_IF(OW != idiv_check(IW - 1 + pad_left + pad_right - (KW - 1) * dilation_x, stride_x) + 1); -ERROR_IF(BC != C*M && BC != 1); - -for_each(0 <= n < N, 0 <= oy < OH, 0 <= ox < OW; 0 <= c < C, 0 <= m < M) { - out_t acc = 0; - index_t iy = oy * stride_y - pad_top; - index_t ix = ox * stride_x - pad_left; - for_each(0 <= ky < KH, 0 <= kx < KW) { - index_t y = iy + ky * dilation_y; - index_t x = ix + kx * dilation_x; - if (0 <= y < IH && 0 <= x < IW) { - out_t value = static_cast<out_t>(tensor_read<in_t>(input, - [N,IH,IW,C], - [n,y,x,c])); - out_t weight = static_cast<out_t>(tensor_read<weight_t>(weight, - [KH,KW,C,M], - [ky,kx,c,m])); - value = apply_sub_s<out_t>(value, static_cast<out_t>input_zp); - weight = apply_sub_s<out_t>(weight, static_cast<out_t>weight_zp); - acc = apply_add_s<out_t>(acc, apply_mul_s<out_t>(value, weight)); - } - } - acc = apply_add_s<out_t>(acc, bias[(BC == 1) ? 0 : (c * M) + m]); - tensor_write<out_t>(output, [N,OH,OW,C * M], [n,oy,ox,c * M + m], acc); -} +include::{pseudocode}/operators/DEPTHWISE_CONV2D.tosac[lines=10..-1] ---- ==== FFT2D @@ -247,28 +87,7 @@ include::{generated}/operators/FFT2D.adoc[] [source,c++] ---- -ERROR_IF(!power_of_two(H)); -ERROR_IF(!power_of_two(W)); - -float sign_val = 1.0; - -if (inverse) { - sign_val = -1.0; -} - -for_each(0 <= n < N, 0 <= oy < H, 0 <= ox < W) { - in_out_t sum_real = 0.0; - in_out_t sum_imag = 0.0; - for_each(0 <= iy < H, 0 <= ix < W) { - in_out_t val_real = tensor_read<in_out_t>(input_real, [N,H,W], [n,iy,ix]); - in_out_t val_imag = tensor_read<in_out_t>(input_imag, [N,H,W], [n,iy,ix]); - float_t a = sign_val * 2 * pi() * ((iy * oy) / H + (ix * ox) / W); - sum_real += val_real * cos(a) + val_imag * sin(a); - sum_imag += -val_real * sin(a) + val_imag * cos(a); - } - tensor_write<in_out_t>(output_real, [N,H,W], [n,oy,ox], sum_real); - tensor_write<in_out_t>(output_imag, [N,H,W], [n,oy,ox], sum_imag); -} +include::{pseudocode}/operators/FFT2D.tosac[lines=10..-1] ---- ==== FULLY_CONNECTED @@ -279,22 +98,7 @@ include::{generated}/operators/FULLY_CONNECTED.adoc[] [source,c++] ---- -ERROR_IF(in_t != i8_t && input_zp != 0); // Zero point only for int8_t -ERROR_IF(weight_t != i8_t && weight_zp != 0); -ERROR_IF(BC != OC && BC != 1); - -for_each(0 <= n < N, 0 <= oc < OC) { - out_t acc = 0; - for_each(0 <= ic < IC) { - out_t value = static_cast<out_t>(tensor_read<in_t>(input, [N,IC], [n,ic])); - out_t weight = static_cast<out_t>(tensor_read<weight_t>(weight, [OC,IC], [oc,ic])); - value = apply_sub_s<out_t>(value, static_cast<out_t>(input_zp)); - weight = apply_sub_s<out_t>(weight, static_cast<out_t>(weight_zp)); - acc = apply_add_s<out_t>(acc, apply_mul_s<out_t>(value, weight)); - } - acc = apply_add_s<out_t>(acc, bias[(BC == 1) ? 0 : oc]); - tensor_write<out_t>(output, [N,OC], [n,oc], acc); -} +include::{pseudocode}/operators/FULLY_CONNECTED.tosac[lines=10..-1] ---- ==== MATMUL @@ -305,18 +109,7 @@ include::{generated}/operators/MATMUL.adoc[] [source,c++] ---- -ERROR_IF(in_t != i8_t && (A_zp != 0 || B_zp != 0)); // Zero point only for int8_t -for_each(0 <= n < N, 0 <= h < H, 0 <= w < W) { - out_t acc = 0; - for_each(0 <= c < C) { - out_t value1 = static_cast<out_t>(tensor_read<in_t>(A, [N,H,C], [n,h,c])); - out_t value2 = static_cast<out_t>(tensor_read<in_t>(B, [N,C,W], [n,c,w])); - value1 = apply_sub_s<out_t>(value1, static_cast<out_t>(A_zp)); - value2 = apply_sub_s<out_t>(value2, static_cast<out_t>(B_zp)); - acc = apply_add_s<out_t>(acc, apply_mul_s<out_t>(value1 * value2)); - } - tensor_write<out_t>(output, [N,H,W], [n,h,w], acc); -} +include::{pseudocode}/operators/MATMUL.tosac[lines=10..-1] ---- ==== MAX_POOL2D @@ -327,30 +120,7 @@ include::{generated}/operators/MAX_POOL2D.adoc[] [source,c++] ---- -ERROR_IF(kernel_y < 1 || kernel_x < 1); // kernel size must be >= 1 -ERROR_IF(stride_y < 1 || stride_x < 1); -ERROR_IF(pad_top < 0 || pad_bottom < 0 || pad_left < 0 || pad_right < 0); -// Padding must be less than kernel size, otherwise no -// input values will be used. -ERROR_IF(pad_right >= kernel_x || pad_left >= kernel_x); -ERROR_IF(pad_top >= kernel_y || pad_bottom >= kernel_y); -ERROR_IF(OH != idiv_check(IH + pad_top + pad_bottom - kernel_y, stride_y) + 1); -ERROR_IF(OW != idiv_check(IW + pad_left + pad_right - kernel_x, stride_x) + 1); - -for_each(0 <= n < N, 0 <= oy < H, 0 <= ox < W, 0 <= c < C ) { - in_out_t acc = minimum_value<in_out_t>; - index_t iy = oy * stride_y - pad_top; - index_t ix = ox * stride_x - pad_left; - for_each( 0 <= ky < kernel_y, 0 <= kx < kernel_x ) { - index_t y = iy + ky; - index_t x = ix + kx; - if (y >= 0 && y < IH && x >= 0 && x < IW) { - in_out_t value = tensor_read<in_out_t>(input, [N,IH,IW,C], [n,y,x,c]); - acc = apply_max_s<in_out_t>(acc, value); - } - } - tensor_write<in_out_t>(output, [N,OH,OW,C], [n,oy,ox,c], acc); -} +include::{pseudocode}/operators/MAX_POOL2D.tosac[lines=10..-1] ---- ==== RFFT2D @@ -368,21 +138,7 @@ include::{generated}/operators/RFFT2D.adoc[] [source,c++] ---- -ERROR_IF(!power_of_two(H)); -ERROR_IF(!power_of_two(W)); - -for_each(0 <= n < N, 0 <= oy < H, 0 <= ox < W/2 + 1) { - in_out_t sum_real = 0.0; - in_out_t sum_imag = 0.0; - for_each(0 <= iy < H, 0 <= ix < W) { - in_out_t val_real = tensor_read<in_out_t>(input_real, [N,H,W], [n,iy,ix]); - float_t a = 2 * pi() * ((iy * oy) / H + (ix * ox) / W); - sum_real += val_real * cos(a); - sum_imag += -val_real * sin(a); - } - tensor_write<in_out_t>(output_real, [N,H,W], [n,oy,ox], sum_real); - tensor_write<in_out_t>(output_imag, [N,H,W], [n,oy,ox], sum_imag); -} +include::{pseudocode}/operators/RFFT2D.tosac[lines=10..-1] ---- ==== TRANSPOSE_CONV2D @@ -393,30 +149,5 @@ include::{generated}/operators/TRANSPOSE_CONV2D.adoc[] [source,c++] ---- -ERROR_IF(in_t != i8_t && input_zp != 0); // Zero point only allowed for int8_t -ERROR_IF(weight_t != i8_t && weight_zp != 0); -ERROR_IF(out_pad_top <= -KH || out_pad_bottom <= -KH); -ERROR_IF(out_pad_left <= -KW || out_pad_right <= -KW); -ERROR_IF(stride_y < 1 || stride_x < 1); -ERROR_IF(OH != (IH - 1) * stride_y + out_pad_top + out_pad_bottom + KH); -ERROR_IF(OW != (IW - 1) * stride_x + out_pad_left + out_pad_right + KW); -ERROR_IF(BC != OC && BC != 1); - -for_each(index in [N, OH, OW, OC]) { - tensor_write<out_t>(output, [N,OH,OW,OC], index, bias[(BC == 1) ? 0 : index[3]]) -} -for_each(0 <= n < N, 0 <= iy < IH, 0 <= ix < IW, 0 <= oc < OC, - 0 <= ic < IC, 0 <= ky < KH, 0 <= kx < KW) { - index_t oy = iy * stride_y + out_pad_top + ky; - index_t ox = ix * stride_x + out_pad_left + kx; - if (oy >= 0 && oy < OH && ox >= 0 && ox < OW) { - out_t acc = static_cast<out_t>(tensor_read<out_t>(output, [N,OH,OW,OC], [n,oy,ox,oc])); - out_t value = static_cast<out_t>(tensor_read<in_t>(input, [N,IH,IW,IC], [n,iy,ix,ic])); - out_t weight = static_cast<out_t>(tensor_read<weight_t>(weight, [OC,KH,KW,IC], [oc,ky,kx,ic])); - value = apply_sub_s<out_t>(value, static_cast<out_t>(input_zp)); - weight = apply_sub_s<out_t>(weight, static_cast<out_t>(weight_zp)); - acc = apply_add_s<out_t>(acc, apply_mul_s<out_t>(value, weight)); - tensor_write<out_t>(output, [N,OH,OW,OC], [n,oy,ox,oc], acc); - } -} +include::{pseudocode}/operators/TRANSPOSE_CONV2D.tosac[lines=10..-1] ---- |