// // This confidential and proprietary software may be used only as // authorised by a licensing agreement from ARM Limited // (C) COPYRIGHT 2020-2021 ARM Limited // ALL RIGHTS RESERVED // The entire notice above must be reproduced on all authorised // copies and copies may only be made to the extent permitted // by a licensing agreement from ARM Limited. === Tensor Operators ==== ARGMAX This returns the index with the largest value across the given axis of the input tensor. *Arguments* |=== |Argument|Type|Name|Shape|Description |Input|in_t*|input|shape1|Input tensor with rank from 1 to 4 |Attribute|int|axis|-|Axis in range from 0 to rank(shape1)-1 |Output|out_t*|output|shape|Output tensor, with rank = rank(shape1)-1 |=== *Quantization Parameters:* None *Operation Function:* [source,c++] ---- assert(axis >= 0 && axis < rank(shape1) && rank(shape1) <= 4); if (axis == 0) { left_shape = []; } else { left_shape = shape1[0:axis - 1]; } if (axis == rank(shape1)-1) { right_shape = []; } else { right_shape = shape1[axis+1:rank(shape1) - 1]; } assert(flatten(left_shape, right_shape) == shape); for_each(left_index in left_shape) { for_each(right_index in right_shape) { in_t max_value = minimum_value; int32_t max_index = 0; for (i = 0; i < shape[axis]; i++) { index = flatten(left_index, [i], right_index); in_t value = tensor_read(input, shape1, index); if (value > max_value) { max_value = value; max_index = i; } } index = flatten(left_index, right_index); tensor_write(output, shape, index, max_index); } } ---- *Supported Data Types:* |=== |Profile|Mode|in_t|out_t |Any|signed 8|int8_t|int32_t |Any|signed 16|int16_t|int32_t |MI, MT|floating-point|float_t|int32_t |=== ==== AVG_POOL2D This performs an average pooling over the given input tensor. A sliding window of size given by `kernel` is passed over the input tensor, with the mean value being placed in the output tensor. 
*Arguments:* |=== |Argument|Type|Name|Shape|Description |Input|in_t*|input|[N,H,W,C]|Input tensor 4D |Attribute|int*|kernel|[2]|[kernel_y, kernel_x] |Attribute|int*|stride|[2]|[stride_y, stride_x] |Attribute|int*|pad|[4]|[pad_top, pad_bottom, pad_left, pad_right] |Output|in_t*|output|[N,H,W,C]|Output tensor 4D |=== *Quantization Parameters:* |=== |Argument|Type|Name|Shape|Description |Attribute|in_t|input_zp|-|Input tensor zero point |Attribute|in_t|output_zp|-|Output tensor zero point |=== *Operation Function:* [source,c++] ---- assert(in_t == int8_t || input_zp == 0); // Zero point only for int8_t assert(in_t == int8_t || output_zp == 0); // Zero point only for int8_t pad = flatten([0,0], pad, [0,0]); for_each(0 <= n < N, 0 <= oy < H, 0 <= ox < W, 0 <= c < C ) { in_t output_val; acc_t acc = 0; int count = 0; iy = oy * stride_y - pad_top; ix = ox * stride_x - pad_left; for_each(0 <= ky < kernel_y, 0 <= kx < kernel_x) { y = iy + ky; x = ix + kx; acc_t value = tensor_read(input, [N,IH,IW,IC], [n,y,x,c], input_zp, pad); acc = apply_add(acc, value); if (0 <= y < IH and 0 <= x < IW) count++; } if (is_float(in_t)) { output_val = acc / (float)count; } else { scale_t scale = reciprocal_scale(count); acc = apply_scale_32(acc, scale.multiplier, scale.shift, false); output_val = (in_t)apply_clip(acc + output_zp, minimum, maximum); } tensor_write(output, [N,H,W,C], [n,oy,ox,c], output_val); } ---- *Supported Data Types:* |=== |Profile|Mode|in_t|acc_t |Any|signed 8|int8_t|int32_t |Any|signed 16|int16_t|int32_t |MI, MT|floating-point|float_t|float_t |=== ==== CONV2D Performs a 2D convolution over the given tensor input, using the weight tensor. *Arguments:* |=== |Argument|Type|Name|Shape|Description |Input|in_t*|input|[N,IH,IW,IC]|Input tensor |Input (MT profile) Attribute (BI/MI profiles)|weight_t*|weight|[OC,KH,KW,IC]|Weight kernel size KH x KW |Input (MT profile) Attribute (BI/MI profiles)|acc_t*|bias|[OC]|Per output channel bias data. 
|Attribute|int*|pad|[4]|[pad_top, pad_bottom, pad_left, pad_right] |Attribute|int*|stride|[2]|[stride_y, stride_x] |Attribute|int*|dilation|[2]|[dilation_y, dilation_x] |Output|acc_t*|output|[N,H,W,OC]|Output tensor |=== *Quantization Parameters:* |=== |Argument|Type|Name|Shape|Description |Attribute|in_t|input_zp|-|Input tensor zero point |Attribute|weight_t|weight_zp|-|Weight zero point |=== *Operation Function* [source,c++] ---- assert(in_t == int8_t || input_zp == 0); // Zero point only for int8_t assert(weight_t == int8_t || weight_zp == 0); pad = flatten([0,0], pad, [0,0]); for_each(0 <= n < N, 0 <= oy < H, 0 <= ox < W; 0 <= oc < OC) { acc_t acc = 0; iy = oy * stride_y - pad_top; ix = ox * stride_x - pad_left; for_each(0 <= ky < KH, 0 <= kx < KW, 0 <= ic < IC) { y = iy + ky * dilation_y; x = ix + kx * dilation_x; acc_t value = tensor_read(input, [N,IH,IW,IC], [n,y,x,ic], input_zp, pad); acc_t weight = tensor_read(weight, [OC,KH,KW,IC], [oc,ky,kx,ic], weight_zp); acc = apply_add(acc, value * weight); } acc = apply_add(acc, bias[oc]); tensor_write(output, [N,H,W,OC], [n,oy,ox,oc], acc); } ---- *Supported Data Types:* |=== |Profile|Mode|in_t|weight_t|acc_t |Any|signed 8x8|int8_t|int8_t|int32_t |Any|signed 8x4|int8_t|int4_t|int32_t |Any|signed 16x8|int16_t|int8_t|int48_t |MI, MT|floating-point|float_t|float_t|float_t |=== ==== CONV3D Performs a 3D convolution over the given input tensor. *Arguments:* |=== |Argument|Type|Name|Shape|Description |Input|in_t*|input|[N,ID,IH,IW,IC]|Input tensor |Input (MT profile) Attribute (BI/MI profiles)|weight_t*|weight|[OC,KD,KH,KW,IC]|Weight kernel size KDxKHxKW |Input (MT profile) Attribute (BI/MI profiles)|acc_t*|bias|[OC]|Per output channel bias data. 
|Attribute|int*|pad|[6]|[pad_d0, pad_d1, pad_top, pad_bottom, pad_left, pad_right] |Attribute|int*|stride|[3]|[stride_d, stride_y, stride_x] |Attribute|int*|dilation|[3]|[dilation_d, dilation_y, dilation_x] |Output|acc_t*|output|[N,D,H,W,OC]|Output tensor |=== *Quantization Parameters:* |=== |Argument|Type|Name|Shape|Description |Attribute|in_t|input_zp|-|Input tensor zero point |Attribute|weight_t|weight_zp|-|Weight zero point |=== *Operation Function* [source,c++] ---- assert(in_t == int8_t || input_zp == 0); // Zero point only for int8_t assert(weight_t == int8_t || weight_zp == 0); pad = flatten([0,0], pad, [0,0]); for_each(0 <= n < N, 0 <= od < D, 0 <= oy < H, 0 <= ox < W; 0 <= oc < OC) { acc_t acc = 0; id = od * stride_d - pad_d0; iy = oy * stride_y - pad_top; ix = ox * stride_x - pad_left; for_each(0 <= kd < KD, 0 <= ky < KH, 0 <= kx < KW, 0 <= ic < IC) { d = id + kd * dilation_d; y = iy + ky * dilation_y; x = ix + kx * dilation_x; acc_t value = tensor_read(input, [N,ID,IH,IW,IC], [n,d,y,x,ic], input_zp, pad); acc_t weight = tensor_read(weight,[OC,KD,KH,KW,IC],[oc,kd,ky,kx,ic], weight_zp); acc = apply_add(acc, value * weight); } acc = apply_add(acc, bias[oc]); tensor_write(output, [N,D,H,W,OC], [n,od,oy,ox,oc], acc); } ---- *Supported Data Types:* |=== |Profile|Mode|in_t|weight_t|acc_t |Any|signed 8x8|int8_t|int8_t|int32_t |Any|signed 8x4|int8_t|int4_t|int32_t |Any|signed 16x8|int16_t|int8_t|int48_t |MI, MT|floating-point|float_t|float_t|float_t |=== ==== DEPTHWISE_CONV2D Performs 2D convolutions separately over each channel of the given tensor input, using the weight tensor. *Arguments:* |=== |Argument|Type|Name|Shape|Description |Input|in_t*|input|[N,H,W,C]|Input tensor |Input (MT profile) Attribute (BI/MI profiles)|weight_t*|weight|[KH,KW,C,M]|Weight kernel size KH x KW |Input (MT profile) Attribute (BI/MI profiles)|acc_t*|bias|[C*M]|Per output channel bias data. 
|Attribute|int*|pad|[4]|[pad_top, pad_bottom, pad_left, pad_right] |Attribute|int*|stride|[2]|[stride_y, stride_x] |Attribute|int*|dilation|[2]|[dilation_y, dilation_x] |Output|acc_t*|output|[N,H,W,C*M]|Output tensor |=== *Quantization Parameters:* |=== |Argument|Type|Name|Shape|Description |Attribute|in_t|input_zp|-|Input tensor zero point |Attribute|weight_t|weight_zp|-|Weight zero point |=== *Operation Function* [source,c++] ---- assert(in_t == int8_t || input_zp == 0); // Zero point only for int8_t assert(weight_t == int8_t || weight_zp == 0); pad = flatten([0,0], pad, [0,0]); for_each(0 <= n < N, 0 <= oy < H, 0 <= ox < W; 0 <= c < C, 0 <= m < M) { acc_t acc = 0; iy = oy * stride_y - pad_top; ix = ox * stride_x - pad_left; for_each(0 <= ky < KH, 0 <= kx < KW) { y = iy + ky * dilation_y; x = ix + kx * dilation_x; acc_t value = tensor_read(input, [N,H,W,C], [n,y,x,c], input_zp, pad); acc_t weight = tensor_read(weight, [KH,KW,C,M], [ky,kx,c,m], weight_zp); acc = apply_add(acc, value * weight); } acc = apply_add(acc, bias[(c * M) + m]); tensor_write(output, [N,H,W,C * M], [n,oy,ox,c * M + m], acc); } ---- *Supported Data Types:* |=== |Profile|Mode|in_t|weight_t|acc_t |Any|signed 8x8|int8_t|int8_t|int32_t |Any|signed 8x4|int8_t|int4_t|int32_t |Any|signed 16x8|int16_t|int8_t|int48_t |MI, MT|floating-point|float_t|float_t|float_t |=== ==== FULLY_CONNECTED Performs a fully connected network. *Arguments:* |=== |Argument|Type|Name|Shape|Description |Input|in_t*|input|[N,IC]|Input tensor |Attribute|weight_t*|weight|[OC,IC]|Weights |Attribute|acc_t*|bias|[OC]|Per output channel bias data. 
|Output|acc_t*|output|[N,OC]|Output tensor |=== *Quantization Parameters:* |=== |Argument|Type|Name|Shape|Description |Attribute|in_t|input_zp|-|Input tensor zero point |Attribute|weight_t|weight_zp|-|Weight zero point |=== *Operation Function* [source,c++] ---- assert(in_t == int8_t || input_zp == 0); // Zero point only for int8_t assert(weight_t == int8_t || weight_zp == 0); for_each(0 <= n < N, 0 <= oc < OC) { acc_t acc = 0; for_each(0 <= ic < IC) { acc_t value = tensor_read(input, [N,IC], [n,ic], input_zp); acc_t weight = tensor_read(weight, [OC,IC], [oc,ic], weight_zp); acc = apply_add(acc, value * weight); } acc = apply_add(acc, bias[oc]); tensor_write(output, [N,OC], [n,oc], acc); } ---- *Supported Data Types:* |=== |Profile|Mode|in_t|weight_t|acc_t |Any|signed 8x8|int8_t|int8_t|int32_t |Any|signed 8x4|int8_t|int4_t|int32_t |Any|signed 16x8 |int16_t|int8_t|int48_t |MI, MT|floating-point|float_t|float_t|float_t |=== ==== MATMUL Performs two dimensional matrix multiplications. This allows both inputs to be activations, rather than reserving weights as an attribute in the FULLY_CONNECTED operator. 
*Arguments:* |=== |Argument|Type|Name|Shape|Description |Input|in_t*|A|[N,H,C]|Input tensor A, N matrices of size HxC |Input|in_t*|B|[N,C,W]|Input tensor B, N matrices of size CxW |Output|acc_t*|output|[N,H,W]|Output tensor, N matrices of size HxW |=== *Quantization Parameters:* |=== |Argument|Type|Name|Shape|Description |Attribute|in_t|A_zp|-|Input tensor A zero point |Attribute|in_t|B_zp|-|Input tensor B zero point |=== *Operation Function* [source,c++] ---- assert(in_t == int8_t || (A_zp == 0 && B_zp == 0)); // Zero point only for int8_t for_each(0 <= n < N, 0 <= h < H, 0 <= w < W) { acc_t acc = 0; for_each(0 <= c < C) { acc_t value1 = tensor_read(A, [N,H,C], [n,h,c], A_zp); acc_t value2 = tensor_read(B, [N,C,W], [n,c,w], B_zp); acc = apply_add(acc, value1 * value2); } tensor_write(output, [N,H,W], [n,h,w], acc); } ---- *Supported Data Types:* |=== |Profile|Mode|in_t|acc_t |Any|signed 8x8|int8_t|int32_t |Any|signed 16x16|int16_t|int48_t |MI, MT|floating-point|float_t|float_t |=== ==== MAX_POOL2D This performs a max pooling over the given input tensor. A sliding window of size given by `kernel` is passed over the input tensor, with the maximum value being placed in the output tensor. 
*Arguments:* |=== |Argument|Type|Name|Shape|Description |Input|in_t*|input|[N,H,W,C]|Input tensor 4D |Attribute|int*|kernel|[2]|[kernel_y, kernel_x] |Attribute|int*|stride|[2]|[stride_y, stride_x] |Attribute|int*|pad|[4]|[pad_top, pad_bottom, pad_left, pad_right] |Output|in_t*|output|[N,H,W,C]|Output tensor 4D |=== *Quantization Parameters:* None *Operation Function:* [source,c++] ---- pad = flatten([0,0], pad, [0,0]); for_each(0 <= n < N, 0 <= oy < H, 0 <= ox < W, 0 <= c < C ) { in_t acc = minimum_value; iy = oy * stride_y - pad_top; ix = ox * stride_x - pad_left; for_each(0 <= ky < kernel_y, 0 <= kx < kernel_x) { y = iy + ky; x = ix + kx; in_t value = tensor_read(input, [N,IH,IW,IC], [n,y,x,c], pad); acc = apply_max(acc, value); } tensor_write(output, [N,H,W,C], [n,oy,ox,c], acc); } ---- *Supported Data Types:* |=== |Profile|Mode|in_t |Any|signed 8|int8_t |Any|16-bit|int16_t |MI, MT|floating-point|float_t |=== ==== TRANSPOSE_CONV2D Performs a 2D transposed convolution over the given tensor input, using the weights tensor. *Arguments:* |=== |Argument|Type|Name|Shape|Description |Input|in_t*|input|[N,IH,IW,IC]|Input tensor |Input (MT profile) Attribute (BI/MI profiles)|weight_t*|weight|[OC,KH,KW,IC]|Weight kernel size KH x KW |Input (MT profile) Attribute (BI/MI profiles)|acc_t*|bias|[OC]|Per output channel bias data. 
|Attribute|int*|out_pad|[2]|[out_pad_top, out_pad_left] |Attribute|int*|stride|[2]|[stride_y, stride_x] |Attribute|int*|out_shape|[4]|[N,OH,OW,OC] |Output|acc_t*|output|[N,OH,OW,OC]|Output tensor |=== *Quantization Parameters:* |=== |Argument|Type|Name|Shape|Description |Attribute|in_t|input_zp|-|Input tensor zero point |Attribute|weight_t|weight_zp|-|Weight zero point |=== *Operation Function* [source,c++] ---- assert(in_t == int8_t || input_zp == 0); // Zero point only allowed for int8_t assert(weight_t == int8_t || weight_zp == 0); for_each(index in out_shape) { tensor_write(output, [N,OH,OW,OC], index, bias[index[3]]) } for_each(0 <= n < N, 0 <= iy < IH, 0 <= ix < IW, 0 <= oc < OC, 0 <= ic < IC, 0 <= ky < KH, 0 <= kx < KW) { oy = iy * stride_y - out_pad_top + ky; ox = ix * stride_x - out_pad_left + kx; if (oy >= 0 && oy < OH && ox >= 0 && ox < OW) { acc_t acc = tensor_read(output, [N,OH,OW,OC], [n,oy,ox,oc]); acc_t value = tensor_read(input, [N,IH,IW,IC], [n,iy,ix,ic], input_zp); acc_t weight = tensor_read(weight, [OC,KH,KW,IC], [oc,ky,kx,ic], weight_zp); acc = apply_add(acc, value * weight); tensor_write(output, [N,OH,OW,OC], [n,oy,ox,oc], acc); } } ---- *Supported Data Types:* |=== |Profile|Mode|in_t|weight_t|acc_t |Any|signed 8x8|int8_t|int8_t|int32_t |Any|signed 8x4|int8_t|int4_t|int32_t |Any|signed 16x8|int16_t|int8_t|int48_t |MI, MT|floating-point|float_t|float_t|float_t |===