diff options
author | Eric Kunze <eric.kunze@arm.com> | 2020-10-01 18:50:46 -0700 |
---|---|---|
committer | Eric Kunze <eric.kunze@arm.com> | 2020-10-01 18:50:46 -0700 |
commit | 3309a5362a13f840e84a2f67b9ba7141aae58cc4 (patch) | |
tree | 74bde543bc11bf43208fa4b3789e2def23919890 /chapters/tensor_ops.adoc | |
parent | c92cb063a2ebfad2fd9ffd4c4a73184327b864ed (diff) | |
download | specification-3309a5362a13f840e84a2f67b9ba7141aae58cc4.tar.gz |
TOSA spec 0.20.0, initial public versionv0.20.0
Change-Id: I7c528ebf7e78759ada4bb951b2471c367d3f4525
Diffstat (limited to 'chapters/tensor_ops.adoc')
-rw-r--r-- | chapters/tensor_ops.adoc | 519 |
1 files changed, 519 insertions, 0 deletions
diff --git a/chapters/tensor_ops.adoc b/chapters/tensor_ops.adoc new file mode 100644 index 0000000..2ea4ba8 --- /dev/null +++ b/chapters/tensor_ops.adoc @@ -0,0 +1,519 @@ +// +// This confidential and proprietary software may be used only as +// authorised by a licensing agreement from ARM Limited +// (C) COPYRIGHT 2020 ARM Limited +// ALL RIGHTS RESERVED +// The entire notice above must be reproduced on all authorised +// copies and copies may only be made to the extent permitted +// by a licensing agreement from ARM Limited. + +=== Tensor Operators + +==== ARGMAX + +This returns the index with the largest value across the given axis of the input tensor. + +*Arguments* + +|=== +|Argument|Type|Name|Shape|Description + +|Input|in_t*|input|input_shape|Input tensor dimension k \<=4 +|Attribute|int|axis|-|Axis in range 0 to k-1 +|Output|out_t*|output|output_shape|Output tensor dimension k-1 +|=== + +*Quantization Parameters:* + +None + +*Operation Function:* + +[source,c] +---- +assert(axis >= 0 && axis < k && k <=4) +left_shape = input_shape[0:axis-1] +right_shape = input_shape[axis+1:k-1] +assert( concat(left_shape, right_shape) == output_shape ) +for_each ( left_index in left_shape, right_index in right_shape ) { + in_t max_value = minimum_value<in_t> + int32 max_index = 0; + for (i=0; i<shape[axis]; i++) { + index = concat(left_index, [i], right_index) + value = tensor_read<in_t>(input, input_shape, index) + if (value > max_value) { max_value = value; max_index=i; } + } + index = concat(left_index, right_index) + tensor_write<int32_t>(output, output_shape, index, max_index) +} +---- + +*Supported Data Types:* + +|=== +|Profile|Mode|in_t|out_t + +|Any|signed 8|aint8|int32 +|Any|signed 16|int16|int32 +|MI, MT|float|float|int32 +|=== + +==== AVG_POOL2D + +This performs an average pooling over the given input tensor. A sliding window of size given by <kernel size> is passed over the input tensor, with the mean value being placed in the output tensor. 
+ +*Arguments:* + +|=== +|Argument|Type|Name|Shape|Description + +|Input|in_t *|input|[N,H,W,C]|Input tensor 4D +|Attribute|int *|kernel|[2]|[kernel_y, kernel_x] +|Attribute|int *|stride|[2]|[stride_y, stride_x] +|Attribute|int *|pad|[4]|[pad_top, pad_bottom, pad_left, pad_right] +|Output|out_t *|output|[N,H,W,C]|Output tensor 4D +|=== + +*Quantization Parameters:* + +|=== +|Argument|Type|Name|Shape|Description + +|Attribute|in_t|input_zp|-|Input tensor zero point +|Attribute|out_t|output_zp|-|Output tensor zero point +|=== + +*Operation Function:* + +[source,c] +---- +assert(in_t == aint8_t || input_zp == 0) // Zero point only for asymmetric int8 +assert(out_t == aint8_t || output_zp == 0) // Zero point only for asymmetric int8 +pad=concat([0,0],pad,[0,0]) +for_each ( 0<=n<N, 0<=oy<H, 0<=ox<W, 0<=c<C ) { + acc_t acc = 0; + int count = 0; + iy = oy * stride_y - pad_top + ix = ox * stride_x - pad_left + for_each ( 0<=ky<kernel_y, 0<=kx<kernel_x) { + y = iy + ky + x = ix + kx + value = tensor_read<in_t>(input, [N,IH,IW,IC], [n,y,x,c], input_zp, pad) + acc = apply_add<32>(acc, value) + if (0<=y<IH and 0<=x<IW) count++ + } + if (is_float(out_t)) { + acc = acc / (float)count; + } else { + scale = reciprocal_scale(count) + acc = apply_scale_32(acc, scale.multiplier, scale.shift, false) + acc = apply_clip(acc + output_zp, output_min, output_max) + } + tensor_write<out_t>(output, [N,H,W,C], [n,oy,ox,c], acc) +} +---- + +*Supported Data Types:* +|=== +|Profile|Mode|in_t|acc_t|out_t + +|Any|signed 8|aint8|int32_t|aint8 +|Any|signed 16|int16|int32_t|int16 +|MI, MT|float|float|float|float +|=== + +==== CONV2D + +Performs a 2D convolution over the given tensor input, using the weight tensor. + +*Arguments:* + +|=== +|Argument|Type|Name|Shape|Description + +|Input|in_t*|input|[N,IH,IW,IC]|Input tensor +|Attribute|weight_t*|weight|[OC,KH,KW,IC]|Weight kernel size KH x KW +|Attribute|acc_t*|bias|[OC]|Per output channel bias data. 
+|Attribute|int*|pad|[4]|[pad_top, pad_bottom, pad_left, pad_right] +|Attribute|int*|stride|[2]|[stride_y, stride_x] +|Attribute|int*|dilation|[2]|[dilation_y, dilation_x] +|Output|out_t*|output|[N,H,W,OC]|Output tensor +|=== + +*Quantization Parameters:* + +|=== +|Argument|Type|Name|Shape|Description + +|Attribute|in_t|input_zp|-|Input tensor zero point +|Attribute|weight_t|weight_zp|-|Weight zero point +|=== + +*Operation Function* + +[source,c] +---- +assert(in_t == aint8_t || input_zp == 0) // Zero point only for asymmetric int8 +assert(weight_t == aint8_t || weight_zp == 0) +pad=concat([0,0],pad,[0,0]) +for_each (0<=n<N, 0<=oy<H, 0<=ox<W; 0<=oc<OC) { + acc_t acc = 0 + iy = oy * stride_y - pad_top + ix = ox * stride_x - pad_left + for_each (0<=ky<KH, 0<=kx<KW, 0<=ic<IC) { + y = iy + ky * dilation_y + x = ix + kx * dilation_x + value = tensor_read<in_t>(input, [N,IH,IW,IC], [n,y,x,ic], input_zp, pad) + weight = tensor_read<weight_t>(weight, [OC,KH,KW,IC], [oc,ky,kx,ic], weight_zp) + acc = apply_add<acc_t>(acc, value * weight) + } + acc = apply_add<acc_t>(acc, bias[oc]) + tensor_write<acc_t>(output, [N,H,W,OC], [n,oy,ox,oc], acc) +} +---- + +*Supported Data Types:* + +|=== +|Profile|Mode|in_t|weight_t|acc_t + +|Any|signed 8x8|aint8|int8,aint8|int32 +|Any|signed 8x4|aint8|int4|int32 +|Any|signed 16x8|int16|int8|int48 +|MI, MT|float|float|float|float +|=== + +==== CONV3D + +Performs a 3D convolution over the given input tensor. + +*Arguments:* + +|=== +|Argument|Type|Name|Shape|Description + +|Input|in_t*|input|[N,ID,IH,IW,IC]|Input tensor +|Attribute|weight_t*|weight|[OC,KD,KH,KW,IC]|Weight kernel size KDxKHxKW +|Attribute|acc_t*|bias|[OC]|Per output channel bias data. 
+ +|Attribute|int*|pad|[6]|[pad_d0, pad_d1, pad_top, pad_bottom, pad_left, pad_right] +|Attribute|int*|stride|[3]|[stride_d, stride_y, stride_x] +|Attribute|int*|dilation|[3]|[dilation_d, dilation_y, dilation_x] +|Output|out_t*|output|[N,D,H,W,OC]|Output tensor +|=== + +*Quantization Parameters:* + +|=== +|Argument|Type|Name|Shape|Description + +|Attribute|in_t|input_zp|-|Input tensor zero point +|Attribute|weight_t|weight_zp|-|Weight zero point +|=== + +*Operation Function* + +[source,c] +---- +assert(in_t == aint8_t || input_zp == 0) // Zero point only for asymmetric int8 +assert(weight_t == aint8_t || weight_zp == 0) +pad=concat([0,0],pad,[0,0]) +for_each (0<=n<N, 0<=od<D, 0<=oy<H, 0<=ox<W; 0<=oc<OC) { + acc_t acc = 0 + id = od * stride_d - pad_d0 + iy = oy * stride_y - pad_top + ix = ox * stride_x - pad_left + for_each (0<=kd<KD, 0<=ky<KH, 0<=kx<KW, 0<=ic<IC) { + d = id + kd * dilation_d + y = iy + ky * dilation_y + x = ix + kx * dilation_x + value = tensor_read<in_t>(input, [N,ID,IH,IW,IC], [n,d,y,x,ic], input_zp, pad) + weight = tensor_read<weight_t>(weight,[OC,KD,KH,KW,IC],[oc,kd,ky,kx,ic], weight_zp) + acc = apply_add<acc_t>(acc, value * weight) + } + acc = apply_add<acc_t>(acc, bias[oc]) + tensor_write<acc_t>(output, [N,D,H,W,OC], [n,od,oy,ox,oc], acc) +} +---- + +*Supported Data Types:* + +|=== +|Profile|Mode|in_t|weight_t|acc_t + +|Any|signed 8x8|aint8|int8,aint8|int32 +|Any|signed 8x4|aint8|int4|int32 +|Any|signed 16x8 |int16|int8|int48 +|MI, MT|float|float|float|float +|=== + + +==== DEPTHWISE_CONV2D + +Performs 2D convolutions separately over each channel of the given tensor input, using the weight tensor. + +*Arguments:* + +|=== +|Argument|Type|Name|Shape|Description + +|Input|in_t*|input|[N,H,W,C]|Input tensor +|Attribute|weight_t*|weight|[KH,KW,C,M]|Weight kernel size KH x KW +|Attribute|acc_t*|bias|[C*M]|Per output channel bias data. 
+ +|Attribute|int*|pad|[4]|[pad_top, pad_bottom, pad_left, pad_right] +|Attribute|int*|stride|[2]|[stride_y, stride_x] +|Attribute|int*|dilation|[2]|[dilation_y, dilation_x] +|Output|out_t*|output|[N,H,W,C*M]|Output tensor +|=== + +*Quantization Parameters:* + +|=== +|Argument|Type|Name|Shape|Description + +|Attribute|in_t|input_zp|-|Input tensor zero point +|Attribute|weight_t|weight_zp|-|Weight zero point +|=== + +*Operation Function* + +[source,c] +---- +assert(in_t==aint8_t || input_zp==0) // Zero point only for asymmetric int8 +assert(weight_t==aint8_t || weight_zp==0) +pad=concat([0,0],pad,[0,0]) +for_each (0 <= n<N, 0 <= oy < H, 0 <= ox < W; 0 <= c < C, 0 <= m < M) { + acc_t acc = 0 + iy = oy * stride_y - pad_top + ix = ox * stride_x - pad_left + for_each (0<=ky<KH, 0<=kx<KW) { + y = iy + ky * dilation_y + x = ix + kx * dilation_x + value = tensor_read<in_t>(input, [N,H,W,C], [n,y,x,c], input_zp, pad) + weight = tensor_read<weight_t>(weight, [KH,KW,C,M], [ky,kx,c,m], weight_zp) + acc = apply_add<acc_t>(acc, value * weight) + } + acc = apply_add<acc_t>(acc, bias[(c*M) + m]) + tensor_write<acc_t>(output, [N,H,W,C*M], [n,oy,ox,c*M+m], acc) +} +---- + +*Supported Data Types:* + +|=== +|Profile|Mode|in_t|weight_t|acc_t + +|Any|signed 8x8|aint8|int8,aint8|int32 +|Any|signed 8x4|aint8|int4|int32 +|Any|signed 16x8|int16|int8|int48 +|MI, MT|float|float|float|float +|=== + +==== FULLY_CONNECTED + +Performs a fully connected network. + +*Arguments:* + +|=== +|Argument|Type|Name|Shape|Description + +|Input|in_t*|input|[N,IC]|Input tensor +|Attribute|weight_t*|weight|[OC,IC]|Weights +|Attribute|acc_t*|bias|[OC]|Per output channel bias data. 
+|Output|out_t*|output|[N,OC]|Output tensor +|=== + +*Quantization Parameters:* + +|=== +|Argument|Type|Name|Shape|Description + +|Attribute|in_t|input_zp|-|Input tensor zero point +|Attribute|weight_t|weight_zp|-|Weight zero point +|=== + +*Operation Function* + +[source,c] +---- +assert(in_t == aint8_t || input_zp == 0) // Zero point only for asymmetric int8 +assert(weight_t == aint8_t || weight_zp == 0) +for_each (0<=n<N, 0<=oc<OC) { + acc_t acc = 0 + for_each (0<=ic<IC) { + value = tensor_read<in_t>(input, [N,IC], [n,ic], input_zp) + weight = tensor_read<weight_t>(weight, [OC,IC], [oc,ic], weight_zp) + acc = apply_add<acc_t>(acc, value * weight) + } + acc = apply_add<acc_t>(acc, bias[oc]) + tensor_write<acc_t>(output, [N,OC], [n,oc], acc) +} +---- + +*Supported Data Types:* + +|=== +|Profile|Mode|in_t|weight_t|acc_t + +|Any|signed 8x8|aint8|int8,aint8|int32 +|Any|signed 8x4|aint8|int4|int32 +|Any|signed 16x8 |int16|int8|int48 +|MI, MT|float|float|float|float +|=== + +==== MATMUL +Performs a two dimensional matrix multiplication. This allows both inputs to be activations, rather than reserving weights as an attribute in the FULLY_CONNECTED operator. 
+ +*Arguments:* + +|=== +|Argument|Type|Name|Shape|Description + +|Input|in_t*|A|[M,K]|Input tensor A +|Input|in_t*|B|[K,N]|Input tensor B +|Output|out_t*|C|[M,N]|Output tensor C +|=== + +*Quantization Parameters:* + +|=== +|Argument|Type|Name|Shape|Description + +|Attribute|in_t|A_zp|-|Input tensor A zero point +|Attribute|in_t|B_zp|-|Input tensor B zero point +|=== + +*Operation Function* + +[source,c] +---- +assert(in_t==aint8_t || (A_zp==0 && B_zp==0)) // Zero point only for asymmetric int8 +for_each (0<=m<M, 0<=n<N) { + acc_t acc = 0 + for_each (0<=k<K) { + value1 = tensor_read<in_t>(A, [M,K], [m,k], A_zp) + value2 = tensor_read<in_t>(B, [K,N], [k,n], B_zp) + acc = acc + value1 * value2 + } + tensor_write<acc_t>(C, [M,N], [m,n], acc) +} +---- + +*Supported Data Types:* + +|=== +|Profile|Mode|in_t|acc_t + +|Any|signed 8x8|aint8|int32 +|Any|signed 16x16|int16|int48 +|MI, MT|float|float|float +|=== + +==== MAX_POOL2D +This performs a max pooling over the given input tensor. A sliding window of size given by <kernel size> is passed over the input tensor, with the maximum value being placed in the output tensor. 
+ +*Arguments:* + +|=== +|Argument|Type|Name|Shape|Description + +|Input|in_t*|input|[N,H,W,C]|Input tensor 4D +|Attribute|int*|kernel|[2]|[kernel_y, kernel_x] +|Attribute|int*|stride|[2]|[stride_y, stride_x] +|Attribute|int*|pad|[4]|[pad_top, pad_bottom, pad_left, pad_right] +|Output|out_t*|output|[N,H,W,C]|Output tensor 4D +|=== + +*Quantization Parameters:* + +None + +*Operation Function:* + +[source,c] +---- +pad=concat([0,0],pad,[0,0]) +for_each ( 0<=n<N, 0<=oy<H, 0<=ox<W, 0<=c<C ) { + int32_t acc = minimum_value<in_t>; + iy = oy * stride_y - pad_top + ix = ox * stride_x - pad_left + for_each ( 0<=ky<kernel_y, 0<=kx<kernel_x ) { + y = iy + ky + x = ix + kx + value = tensor_read<in_t>(input, [N,IH,IW,IC], [n,y,x,c], pad) + acc = apply_max(acc, value) + } + tensor_write<out_t>(output, [N,H,W,C], [n,oy,ox,c], acc) +} +---- + +*Supported Data Types:* + +|=== +|Profile|Mode|in_t|out_t + +|Any|signed 8|aint8|aint8 +|Any|16-bit|int16|int16 +|MI, MT|float|float|float +|=== + +==== TRANSPOSE_CONV2D + +Performs a 2D transposed convolution over the given tensor input, using the weights tensor. + +*Arguments:* + +|=== +|Argument|Type|Name|Shape|Description + +|Input|in_t*|input|[N,IH,IW,IC]|Input tensor +|Attribute|weight_t*|weight|[OC,KH,KW,IC]|Weight kernel size KH x KW +|Attribute|acc_t*|bias|[OC]|Per output channel bias data. 
+|Attribute|int*|outpad|[2]|[outpad_top, outpad_left] +|Attribute|int*|stride|[2]|[stride_y, stride_x] +|Attribute|int*|dilation|[2]|[dilation_y, dilation_x] +|Attribute|int*|out_shape|[4]|[N,OH,OW,OC] +|Output|out_t*|output|[N,OH,OW,OC]|Output tensor +|=== + +*Quantization Parameters:* + +|=== +|Argument|Type|Name|Shape|Description + +|Attribute|in_t|input_zp|-|Input tensor zero point +|Attribute|weight_t|weight_zp|-|Weight zero point +|=== + +*Operation Function* + +[source,c] +---- +assert(in_t==aint8_t || input_zp==0) // Zero point only for asymmetric int8 +assert(weight_t == aint8_t || weight_zp == 0) +for_each (index in out_shape) { + tensor_write<acc_t>(output, [N,OH,OW,OC], index, bias[index[3]]) +} +for_each (0<=n<N, 0<=iy<IH, 0<=ix<IW, 0<=oc<OC, 0<=ic<IC, 0<=ky<KH, 0<=kx<KW) { + oy = iy * stride_y - outpad_top + ky + ox = ix * stride_x - outpad_left + kx + if (oy>=0 && oy<OH && ox>=0 && ox<OW) { + acc = tensor_read<acc_t>(output, [N,OH,OW,OC], [n,oy,ox,oc]) + value = tensor_read<in_t>(input, [N,IH,IW,IC], [n,iy,ix,ic], input_zp) + weight = tensor_read<weight_t>(weight, [OC,KH,KW,IC], [oc,ky,kx,ic], weight_zp) + acc = apply_add<acc_t>(acc, value * weight) + tensor_write<acc_t>(output, [N,OH,OW,OC], [n,oy,ox,oc], acc) + } +} +---- + +*Supported Data Types:* + +|=== +|Profile|Mode|in_t|weight_t|acc_t + +|Any|signed 8x8|aint8|int8,aint8|int32 +|Any|signed 8x4|aint8|int4|int32 +|Any|signed 16x8|int16|int8|int48 +|MI, MT|float|float|float|float +|=== |