diff options
Diffstat (limited to 'docs/09_operators_list.dox')
-rw-r--r-- | docs/09_operators_list.dox | 1742 |
1 files changed, 1687 insertions, 55 deletions
diff --git a/docs/09_operators_list.dox b/docs/09_operators_list.dox index 82a127bbd3..244f292f82 100644 --- a/docs/09_operators_list.dox +++ b/docs/09_operators_list.dox @@ -108,6 +108,320 @@ where N = batches, C = channels, H = height, W = width <tr><td>F32<td>F32 </table> <tr> + <td rowspan="2">ArgMinMaxLayer + <td rowspan="2" style="width:200px;"> Function to calculate the index of the minimum or maximum values in a tensor based on an axis. + <td rowspan="2"> + <ul> + <li>ANEURALNETWORKS_ARGMAX + <li>ANEURALNETWORKS_ARGMIN + </ul> + <td>NEArgMinMaxLayer + <td> + <ul> + <li>All + </ul> + <td> + <table> + <tr><th>src<th>dst + <tr><td>QASYMM8<td>U32, S32 + <tr><td>QASYMM8_SIGNED<td>U32, S32 + <tr><td>S32<td>U32, S32 + <tr><td>F16<td>U32, S32 + <tr><td>F32<td>U32, S32 + </table> +<tr> + <td>CLArgMinMaxLayer + <td> + <ul> + <li>All + </ul> + <td> + <table> + <tr><th>src<th>dst + <tr><td>QASYMM8<td>U32, S32 + <tr><td>QASYMM8_SIGNED<td>U32, S32 + <tr><td>S32<td>U32, S32 + <tr><td>F16<td>U32, S32 + <tr><td>F32<td>U32, S32 + </table> +<tr> + <td rowspan="2">BatchNormalizationLayer + <td rowspan="2" style="width:200px;"> Function to perform batch normalization. + <td rowspan="2"> + <ul> + <li>n/a + </ul> + <td>NEBatchNormalizationLayer + <td> + <ul> + <li>NHWC + <li>NCHW + </ul> + <td> + <table> + <tr><th>src<th>dst + <tr><td>F32<td>F32 + <tr><td>F16<td>F16 + </table> +<tr> + <td>CLBatchNormalizationLayer + <td> + <ul> + <li>NHWC + <li>NCHW + </ul> + <td> + <table> + <tr><th>src<th>dst + <tr><td>F32<td>F32 + <tr><td>F16<td>F16 + </table> +<tr> + <td rowspan="2">BatchToSpaceLayer + <td rowspan="2" style="width:200px;"> Batch to space transformation. 
+ <td rowspan="2"> + <ul> + <li>ANEURALNETWORKS_BATCH_TO_SPACE_ND + </ul> + <td>NEBatchToSpaceLayer + <td> + <ul> + <li>NHWC + <li>NCHW + </ul> + <td> + <table> + <tr><th>src0<th>src1<th>dst + <tr><td>All<td>S32<td>All + </table> +<tr> + <td>CLBatchToSpaceLayer + <td> + <ul> + <li>NHWC + <li>NCHW + </ul> + <td> + <table> + <tr><th>src0<th>src1<th>dst + <tr><td>All<td>S32<td>All + </table> +<tr> + <td rowspan="2">BitwiseAnd + <td rowspan="2" style="width:200px;"> Function to perform bitwise AND between 2 tensors. + <td rowspan="2"> + <ul> + <li>ANEURALNETWORKS_LOGICAL_AND + </ul> + <td>NEBitwiseAnd + <td> + <ul> + <li>All + </ul> + <td> + <table> + <tr><th>src<th>dst + <tr><td>U8<td>U8 + </table> +<tr> + <td>CLBitwiseAnd + <td> + <ul> + <li>All + </ul> + <td> + <table> + <tr><th>src<th>dst + <tr><td>U8<td>U8 + </table> +<tr> + <td rowspan="2">BitwiseNot + <td rowspan="2" style="width:200px;"> Function to perform bitwise NOT. + <td rowspan="2"> + <ul> + <li>ANEURALNETWORKS_LOGICAL_NOT + </ul> + <td>NEBitwiseNot + <td> + <ul> + <li>All + </ul> + <td> + <table> + <tr><th>src<th>dst + <tr><td>U8<td>U8 + </table> +<tr> + <td>CLBitwiseNot + <td> + <ul> + <li>All + </ul> + <td> + <table> + <tr><th>src<th>dst + <tr><td>U8<td>U8 + </table> +<tr> + <td rowspan="2">BitwiseOr + <td rowspan="2" style="width:200px;"> Function to perform bitwise OR between 2 tensors. + <td rowspan="2"> + <ul> + <li>ANEURALNETWORKS_LOGICAL_OR + </ul> + <td>NEBitwiseOr + <td> + <ul> + <li>All + </ul> + <td> + <table> + <tr><th>src<th>dst + <tr><td>U8<td>U8 + </table> +<tr> + <td>CLBitwiseOr + <td> + <ul> + <li>All + </ul> + <td> + <table> + <tr><th>src<th>dst + <tr><td>U8<td>U8 + </table> +<tr> + <td rowspan="2">BitwiseXor + <td rowspan="2" style="width:200px;"> Function to perform bitwise XOR between 2 tensors. 
+ <td rowspan="2"> + <ul> + <li>n/a + </ul> + <td>NEBitwiseXor + <td> + <ul> + <li>All + </ul> + <td> + <table> + <tr><th>src<th>dst + <tr><td>U8<td>U8 + </table> +<tr> + <td>CLBitwiseXor + <td> + <ul> + <li>All + </ul> + <td> + <table> + <tr><th>src<th>dst + <tr><td>U8<td>U8 + </table> +<tr> + <td rowspan="2">BoundingBoxTransform + <td rowspan="2" style="width:200px;"> Transform proposal bounding boxes to target bounding box using bounding box deltas. + <td rowspan="2"> + <ul> + <li>n/a + </ul> + <td>NEBoundingBoxTransform + <td> + <ul> + <li>NHWC + <li>NCHW + </ul> + <td> + <table> + <tr><th>src0<th>src1<th>dst + <tr><td>QASYMM16<td>QASYMM8<td>QASYMM16 + <tr><td>F16<td>F16<td>F16 + <tr><td>F32<td>F32<td>F32 + </table> +<tr> + <td>CLBoundingBoxTransform + <td> + <ul> + <li>NHWC + <li>NCHW + </ul> + <td> + <table> + <tr><th>src0<th>src1<th>dst + <tr><td>QASYMM16<td>QASYMM8<td>QASYMM16 + <tr><td>F16<td>F16<td>F16 + <tr><td>F32<td>F32<td>F32 + </table> +<tr> + <td rowspan="2">Cast + <td rowspan="2" style="width:200px;"> Function to cast a tensor. 
+ <td rowspan="2"> + <ul> + <li>ANEURALNETWORKS_CAST + </ul> + <td>NECast + <td> + <ul> + <li>All + </ul> + <td> + <table> + <tr><th>src<th>dst + <tr><td>QASYMM8_SIGNED<td>S16, S32, F32, F16 + <tr><td>QASYMM8<td>U16, S16, S32, F32, F16 + <tr><td>U8<td>U16, S16, S32, F32, F16 + <tr><td>U16<td>U8, U32 + <tr><td>S16<td>QASYMM8_SIGNED, U8, S32 + <tr><td>F16<td>QASYMM8_SIGNED, QASYMM8, F32, S32, U8 + <tr><td>S32<td>QASYMM8_SIGNED, QASYMM8, F16, F32, U8 + <tr><td>F32<td>QASYMM8_SIGNED, QASYMM8, BFLOAT16, F16, S32, U8 + </table> +<tr> + <td>CLCast + <td> + <ul> + <li>All + </ul> + <td> + <table> + <tr><th>src<th>dst + <tr><td>U8<td>S8, U16, S16, U32, S32, F16, F32 + <tr><td>U16<td>U8, S8, S16, U32, S32, F16, F32 + <tr><td>S16<td>U8, S8, U16, U32, S32, F16, F32 + <tr><td>U32<td>U8, S8, U16, S16, S32, F16, F32 + <tr><td>S32<td>U8, S8, U16, S16, U32, F16, F32 + <tr><td>F16<td>U8, S8, U16, S16, U32, F32 + <tr><td>F32<td>U8, S8, U16, S16, U32, F16 + </table> +<tr> + <td rowspan="2">ChannelShuffleLayer + <td rowspan="2" style="width:200px;"> Function to shuffle the channels of the input tensor. + <td rowspan="2"> + <ul> + <li>ANEURALNETWORKS_CHANNEL_SHUFFLE + </ul> + <td>NEChannelShuffleLayer + <td> + <ul> + <li>NCHW + </ul> + <td> + <table> + <tr><th>src<th>dst + <tr><td>All<td>All + </table> +<tr> + <td>CLChannelShuffleLayer + <td> + <ul> + <li>NCHW + </ul> + <td> + <table> + <tr><th>src<th>dst + <tr><td>All<td>All + </table> +<tr> <td rowspan="2">ConcatenateLayer <td rowspan="2" style="width:200px;"> Function to concatenate tensors along a given axis. <td rowspan="2"> @@ -146,7 +460,7 @@ where N = batches, C = channels, H = height, W = width <td rowspan="2" style="width:200px;"> Function to tranpose the wieghts for the fully connected layer. 
<td rowspan="2"> <ul> - <li>None + <li>n/a </ul> <td>NEConvertFullyConnectedWeights <td> @@ -172,11 +486,51 @@ where N = batches, C = channels, H = height, W = width <tr><td>All<td>All </table> <tr> + <td rowspan="2">ConvolutionLayer + <td rowspan="2" style="width:200px;"> Function to compute a convolution layer. + <td rowspan="2"> + <ul> + <li>ANEURALNETWORKS_CONV_2D + </ul> + <td>NEConvolutionLayer + <td> + <ul> + <li>NHWC + <li>NCHW + </ul> + <td> + <table> + <tr><th>src0<th>src1<th>src2<th>dst + <tr><td>F16<td>F16<td>F16<td>F16 + <tr><td>F32<td>F32<td>F32<td>F32 + <tr><td>QASYMM8<td>QASYMM8<td>S32<td>QASYMM8 + <tr><td>QASYMM8<td>QSYMM8_PER_CHANNEL<td>S32<td>QASYMM8 + <tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED<td>S32<td>QASYMM8_SIGNED + <tr><td>QASYMM8_SIGNED<td>QSYMM8_PER_CHANNEL<td>S32<td>QASYMM8_SIGNED + </table> +<tr> + <td>CLConvolutionLayer + <td> + <ul> + <li>NHWC + <li>NCHW + </ul> + <td> + <table> + <tr><th>src0<th>src1<th>src2<th>dst + <tr><td>F16<td>F16<td>F16<td>F16 + <tr><td>F32<td>F32<td>F32<td>F32 + <tr><td>QASYMM8<td>QASYMM8<td>S32<td>QASYMM8 + <tr><td>QASYMM8<td>QSYMM8_PER_CHANNEL<td>S32<td>QASYMM8 + <tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED<td>S32<td>QASYMM8_SIGNED + <tr><td>QASYMM8_SIGNED<td>QSYMM8_PER_CHANNEL<td>S32<td>QASYMM8_SIGNED + </table> +<tr> <td rowspan="2">Copy <td rowspan="2" style="width:200px;"> Function to copy a tensor. <td rowspan="2"> <ul> - <li>None + <li>n/a </ul> <td>NECopy <td> @@ -200,8 +554,186 @@ where N = batches, C = channels, H = height, W = width <tr><td>All<td>All </table> <tr> + <td rowspan="2">CropResize + <td rowspan="2" style="width:200px;"> Function to perform cropping and resizing. 
+ <td rowspan="2"> + <ul> + <li>n/a + </ul> + <td>NECropResize + <td> + <ul> + <li>NHWC + </ul> + <td> + <table> + <tr><th>src0<th>src1<th>src2<th>dst + <tr><td>All<td>F32<td>F32<td>F32 + </table> +<tr> + <td>CLCropResize + <td> + <ul> + <li>NHWC + </ul> + <td> + <table> + <tr><th>src0<th>src1<th>src2<th>dst + <tr><td>All<td>F32<td>F32<td>F32 + </table> +<tr> + <td rowspan="2">DeconvolutionLayer + <td rowspan="2" style="width:200px;"> Function to compute a deconvolution or tranpose convolution. + <td rowspan="2"> + <ul> + <li>ANEURALNETWORKS_TRANSPOSE_CONV_2D + </ul> + <td>NEDeconvolutionLayer + <td> + <ul> + <li>NHWC + <li>NCHW + </ul> + <td> + <table> + <tr><th>src0<th>src1<th>src2<th>dst + <tr><td>F16<td>F16<td>F16<td>F16 + <tr><td>F32<td>F32<td>F32<td>F32 + <tr><td>QASYMM8<td>QASYMM8<td>S32<td>QASYMM8 + <tr><td>QASYMM8<td>QSYMM8_PER_CHANNEL<td>S32<td>QASYMM8 + <tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED<td>S32<td>QASYMM8_SIGNED + <tr><td>QASYMM8_SIGNED<td>QSYMM8_PER_CHANNEL<td>S32<td>QASYMM8_SIGNED + </table> +<tr> + <td>CLDeconvolutionLayer + <td> + <ul> + <li>NHWC + <li>NCHW + </ul> + <td> + <table> + <tr><th>src0<th>src1<th>src2<th>dst + <tr><td>F16<td>F16<td>F16<td>F16 + <tr><td>F32<td>F32<td>F32<td>F32 + <tr><td>QASYMM8<td>QASYMM8<td>S32<td>QASYMM8 + <tr><td>QASYMM8<td>QSYMM8_PER_CHANNEL<td>S32<td>QASYMM8 + <tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED<td>S32<td>QASYMM8_SIGNED + <tr><td>QASYMM8_SIGNED<td>QSYMM8_PER_CHANNEL<td>S32<td>QASYMM8_SIGNED + </table> +<tr> + <td rowspan="2">DepthConvertLayer + <td rowspan="2" style="width:200px;"> Performs a down-scaling depth conversion. 
+ <td rowspan="2"> + <ul> + <li>n/a + </ul> + <td>NEDepthConvertLayer + <td> + <ul> + <li>All + </ul> + <td> + <table> + <tr><th>src<th>dst + <tr><td>QASYMM8<td>F16, F32 + <tr><td>U8<td>U16, S16, S32 + <tr><td>U16<td>U8, U32 + <tr><td>S16<td>U8, S32 + <tr><td>BFLOAT16<td>F32 + <tr><td>F16<td>QASYMM8, F32 + <tr><td>F32<td>QASYMM8, F16, BFLOAT16 + </table> +<tr> + <td>CLDepthConvertLayer + <td> + <ul> + <li>All + </ul> + <td> + <table> + <tr><th>src<th>dst + <tr><td>U8<td>S8, U16, S16, U32, S32, F16, F32 + <tr><td>U16<td>U8, S8, S16, U32, S32, F16, F32 + <tr><td>S16<td>U8, S8, U16, U32, S32, F16, F32 + <tr><td>U32<td>U8, S8, U16, S16, S32, F16, F32 + <tr><td>S32<td>U8, S8, U16, S16, U32, F16, F32 + <tr><td>F16<td>U8, S8, U16, S16, U32, F32 + <tr><td>F32<td>U8, S8, U16, S16, U32, F16 + </table> +<tr> + <td rowspan="2">DepthToSpaceLayer + <td rowspan="2" style="width:200px;"> Depth to Space transformation. + <td rowspan="2"> + <ul> + <li>ANEURALNETWORKS_DEPTH_TO_SPACE + </ul> + <td>NEDepthToSpaceLayer + <td> + <ul> + <li>NHWC + <li>NCHW + </ul> + <td> + <table> + <tr><th>src<th>dst + <tr><td>All<td>All + </table> +<tr> + <td>CLDepthToSpaceLayer + <td> + <ul> + <li>NHWC + <li>NCHW + </ul> + <td> + <table> + <tr><th>src<th>dst + <tr><td>All<td>All + </table> +<tr> + <td rowspan="2">DepthwiseConvolutionLayer + <td rowspan="2" style="width:200px;"> Function to perform depthwise separable convolution. 
+ <td rowspan="2"> + <ul> + <li>ANEURALNETWORKS_DEPTHWISE_CONV_2D + </ul> + <td>NEDepthwiseConvolutionLayer + <td> + <ul> + <li>NHWC + <li>NCHW + </ul> + <td> + <table> + <tr><th>src0<th>src1<th>src2<th>dst + <tr><td>F16<td>F16<td>F16<td>F16 + <tr><td>F32<td>F32<td>F32<td>F32 + <tr><td>QASYMM8<td>QASYMM8<td>S32<td>QASYMM8 + <tr><td>QASYMM8<td>QSYMM8_PER_CHANNEL<td>S32<td>QASYMM8 + <tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED<td>S32<td>QASYMM8_SIGNED + <tr><td>QASYMM8_SIGNED<td>QSYMM8_PER_CHANNEL<td>S32<td>QASYMM8_SIGNED + </table> +<tr> + <td>CLDepthwiseConvolutionLayer + <td> + <ul> + <li>NHWC + <li>NCHW + </ul> + <td> + <table> + <tr><th>src0<th>src1<th>src2<th>dst + <tr><td>F16<td>F16<td>F16<td>F16 + <tr><td>F32<td>F32<td>F32<td>F32 + <tr><td>QASYMM8<td>QASYMM8<td>S32<td>QASYMM8 + <tr><td>QASYMM8<td>QSYMM8_PER_CHANNEL<td>S32<td>QASYMM8 + <tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED<td>S32<td>QASYMM8_SIGNED + <tr><td>QASYMM8_SIGNED<td>QSYMM8_PER_CHANNEL<td>S32<td>QASYMM8_SIGNED + </table> +<tr> <td rowspan="2">DequantizationLayer - <td rowspan="2" style="width:200px;"> Function to dequantize the values in a tensor + <td rowspan="2" style="width:200px;"> Function to dequantize the values in a tensor. 
<td rowspan="2"> <ul> <li>ANEURALNETWORKS_DEQUANTIZE @@ -214,16 +746,11 @@ where N = batches, C = channels, H = height, W = width <td> <table> <tr><th>src<th>dst - <tr><td>QASYMM8<td>F16 - <tr><td>QASYMM8<td>F32 - <tr><td>QASYMM8_SIGNED<td>F16 - <tr><td>QASYMM8_SIGNED<td>F32 - <tr><td>QSYMM8_PER_CHANNEL<td>F16 - <tr><td>QSYMM8_PER_CHANNEL<td>F32 - <tr><td>QSYMM8<td>F16 - <tr><td>QSYMM8<td>F32 - <tr><td>QSYMM16<td>F16 - <tr><td>QSYMM16<td>F32 + <tr><td>QASYMM8<td>F16, F32 + <tr><td>QASYMM8_SIGNED<td>F16, F32 + <tr><td>QSYMM8_PER_CHANNEL<td>F16, F32 + <tr><td>QSYMM8<td>F16, F32 + <tr><td>QSYMM16<td>F16, F32 </table> <tr> <td>CLDequantizationLayer @@ -234,20 +761,15 @@ where N = batches, C = channels, H = height, W = width <td> <table> <tr><th>src<th>dst - <tr><td>QASYMM8<td>F16 - <tr><td>QASYMM8<td>F32 - <tr><td>QASYMM8_SIGNED<td>F16 - <tr><td>QASYMM8_SIGNED<td>F32 - <tr><td>QSYMM8_PER_CHANNEL<td>F16 - <tr><td>QSYMM8_PER_CHANNEL<td>F32 - <tr><td>QSYMM8<td>F16 - <tr><td>QSYMM8<td>F32 - <tr><td>QSYMM16<td>F16 - <tr><td>QSYMM16<td>F32 + <tr><td>QASYMM8<td>F16, F32 + <tr><td>QASYMM8_SIGNED<td>F16, F32 + <tr><td>QSYMM8_PER_CHANNEL<td>F16, F32 + <tr><td>QSYMM8<td>F16, F32 + <tr><td>QSYMM16<td>F16, F32 </table> <tr> <td rowspan="2">DirectConvolutionLayer - <td rowspan="2" style="width:200px;"> Function to + <td rowspan="2" style="width:200px;"> Function to compute direct convolution. <td rowspan="2"> <ul> <li>ANEURALNETWORKS_CONV_2D @@ -281,10 +803,10 @@ where N = batches, C = channels, H = height, W = width </table> <tr> <td rowspan="2">FFT1D - <td rowspan="2" style="width:200px;"> Fast Fourier Transform 1D + <td rowspan="2" style="width:200px;"> Fast Fourier Transform 1D. 
<td rowspan="2"> <ul> - <li>None + <li>n/a </ul> <td>NEFFT1D <td> @@ -310,10 +832,10 @@ where N = batches, C = channels, H = height, W = width </table> <tr> <td rowspan="2">FFT2D - <td rowspan="2" style="width:200px;"> Fast Fourier Transform 2D + <td rowspan="2" style="width:200px;"> Fast Fourier Transform 2D. <td rowspan="2"> <ul> - <li>None + <li>n/a </ul> <td>NEFFT2D <td> @@ -339,7 +861,7 @@ where N = batches, C = channels, H = height, W = width </table> <tr> <td rowspan="2">FFTConvolutionLayer - <td rowspan="2" style="width:200px;"> Fast Fourier Transform Convolution + <td rowspan="2" style="width:200px;"> Fast Fourier Transform Convolution. <td rowspan="2"> <ul> <li>ANEURALNETWORKS_CONV_2D @@ -368,7 +890,7 @@ where N = batches, C = channels, H = height, W = width </table> <tr> <td rowspan="2">Fill - <td rowspan="2" style="width:200px;"> Set the values of a tensor with a given value + <td rowspan="2" style="width:200px;"> Set the values of a tensor with a given value. <td rowspan="2"> <ul> <li>ANEURALNETWORKS_FILL @@ -395,8 +917,64 @@ where N = batches, C = channels, H = height, W = width <tr><td>All<td>All </table> <tr> + <td rowspan="2">FillBorder + <td rowspan="2" style="width:200px;"> Function to . 
+ <td rowspan="2"> + <ul> + <li>n/a + </ul> + <td>NEFillBorder + <td> + <ul> + <li>All + </ul> + <td> + <table> + <tr><th>src<th>dst + <tr><td>All<td>All + </table> +<tr> + <td>CLFillBorder + <td> + <ul> + <li>All + </ul> + <td> + <table> + <tr><th>src<th>dst + <tr><td>All<td>All + </table> +<tr> + <td rowspan="2">FlattenLayer + <td rowspan="2" style="width:200px;"> Reshape a tensor to be 1D + <td rowspan="2"> + <ul> + <li>ANEURALNETWORKS_RESHAPE + </ul> + <td>NEFlattenLayer + <td> + <ul> + <li>All + </ul> + <td> + <table> + <tr><th>src<th>dst + <tr><td>All<td>All + </table> +<tr> + <td>CLFlattenLayer + <td> + <ul> + <li>All + </ul> + <td> + <table> + <tr><th>src<th>dst + <tr><td>All<td>All + </table> +<tr> <td rowspan="2">Floor - <td rowspan="2" style="width:200px;"> Round the value to the lowest number + <td rowspan="2" style="width:200px;"> Round the value to the lowest number. <td rowspan="2"> <ul> <li>ANEURALNETWORKS_FLOOR @@ -425,6 +1003,512 @@ where N = batches, C = channels, H = height, W = width <tr><td>F16<td>F16 </table> <tr> + <td rowspan="2">FullyConnectedLayer + <td rowspan="2" style="width:200px;"> Function to perform a fully connected / dense layer. 
+ <td rowspan="2"> + <ul> + <li>ANEURALNETWORKS_FULLY_CONNECTED + </ul> + <td>NEFullyConnectedLayerReshapeWeightsManaged + <td> + <ul> + <li>NHWC + <li>NCHW + </ul> + <td> + <table> + <tr><th>src0<th>src1<th>src2<th>dst + <tr><td>F16<td>F16<td>F16<td>F16 + <tr><td>F32<td>F32<td>F32<td>F32 + <tr><td>QASYMM8<td>QASYMM8<td>S32<td>QASYMM8 + <tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED<td>S32<td>QASYMM8_SIGNED + </table> +<tr> + <td>CLFullyConnectedLayerReshapeWeightsManaged + <td> + <ul> + <li>NHWC + <li>NCHW + </ul> + <td> + <table> + <tr><th>src0<th>src1<th>src2<th>dst + <tr><td>F16<td>F16<td>F16<td>F16 + <tr><td>F32<td>F32<td>F32<td>F32 + <tr><td>QASYMM8<td>QASYMM8<td>S32<td>QASYMM8 + <tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED<td>S32<td>QASYMM8_SIGNED + </table> +<tr> + <td rowspan="2">FuseBatchNormalization + <td rowspan="2" style="width:200px;"> Function to fuse the batch normalization node to a preceding convolution node. + <td rowspan="2"> + <ul> + <li>n/a + </ul> + <td>NEFuseBatchNormalization + <td> + <ul> + <li>NHWC + <li>NCHW + </ul> + <td> + <table> + <tr><th>src<th>dst + <tr><td>F32<td>F32 + <tr><td>F16<td>F16 + </table> +<tr> + <td>CLFuseBatchNormalization + <td> + <ul> + <li>NHWC + <li>NCHW + </ul> + <td> + <table> + <tr><th>src<th>dst + <tr><td>F32<td>F32 + <tr><td>F16<td>F16 + </table> +<tr> + <td rowspan="2">Gather + <td rowspan="2" style="width:200px;"> Performs the Gather operation along the chosen axis. + <td rowspan="2"> + <ul> + <li>ANEURALNETWORKS_GATHER + </ul> + <td>NEGather + <td> + <ul> + <li>All + </ul> + <td> + <table> + <tr><th>src<th>dst + <tr><td>All<td>All + </table> +<tr> + <td>CLGather + <td> + <ul> + <li>All + </ul> + <td> + <table> + <tr><th>src<th>dst + <tr><td>All<td>All + </table> +<tr> + <td rowspan="2">GEMM + <td rowspan="2" style="width:200px;"> General Matrix Multiplication. 
+ <td rowspan="2"> + <ul> + <li>n/a + </ul> + <td>NEGEMM + <td> + <ul> + <li>All + </ul> + <td> + <table> + <tr><th>src0<th>src1<th>src2<th>dst + <tr><td>F32<td>F32<td>F32<td>F32 + <tr><td>F16<td>F16<td>F16<td>F16 + <tr><td>BFLOAT16<td>BFLOAT16<td>BFLOAT16<td>BFLOAT16 + </table> +<tr> + <td>CLGEMMReshapeRHSMatrixKernelManaged + <td> + <ul> + <li>All + </ul> + <td> + <table> + <tr><th>src0<th>src1<th>src2<th>dst + <tr><td>F32<td>F32<td>F32<td>F32 + <tr><td>F16<td>F16<td>F16<td>F16 + </table> +<tr> + <td rowspan="2">GEMMConvolutionLayer + <td rowspan="2" style="width:200px;"> General Matrix Multiplication. + <td rowspan="2"> + <ul> + <li>ANEURALNETWORKS_CONV_2D + </ul> + <td>NEConvolutionLayerReshapeWeights + <td> + <ul> + <li>NHWC + <li>NCHW + </ul> + <td> + <table> + <tr><th>src0<th>src1<th>src2<th>dst + <tr><td>F16<td>F16<td>F16<td>F16 + <tr><td>F32<td>F32<td>F32<td>F32 + <tr><td>BFLOAT16<td>BFLOAT16<td>BFLOAT16<td>BFLOAT16 + <tr><td>QASYMM8<td>QASYMM8<td>S32<td>QASYMM8 + <tr><td>QASYMM8<td>QSYMM8_PER_CHANNEL<td>S32<td>QASYMM8 + <tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED<td>S32<td>QASYMM8_SIGNED + <tr><td>QASYMM8_SIGNED<td>QSYMM8_PER_CHANNEL<td>S32<td>QASYMM8_SIGNED + </table> +<tr> + <td>CLConvolutionLayerReshapeWeights + <td> + <ul> + <li>NHWC + <li>NCHW + </ul> + <td> + <table> + <tr><th>src0<th>src1<th>src2<th>dst + <tr><td>F16<td>F16<td>F16<td>F16 + <tr><td>F32<td>F32<td>F32<td>F32 + <tr><td>QASYMM8<td>QASYMM8<td>S32<td>QASYMM8 + <tr><td>QASYMM8<td>QSYMM8_PER_CHANNEL<td>S32<td>QASYMM8 + <tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED<td>S32<td>QASYMM8_SIGNED + <tr><td>QASYMM8_SIGNED<td>QSYMM8_PER_CHANNEL<td>S32<td>QASYMM8_SIGNED + </table> +<tr> + <td rowspan="2">GEMMLowpMatrixMultiplyCore + <td rowspan="2" style="width:200px;"> General Matrix Multiplication. 
+ <td rowspan="2"> + <ul> + <li>n/a + </ul> + <td>NEGEMMLowpMatrixMultiplyCore + <td> + <ul> + <li>NHWC + <li>NCHW + </ul> + <td> + <table> + <tr><th>src0<th>src1<th>src2<th>dst + <tr><td>QASYMM8<td>QASYMM8<td>S32<td>QASYMM8 + <tr><td>QASYMM8<td>QSYMM8_PER_CHANNEL<td>S32<td>QASYMM8 + <tr><td>QASYMM8<td>QSYMM8<td>S32<td>QASYMM8 + <tr><td>QASYMM8<td>QASYMM8<td>S32<td>S32 + <tr><td>QASYMM8<td>QSYMM8_PER_CHANNEL<td>S32<td>S32 + <tr><td>QASYMM8<td>QSYMM8<td>S32<td>S32 + <tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED<td>S32<td>QASYMM8_SIGNED + <tr><td>QASYMM8_SIGNED<td>QSYMM8_PER_CHANNEL<td>S32<td>QASYMM8_SIGNED + <tr><td>QASYMM8_SIGNED<td>QSYMM8<td>S32<td>QASYMM8_SIGNED + <tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED<td>S32<td>S32 + <tr><td>QASYMM8_SIGNED<td>QSYMM8_PER_CHANNEL<td>S32<td>S32 + <tr><td>QASYMM8_SIGNED<td>QSYMM8<td>S32<td>S32 + </table> +<tr> + <td>CLGEMMLowpMatrixMultiplyCore + <td> + <ul> + <li>NHWC + <li>NCHW + </ul> + <td> + <table> + <tr><th>src0<th>src1<th>src2<th>dst + <tr><td>QASYMM8<td>QASYMM8<td>S32<td>QASYMM8 + <tr><td>QASYMM8<td>QSYMM8_PER_CHANNEL<td>S32<td>QASYMM8 + <tr><td>QASYMM8<td>QSYMM8<td>S32<td>QASYMM8 + <tr><td>QASYMM8<td>QASYMM8<td>S32<td>S32 + <tr><td>QASYMM8<td>QSYMM8_PER_CHANNEL<td>S32<td>S32 + <tr><td>QASYMM8<td>QSYMM8<td>S32<td>S32 + <tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED<td>S32<td>QASYMM8_SIGNED + <tr><td>QASYMM8_SIGNED<td>QSYMM8_PER_CHANNEL<td>S32<td>QASYMM8_SIGNED + <tr><td>QASYMM8_SIGNED<td>QSYMM8<td>S32<td>QASYMM8_SIGNED + <tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED<td>S32<td>S32 + <tr><td>QASYMM8_SIGNED<td>QSYMM8_PER_CHANNEL<td>S32<td>S32 + <tr><td>QASYMM8_SIGNED<td>QSYMM8<td>S32<td>S32 + </table> +<tr> + <td rowspan="2">GenerateProposalsLayer + <td rowspan="2" style="width:200px;"> Function to generate proposals for a RPN (Region Proposal Network). 
+ <td rowspan="2"> + <ul> + <li>ANEURALNETWORKS_GENERATE_PROPOSALS + </ul> + <td>NEGenerateProposalsLayer + <td> + <ul> + <li>All + </ul> + <td> + <table> + <tr><th>src0<th>src1<th>src2<th>dst + <tr><td>F16<td>F16<td>F16<td>F16 + <tr><td>F32<td>F32<td>F32<td>F32 + <tr><td>QASYMM8<td>QSYMM8<td>QSYMM16<td>QASYMM8 + </table> +<tr> + <td>CLGenerateProposalsLayer + <td> + <ul> + <li>All + </ul> + <td> + <table> + <tr><th>src0<th>src1<th>src2<th>dst + <tr><td>F16<td>F16<td>F16<td>F16 + <tr><td>F32<td>F32<td>F32<td>F32 + <tr><td>QASYMM8<td>QSYMM8<td>QSYMM16<td>QASYMM8 + </table> +<tr> + <td rowspan="2">InstanceNormalizationLayer + <td rowspan="2" style="width:200px;"> Function to perform a Instance normalization on a given axis. + <td rowspan="2"> + <ul> + <li>ANEURALNETWORKS_INSTANCE_NORMALIZATION + </ul> + <td>NEInstanceNormalizationLayer + <td> + <ul> + <li>NHWC + <li>NCHW + </ul> + <td> + <table> + <tr><th>src<th>dst + <tr><td>F16<td>F16 + <tr><td>F32<td>F32 + </table> +<tr> + <td>CLInstanceNormalizationLayer + <td> + <ul> + <li>NHWC + <li>NCHW + </ul> + <td> + <table> + <tr><th>src<th>dst + <tr><td>F16<td>F16 + <tr><td>F32<td>F32 + </table> +<tr> + <td rowspan="2">L2NormalizeLayer + <td rowspan="2" style="width:200px;"> Function to perform a L2 normalization on a given axis. + <td rowspan="2"> + <ul> + <li>ANEURALNETWORKS_L2_NORMALIZATION + </ul> + <td>NEL2NormalizeLayer + <td> + <ul> + <li>NHWC + <li>NCHW + </ul> + <td> + <table> + <tr><th>src<th>dst + <tr><td>F16<td>F16 + <tr><td>F32<td>F32 + </table> +<tr> + <td>CLL2NormalizeLayer + <td> + <ul> + <li>NHWC + <li>NCHW + </ul> + <td> + <table> + <tr><th>src<th>dst + <tr><td>F16<td>F16 + <tr><td>F32<td>F32 + </table> +<tr> + <td rowspan="2">LSTMLayer + <td rowspan="2" style="width:200px;"> Function to perform a single time step in a Long Short-Term Memory (LSTM) layer. 
+ <td rowspan="2"> + <ul> + <li>ANEURALNETWORKS_LSTM + </ul> + <td>NELSTMLayer + <td> + <ul> + <li>All + </ul> + <td> + <table> + <tr><th>src0 - src13<th>dst0 - dst3 + <tr><td>F16<td>F16 + <tr><td>F32<td>F32 + </table> +<tr> + <td>CLLSTMLayer + <td> + <ul> + <li>All + </ul> + <td> + <table> + <tr><th>src0 - src13<th>dst0 - dst3 + <tr><td>F16<td>F16 + <tr><td>F32<td>F32 + </table> +<tr> + <td rowspan="2">LSTMLayerQuantized + <td rowspan="2" style="width:200px;"> Function to perform quantized LSTM (Long Short-Term Memory) + <td rowspan="2"> + <ul> + <li>ANEURALNETWORKS_QUANTIZED_LSTM + <li>ANEURALNETWORKS_QUANTIZED_16BIT_LSTM + </ul> + <td>NELSTMLayerQuantized + <td> + <ul> + <li>All + </ul> + <td> + <table> + <tr><th>src0 - src8<th>src9 - src12<th>src13<th>src14<th>dst0<th>dst1 + <tr><td>QASYMM8<td>S32<td>QSYMM16<td>QASYMM8<td>QSYMM16<td>QASYMM8 + </table> +<tr> + <td>CLLSTMLayerQuantized + <td> + <ul> + <li>All + </ul> + <td> + <table> + <tr><th>src0 - src8<th>src9 - src12<th>src13<th>src14<th>dst0<th>dst1 + <tr><td>QASYMM8<td>S32<td>QSYMM16<td>QASYMM8<td>QSYMM16<td>QASYMM8 + </table> +<tr> + <td rowspan="2">MaxUnpoolingLayer + <td rowspan="2" style="width:200px;"> Function to perform MaxUnpooling. + <td rowspan="2"> + <ul> + <li>n/a + </ul> + <td>NEMaxUnpoolingLayer + <td> + <ul> + <li>NHWC + <li>NCHW + </ul> + <td> + <table> + <tr><th>src<th>dst + <tr><td>QASYMM8<td>QASYMM8 + <tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED + <tr><td>F16<td>F16 + <tr><td>F32<td>F32 + </table> +<tr> + <td>CLMaxUnpoolingLayer + <td> + <ul> + <li>NHWC + <li>NCHW + </ul> + <td> + <table> + <tr><th>src<th>dst + <tr><td>QASYMM8<td>QASYMM8 + <tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED + <tr><td>F16<td>F16 + <tr><td>F32<td>F32 + </table> +<tr> + <td rowspan="2">MeanStdDevNormalizationLayer + <td rowspan="2" style="width:200px;"> Function to execute mean and standard deviation normalization. 
+ <td rowspan="2"> + <ul> + <li>n/a + </ul> + <td>NEMeanStdDevNormalizationLayer + <td> + <ul> + <li>NHWC + <li>NCHW + </ul> + <td> + <table> + <tr><th>src<th>dst + <tr><td>F32<td>F32 + <tr><td>F16<td>F16 + </table> +<tr> + <td>CLMeanStdDevNormalizationLayer + <td> + <ul> + <li>NHWC + <li>NCHW + </ul> + <td> + <table> + <tr><th>src<th>dst + <tr><td>F32<td>F32 + <tr><td>F16<td>F16 + </table> +<tr> + <td rowspan="2">NormalizationLayer + <td rowspan="2" style="width:200px;"> Function to compute normalization layer. + <td rowspan="2"> + <ul> + <li>ANEURALNETWORKS_LOCAL_RESPONSE_NORMALIZATION + </ul> + <td>NENormalizationLayer + <td> + <ul> + <li>NHWC + <li>NCHW + </ul> + <td> + <table> + <tr><th>src<th>dst + <tr><td>F32<td>F32 + <tr><td>F16<td>F16 + </table> +<tr> + <td>CLNormalizationLayer + <td> + <ul> + <li>NHWC + <li>NCHW + </ul> + <td> + <table> + <tr><th>src<th>dst + <tr><td>F32<td>F32 + <tr><td>F16<td>F16 + </table> +<tr> + <td rowspan="2">PadLayer + <td rowspan="2" style="width:200px;"> Function to pad a tensor. + <td rowspan="2"> + <ul> + <li>ANEURALNETWORKS_PAD + <li>ANEURALNETWORKS_PAD_V2 + </ul> + <td>NEPadLayer + <td> + <ul> + <li>NHWC + <li>NCHW + </ul> + <td> + <table> + <tr><th>src<th>dst + <tr><td>All<td>All + </table> +<tr> + <td>CLPadLayer + <td> + <ul> + <li>NHWC + <li>NCHW + </ul> + <td> + <table> + <tr><th>src<th>dst + <tr><td>All<td>All + </table> +<tr> <td rowspan="2">Permute <td rowspan="2" style="width:200px;"> Function to transpose an ND tensor. <td rowspan="2"> @@ -575,6 +1659,65 @@ where N = batches, C = channels, H = height, W = width <tr><td>F32<td>F32 </table> <tr> + <td rowspan="2">PriorBoxLayer + <td rowspan="2" style="width:200px;"> Function to . 
+ <td rowspan="2"> + <ul> + <li>n/a + </ul> + <td>NEPriorBoxLayer + <td> + <ul> + <li>NHWC + <li>NCHW + </ul> + <td> + <table> + <tr><th>src0<th>src1<th>dst + <tr><td>F32<td>F32<td>F32 + </table> +<tr> + <td>CLPriorBoxLayer + <td> + <ul> + <li>NHWC + <li>NCHW + </ul> + <td> + <table> + <tr><th>src0<th>src1<th>dst + <tr><td>F32<td>F32<td>F32 + </table> +<tr> + <td rowspan="2">QLSTMLayer + <td rowspan="2" style="width:200px;"> Function to perform quantized LSTM (Long Short-Term Memory). + <td rowspan="2"> + <ul> + <li>ANEURALNETWORKS_QUANTIZED_LSTM + <li>ANEURALNETWORKS_QUANTIZED_16BIT_LSTM + </ul> + <td>NEQLSTMLayer + <td> + <ul> + <li>All + </ul> + <td> + <table> + <tr><th>src0<th>src1 - src6<th>src7 -src9<th>src10<th>src11<th>dst0<th>dst1 - dst2 + <tr><td>QASYMM8_SIGNED<td>QASYMM8<td>S32<td>QSYMM16<td>QASYMM8_SIGNED<td>QSYMM16<td>QASYMM8_SIGNED + </table> +<tr> + <td>CLQLSTMLayer + <td> + <ul> + <li>All + </ul> + <td> + <table> + <tr><th>src0<th>src1 - src6<th>src7 -src9<th>src10<th>src11<th>dst0<th>dst1 - dst2 + <tr><td>QASYMM8_SIGNED<td>QASYMM8<td>S32<td>QSYMM16<td>QASYMM8_SIGNED<td>QSYMM16<td>QASYMM8_SIGNED + </table> +<tr> <td rowspan="2">QuantizationLayer <td rowspan="2" style="width:200px;"> Function to perform quantization layer <td rowspan="2"> @@ -589,21 +1732,130 @@ where N = batches, C = channels, H = height, W = width <td> <table> <tr><th>src<th>dst + <tr><td>QASYMM8<td>QASYMM8, QASYMM8_SIGNED, QASYMM16 + <tr><td>QASYMM8_SIGNED<td>QASYMM8, QASYMM8_SIGNED, QASYMM16 + <tr><td>F16<td>QASYMM8, QASYMM8_SIGNED, QASYMM16 + <tr><td>F32<td>QASYMM8, QASYMM8_SIGNED, QASYMM16 + </table> +<tr> + <td>CLQuantizationLayer + <td> + <ul> + <li>All + </ul> + <td> + <table> + <tr><th>src<th>dst + <tr><td>QASYMM8<td>QASYMM8, QASYMM8_SIGNED, QASYMM16 + <tr><td>QASYMM8_SIGNED<td>QASYMM8, QASYMM8_SIGNED, QASYMM16 + <tr><td>F16<td>QASYMM8, QASYMM8_SIGNED, QASYMM16 + <tr><td>F32<td>QASYMM8, QASYMM8_SIGNED, QASYMM16 + </table> +<tr> + <td rowspan="2">Range + <td rowspan="2" 
style="width:200px;"> Function to generate a sequence of numbers starting from START and extends by increments of 'STEP' up to but not including 'END'. + <td rowspan="2"> + <ul> + <li>n/a + </ul> + <td>NERange + <td> + <ul> + <li>All + </ul> + <td> + <table> + <tr><th>dst + <tr><td>U8 + <tr><td>S8 + <tr><td>U16 + <tr><td>S16 + <tr><td>U32 + <tr><td>S32 + <tr><td>F16 + <tr><td>F32 + </table> +<tr> + <td>CLRange + <td> + <ul> + <li>All + </ul> + <td> + <table> + <tr><th>dst + <tr><td>U8 + <tr><td>S8 + <tr><td>QASYMM8 + <tr><td>U16 + <tr><td>S16 + <tr><td>U32 + <tr><td>S32 + <tr><td>F16 + <tr><td>F32 + </table> +<tr> + <td rowspan="2">ReduceMean + <td rowspan="2" style="width:200px;"> Function to perform reduce mean operation. + <td rowspan="2"> + <ul> + <li>ANEURALNETWORKS_MEAN + </ul> + <td>NEReduceMean + <td> + <ul> + <li>All + </ul> + <td> + <table> + <tr><th>src<th>dst <tr><td>QASYMM8<td>QASYMM8 - <tr><td>QASYMM8<td>QASYMM8_SIGNED - <tr><td>QASYMM8<td>QASYMM16 - <tr><td>QASYMM8_SIGNED<td>QASYMM8 <tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED - <tr><td>QASYMM8_SIGNED<td>QASYMM16 - <tr><td>F16<td>QASYMM8 - <tr><td>F16<td>QASYMM8_SIGNED - <tr><td>F16<td>QASYMM16 - <tr><td>F32<td>QASYMM8 - <tr><td>F32<td>QASYMM8_SIGNED - <tr><td>F32<td>QASYMM16 + <tr><td>F16<td>F16 + <tr><td>F32<td>F32 </table> <tr> - <td>CLQuantizationLayer + <td>CLReduceMean + <td> + <ul> + <li>All + </ul> + <td> + <table> + <tr><th>src<th>dst + <tr><td>QASYMM8<td>QASYMM8 + <tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED + <tr><td>F16<td>F16 + <tr><td>F32<td>F32 + </table> +<tr> + <td rowspan="2">ReductionOperation + <td rowspan="2" style="width:200px;"> Function to perform reduce with the following operations - ARG_IDX_MAX: Index of the max value - ARG_IDX_MIN: Index of the min value - MEAN_SUM: Mean of sum - PROD: Product - SUM_SQUARE: Sum of squares - SUM: Sum - MIN: Min - MAX: Max + <td rowspan="2"> + <ul> + <li>ANEURALNETWORKS_REDUCE_ALL + <li>ANEURALNETWORKS_REDUCE_ANY + <li>ANEURALNETWORKS_REDUCE_MAX 
+ <li>ANEURALNETWORKS_REDUCE_MIN + <li>ANEURALNETWORKS_REDUCE_PROD + <li>ANEURALNETWORKS_REDUCE_SUM + </ul> + <td>NEReductionOperation + <td> + <ul> + <li>All + </ul> + <td> + <table> + <tr><th>src<th>dst + <tr><td>QASYMM8<td>QASYMM8 + <tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED + <tr><td>F16<td>F16 + <tr><td>F32<td>F32 + <tr><td>S32<td>S32 + </table> +<tr> + <td>CLReductionOperation <td> <ul> <li>All @@ -612,21 +1864,44 @@ where N = batches, C = channels, H = height, W = width <table> <tr><th>src<th>dst <tr><td>QASYMM8<td>QASYMM8 - <tr><td>QASYMM8<td>QASYMM8_SIGNED - <tr><td>QASYMM8<td>QASYMM16 - <tr><td>QASYMM8_SIGNED<td>QASYMM8 <tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED - <tr><td>QASYMM8_SIGNED<td>QASYMM16 - <tr><td>F16<td>QASYMM8 - <tr><td>F16<td>QASYMM8_SIGNED - <tr><td>F16<td>QASYMM16 - <tr><td>F32<td>QASYMM8 - <tr><td>F32<td>QASYMM8_SIGNED - <tr><td>F32<td>QASYMM16 + <tr><td>F16<td>F16 + <tr><td>F32<td>F32 + <tr><td>S32<td>S32 + </table> +<tr> + <td rowspan="2">ReorgLayer + <td rowspan="2" style="width:200px;"> Performs a reorganization layer of input tensor to the output tensor. + <td rowspan="2"> + <ul> + <li>n/a + </ul> + <td>NEReorgLayer + <td> + <ul> + <li>NHWC + <li>NCHW + </ul> + <td> + <table> + <tr><th>src<th>dst + <tr><td>All<td>All + </table> +<tr> + <td>CLReorgLayer + <td> + <ul> + <li>NHWC + <li>NCHW + </ul> + <td> + <table> + <tr><th>src<th>dst + <tr><td>All<td>All </table> <tr> <td rowspan="2">ReshapeLayer - <td rowspan="2" style="width:200px;"> Fucntion to reshape a tensor + <td rowspan="2" style="width:200px;"> Function to reshape a tensor. <td rowspan="2"> <ul> <li>ANEURALNETWORKS_RESHAPE @@ -654,8 +1929,133 @@ where N = batches, C = channels, H = height, W = width <tr><td>All<td>All </table> <tr> + <td rowspan="2">Reverse + <td rowspan="2" style="width:200px;"> Function to reverse tensor according to axis. 
+ <td rowspan="2"> + <ul> + <li>n/a + </ul> + <td>NEReverse + <td> + <ul> + <li>All + </ul> + <td> + <table> + <tr><th>src0<th>src1<th>dst + <tr><td>All<td>U32<td>All + </table> +<tr> + <td>CLReverse + <td> + <ul> + <li>All + </ul> + <td> + <table> + <tr><th>src0<th>src1<th>dst + <tr><td>All<td>U32<td>All + </table> +<tr> + <td rowspan="2">RNNLayer + <td rowspan="2" style="width:200px;"> Function to perform recurrent neural network layer. + <td rowspan="2"> + <ul> + <li>ANEURALNETWORKS_RNN + </ul> + <td>NERNNLayer + <td> + <ul> + <li>NHWC + <li>NCHW + </ul> + <td> + <table> + <tr><th>src0<th>src1<th>src2<th>src3<th>dst0<th>dst1 + <tr><td>F16<td>F16<td>F16<td>F16<td>F16<td>F16 + <tr><td>F32<td>F32<td>F32<td>F32<td>F32<td>F32 + </table> +<tr> + <td>CLRNNLayer + <td> + <ul> + <li>NHWC + <li>NCHW + </ul> + <td> + <table> + <tr><th>src0<th>src1<th>src2<th>src3<th>dst0<th>dst1 + <tr><td>F16<td>F16<td>F16<td>F16<td>F16<td>F16 + <tr><td>F32<td>F32<td>F32<td>F32<td>F32<td>F32 + </table> +<tr> + <td rowspan="2">ROIAlignLayer + <td rowspan="2" style="width:200px;"> Function to perform ROI alignment. + <td rowspan="2"> + <ul> + <li>ANEURALNETWORKS_ROI_ALIGN + </ul> + <td>NEROIAlignLayer + <td> + <ul> + <li>All + </ul> + <td> + <table> + <tr><th>src0<th>src1<th>dst + <tr><td>F16<td>F16<td>F16 + <tr><td>F32<td>F32<td>F32 + <tr><td>QASYMM8<td>QASYMM16<td>QASYMM8 + <tr><td>QASYMM8_SIGNED<td>QASYMM16<td>QASYMM8_SIGNED + </table> +<tr> + <td>CLROIAlignLayer + <td> + <ul> + <li>All + </ul> + <td> + <table> + <tr><th>src0<th>src1<th>dst + <tr><td>F16<td>F16<td>F16 + <tr><td>F32<td>F32<td>F32 + <tr><td>QASYMM8<td>QASYMM16<td>QASYMM8 + <tr><td>QASYMM8_SIGNED<td>QASYMM16<td>QASYMM8_SIGNED + </table> +<tr> + <td rowspan="2">ROIPoolingLayer + <td rowspan="2" style="width:200px;"> Function to perform ROI pooling. 
+ <td rowspan="2"> + <ul> + <li>ANEURALNETWORKS_ROI_POOLING + </ul> + <td>NEROIPoolingLayer + <td> + <ul> + <li>All + </ul> + <td> + <table> + <tr><th>src0<th>src1<th>dst + <tr><td>F32<td>U16<td>F32 + <tr><td>QASYMM8<td>U16<td>QASYMM8 + </table> +<tr> + <td>CLROIPoolingLayer + <td> + <ul> + <li>All + </ul> + <td> + <table> + <tr><th>src0<th>src1<th>dst + <tr><td>F16<td>U16<td>F16 + <tr><td>F32<td>U16<td>F32 + <tr><td>QASYMM8<td>U16<td>QASYMM8 + </table> +<tr> <td rowspan="2">Scale - <td rowspan="2" style="width:200px;"> Fucntion to perform resize a tensor using to interpolate: - Bilenear - Nearest neighbor + <td rowspan="2" style="width:200px;"> Function to perform resize a tensor using to interpolate: - Bilinear - Nearest neighbor <td rowspan="2"> <ul> <li>ANEURALNETWORKS_RESIZE_BILINEAR @@ -695,6 +2095,34 @@ where N = batches, C = channels, H = height, W = width <tr><td>S16<td>S16 </table> <tr> + <td rowspan="2">Select + <td rowspan="2" style="width:200px;"> Function to select values from 2 tensors depending on an input tensor of booleans. + <td rowspan="2"> + <ul> + <li>ANEURALNETWORKS_SELECT + </ul> + <td>NESelect + <td> + <ul> + <li>All + </ul> + <td> + <table> + <tr><th>src0<th>src1<th>src2<th>dst + <tr><td>U8<td>All<td>All<td>All + </table> +<tr> + <td>CLSelect + <td> + <ul> + <li>All + </ul> + <td> + <table> + <tr><th>src0<th>src1<th>src2<th>dst + <tr><td>U8<td>All<td>All<td>All + </table> +<tr> <td rowspan="2">Slice <td rowspan="2" style="width:200px;"> Function to perform tensor slicing. <td rowspan="2"> @@ -723,6 +2151,122 @@ where N = batches, C = channels, H = height, W = width <tr><td>All<td>All </table> <tr> + <td rowspan="2">SpaceToBatchLayer + <td rowspan="2" style="width:200px;"> Function to divide a tensor spatially. 
+ <td rowspan="2"> + <ul> + <li>ANEURALNETWORKS_SPACE_TO_BATCH_ND + </ul> + <td>NESpaceToBatchLayer + <td> + <ul> + <li>NHWC + <li>NCHW + </ul> + <td> + <table> + <tr><th>src0<th>src1<th>src2<th>dst + <tr><td>All<td>S32<td>S32<td>All + </table> +<tr> + <td>CLSpaceToBatchLayer + <td> + <ul> + <li>NHWC + <li>NCHW + </ul> + <td> + <table> + <tr><th>src0<th>src1<th>src2<th>dst + <tr><td>All<td>S32<td>S32<td>All + </table> +<tr> + <td rowspan="2">SpaceToDepthLayer + <td rowspan="2" style="width:200px;"> Function to rearrange blocks of spatial data into depth. + <td rowspan="2"> + <ul> + <li>ANEURALNETWORKS_SPACE_TO_DEPTH + </ul> + <td>NESpaceToDepthLayer + <td> + <ul> + <li>NHWC + <li>NCHW + </ul> + <td> + <table> + <tr><th>src<th>dst + <tr><td>All<td>All + </table> +<tr> + <td>CLSpaceToDepthLayer + <td> + <ul> + <li>NHWC + <li>NCHW + </ul> + <td> + <table> + <tr><th>src<th>dst + <tr><td>All<td>All + </table> +<tr> + <td rowspan="2">Split + <td rowspan="2" style="width:200px;"> Function to split a tensor along a given axis. + <td rowspan="2"> + <ul> + <li>ANEURALNETWORKS_SPLIT + </ul> + <td>NESplit + <td> + <ul> + <li>All + </ul> + <td> + <table> + <tr><th>src<th>dst + <tr><td>All<td>All + </table> +<tr> + <td>CLSplit + <td> + <ul> + <li>All + </ul> + <td> + <table> + <tr><th>src<th>dst + <tr><td>All<td>All + </table> +<tr> + <td rowspan="2">StackLayer + <td rowspan="2" style="width:200px;"> Function to stack tensors along an axis. + <td rowspan="2"> + <ul> + <li>n/a + </ul> + <td>NEStackLayer + <td> + <ul> + <li>All + </ul> + <td> + <table> + <tr><th>src<th>dst + <tr><td>All<td>All + </table> +<tr> + <td>CLStackLayer + <td> + <ul> + <li>All + </ul> + <td> + <table> + <tr><th>src<th>dst + <tr><td>All<td>All + </table> +<tr> <td rowspan="2">StridedSlice <td rowspan="2" style="width:200px;"> Function to extract a strided slice of a tensor. 
<td rowspan="2"> @@ -751,8 +2295,36 @@ where N = batches, C = channels, H = height, W = width <tr><td>All<td>All </table> <tr> + <td rowspan="2">Tile + <td rowspan="2" style="width:200px;"> Function to construct a tensor by tiling a given tensor. + <td rowspan="2"> + <ul> + <li>ANEURALNETWORKS_TILE + </ul> + <td>NETile + <td> + <ul> + <li>All + </ul> + <td> + <table> + <tr><th>src<th>dst + <tr><td>All<td>All + </table> +<tr> + <td>CLTile + <td> + <ul> + <li>All + </ul> + <td> + <table> + <tr><th>src<th>dst + <tr><td>All<td>All + </table> +<tr> <td rowspan="2">Transpose - <td rowspan="2" style="width:200px;"> Function to transpose an 2D tensor. + <td rowspan="2" style="width:200px;"> Function to transpose a 2D tensor. <td rowspan="2"> <ul> <li>ANEURALNETWORKS_TRANSPOSE @@ -778,6 +2350,66 @@ where N = batches, C = channels, H = height, W = width <tr><th>src<th>dst <tr><td>All<td>All </table> +<tr> + <td rowspan="2">Unstack + <td rowspan="2" style="width:200px;"> Function to unpack a rank-R tensor into rank-(R-1) tensors. + <td rowspan="2"> + <ul> + <li>n/a + </ul> + <td>NEUnstack + <td> + <ul> + <li>All + </ul> + <td> + <table> + <tr><th>src<th>dst + <tr><td>All<td>All + </table> +<tr> + <td>CLUnstack + <td> + <ul> + <li>All + </ul> + <td> + <table> + <tr><th>src<th>dst + <tr><td>All<td>All + </table> +<tr> + <td rowspan="2">WinogradConvolutionLayer + <td rowspan="2" style="width:200px;"> Function to do Winograd Convolution. + <td rowspan="2"> + <ul> + <li>ANEURALNETWORKS_CONV_2D + </ul> + <td>NEWinogradConvolutionLayer + <td> + <ul> + <li>NHWC + <li>NCHW + </ul> + <td> + <table> + <tr><th>src0<th>src1<th>src2<th>dst + <tr><td>F16<td>F16<td>F16<td>F16 + <tr><td>F32<td>F32<td>F32<td>F32 + </table> +<tr> + <td>CLWinogradConvolutionLayer + <td> + <ul> + <li>NHWC + <li>NCHW + </ul> + <td> + <table> + <tr><th>src0<th>src1<th>src2<th>dst + <tr><td>F16<td>F16<td>F16<td>F16 + <tr><td>F32<td>F32<td>F32<td>F32 + </table> </table> */ |