From 6124ce60b54eb5639ed19d46c79fce21cca2c83b Mon Sep 17 00:00:00 2001 From: Sheri Zhang Date: Tue, 4 May 2021 14:03:13 +0100 Subject: Update operator list part3 Partially resolve: COMPMID-4199 Signed-off-by: Sheri Zhang Change-Id: Id24702d258fb4e04ad948e7cf6c0efd98d2a5456 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5561 Reviewed-by: TeresaARM Comments-Addressed: Arm Jenkins Tested-by: Arm Jenkins --- docs/09_operators_list.dox | 825 +++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 766 insertions(+), 59 deletions(-) (limited to 'docs/09_operators_list.dox') diff --git a/docs/09_operators_list.dox b/docs/09_operators_list.dox index 244f292f82..fc41265738 100644 --- a/docs/09_operators_list.dox +++ b/docs/09_operators_list.dox @@ -144,6 +144,62 @@ where N = batches, C = channels, H = height, W = width F16U32, S32 F32U32, S32 + + ArithmeticAddition + Function to add 2 tensors. + +
  Android NNAPI op: ANEURALNETWORKS_ADD
  NEArithmeticAddition
    Data layouts: All
    src0             src1             dst
    QASYMM8          QASYMM8          QASYMM8
    QASYMM8_SIGNED   QASYMM8_SIGNED   QASYMM8_SIGNED
    QSYMM16          QSYMM16          QASYMM16
    QSYMM16          QSYMM16          S32
    U8               U8               U8
    U8               U8               S16
    U8               S16              S16
    S16              U8               S16
    S16              S16              S16
    S32              S32              S32
    F16              F16              F16
    F32              F32              F32
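The tables in this list only describe supported type combinations; every function follows the same run-time pattern (initialise TensorInfo, configure, allocate, run). As an illustration, a minimal, untested sketch of NEArithmeticAddition on F32 tensors; the 2x2 shape and the SATURATE convert policy are illustrative choices, not taken from this patch:

@code{.cpp}
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    // Describe two F32 inputs and one F32 output of the same shape.
    Tensor src0, src1, dst;
    const TensorShape shape(2U, 2U);
    src0.allocator()->init(TensorInfo(shape, 1, DataType::F32));
    src1.allocator()->init(TensorInfo(shape, 1, DataType::F32));
    dst.allocator()->init(TensorInfo(shape, 1, DataType::F32));

    // Configure the operator; SATURATE clamps on overflow (relevant for the integer types above).
    NEArithmeticAddition add;
    add.configure(&src0, &src1, &dst, ConvertPolicy::SATURATE);

    // Allocate backing memory, fill src0/src1, then execute.
    src0.allocator()->allocate();
    src1.allocator()->allocate();
    dst.allocator()->allocate();
    add.run();
    return 0;
}
@endcode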
ArithmeticSubtraction: Function to subtract 2 tensors.
  Android NNAPI op: ANEURALNETWORKS_SUB
  NEArithmeticSubtraction
    Data layouts: All
    src0             src1             dst
    QASYMM8          QASYMM8          QASYMM8
    QASYMM8_SIGNED   QASYMM8_SIGNED   QASYMM8_SIGNED
    QSYMM16          QSYMM16          QASYMM16
    QSYMM16          QSYMM16          S32
    U8               U8               U8
    U8               U8               S16
    U8               S16              S16
    S16              U8               S16
    S16              S16              S16
    S32              S32              S32
    F16              F16              F16
    F32              F32              F32
BatchNormalizationLayer Function to perform batch normalization. @@ -421,6 +477,28 @@ where N = batches, C = channels, H = height, W = width srcdst AllAll + + Comparison + Function to compare 2 tensors. + +
  Android NNAPI ops:
    • ANEURALNETWORKS_EQUAL
    • ANEURALNETWORKS_GREATER
    • ANEURALNETWORKS_GREATER_EQUAL
    • ANEURALNETWORKS_LESS
    • ANEURALNETWORKS_LESS_EQUAL
    • ANEURALNETWORKS_NOT_EQUAL
  CLComparison
    Data layouts: All
    src0   src1   dst
    All    All    U8
ConcatenateLayer: Function to concatenate tensors along a given axis. @@ -553,6 +631,23 @@ where N = batches, C = channels, H = height, W = width
    src   dst
    All   All
Crop: Performs a crop of the input tensor (a copy of the selected region) to the output tensor.
  Android NNAPI op: n/a
  CLCrop
    Data layouts: NHWC
    src   dst
    All   F32
CropResize Function to perform cropping and resizing. @@ -621,6 +716,24 @@ where N = batches, C = channels, H = height, W = width QASYMM8_SIGNEDQASYMM8_SIGNEDS32QASYMM8_SIGNED QASYMM8_SIGNEDQSYMM8_PER_CHANNELS32QASYMM8_SIGNED + + DeconvolutionLayerUpsample + Function to execute deconvolution upsample on OpenCL. + +
  Android NNAPI op: ANEURALNETWORKS_TRANSPOSE_CONV_2D
  CLDeconvolutionLayerUpsample
    Data layouts: NHWC, NCHW
    src   dst
    All   All
DepthConvertLayer Performs a down-scaling depth conversion. @@ -706,39 +819,420 @@ where N = batches, C = channels, H = height, W = width -
src0src1src2dst -
F16F16F16F16 -
F32F32F32F32 -
QASYMM8QASYMM8S32QASYMM8 -
QASYMM8QSYMM8_PER_CHANNELS32QASYMM8 -
QASYMM8_SIGNEDQASYMM8_SIGNEDS32QASYMM8_SIGNED -
QASYMM8_SIGNEDQSYMM8_PER_CHANNELS32QASYMM8_SIGNED +
    src0             src1                 src2   dst
    F16              F16                  F16    F16
    F32              F32                  F32    F32
    QASYMM8          QASYMM8              S32    QASYMM8
    QASYMM8          QSYMM8_PER_CHANNEL   S32    QASYMM8
    QASYMM8_SIGNED   QASYMM8_SIGNED       S32    QASYMM8_SIGNED
    QASYMM8_SIGNED   QSYMM8_PER_CHANNEL   S32    QASYMM8_SIGNED
  CLDepthwiseConvolutionLayer
    Data layouts: NHWC, NCHW
    src0             src1                 src2   dst
    F16              F16                  F16    F16
    F32              F32                  F32    F32
    QASYMM8          QASYMM8              S32    QASYMM8
    QASYMM8          QSYMM8_PER_CHANNEL   S32    QASYMM8
    QASYMM8_SIGNED   QASYMM8_SIGNED       S32    QASYMM8_SIGNED
    QASYMM8_SIGNED   QSYMM8_PER_CHANNEL   S32    QASYMM8_SIGNED
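As an illustration of how a depthwise convolution function from this table is typically driven, here is a hedged, untested F32/NCHW sketch for NEDepthwiseConvolutionLayer; the 8x8x16 shapes, 3x3 kernel, stride 1 and pad 1 are illustrative assumptions, not values from this patch:

@code{.cpp}
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    // NCHW (default layout): 8x8 input with 16 channels, 3x3 depthwise kernel, depth multiplier 1.
    Tensor src, weights, biases, dst;
    src.allocator()->init(TensorInfo(TensorShape(8U, 8U, 16U), 1, DataType::F32));
    weights.allocator()->init(TensorInfo(TensorShape(3U, 3U, 16U), 1, DataType::F32));
    biases.allocator()->init(TensorInfo(TensorShape(16U), 1, DataType::F32));
    dst.allocator()->init(TensorInfo(TensorShape(8U, 8U, 16U), 1, DataType::F32));

    // Stride 1 in x/y, padding 1 in x/y keeps the spatial size unchanged.
    NEDepthwiseConvolutionLayer dwc;
    dwc.configure(&src, &weights, &biases, &dst, PadStrideInfo(1, 1, 1, 1));

    src.allocator()->allocate();
    weights.allocator()->allocate();
    biases.allocator()->allocate();
    dst.allocator()->allocate();
    // Fill src/weights/biases, then:
    dwc.run();
    return 0;
}
@endcode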
DequantizationLayer: Function to dequantize the values in a tensor.
  Android NNAPI op: ANEURALNETWORKS_DEQUANTIZE
  NEDequantizationLayer
    Data layouts: All
    src                  dst
    QASYMM8              F16, F32
    QASYMM8_SIGNED       F16, F32
    QSYMM8_PER_CHANNEL   F16, F32
    QSYMM8               F16, F32
    QSYMM16              F16, F32
  CLDequantizationLayer
    Data layouts: All
    src                  dst
    QASYMM8              F16, F32
    QASYMM8_SIGNED       F16, F32
    QSYMM8_PER_CHANNEL   F16, F32
    QSYMM8               F16, F32
    QSYMM16              F16, F32
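The CL functions additionally need an OpenCL context and queue, normally obtained through CLScheduler::get().default_init(). A minimal, untested sketch dequantizing a QASYMM8 tensor to F32 with CLDequantizationLayer; the 16-element shape, scale 0.5 and offset 10 are illustrative assumptions:

@code{.cpp}
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLDequantizationLayer.h"

using namespace arm_compute;

int main()
{
    // Create the OpenCL context, device and queue used by the CL backend.
    CLScheduler::get().default_init();

    // QASYMM8 input with its quantization info; F32 output.
    CLTensor src, dst;
    const TensorShape shape(16U);
    src.allocator()->init(TensorInfo(shape, 1, DataType::QASYMM8, QuantizationInfo(0.5f, 10)));
    dst.allocator()->init(TensorInfo(shape, 1, DataType::F32));

    CLDequantizationLayer dequant;
    dequant.configure(&src, &dst);

    src.allocator()->allocate();
    dst.allocator()->allocate();
    // Map and fill src, then run and wait for the queue to finish.
    dequant.run();
    CLScheduler::get().sync();
    return 0;
}
@endcode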
DetectionPostProcessLayer: Function to generate the detection output based on center size encoded boxes, class prediction and anchors by doing non maximum suppression (NMS).
  Android NNAPI op: ANEURALNETWORKS_DETECTION_POSTPROCESSING
  NEDetectionPostProcessLayer
    Data layouts: All
    src0 - src2      dst0 - dst3
    QASYMM8          F32
    QASYMM8_SIGNED   F32
    F32              F32
DirectConvolutionLayer: Function to compute direct convolution.
  Android NNAPI op: ANEURALNETWORKS_CONV_2D
  NEDirectConvolutionLayer
    Data layouts: NHWC, NCHW
    src0             src1             src2   dst
    F16              F16              F16    F16
    F32              F32              F32    F32
  CLDirectConvolutionLayer
    Data layouts: NHWC, NCHW
    src0             src1             src2   dst
    F16              F16              F16    F16
    F32              F32              F32    F32
    QASYMM8          QASYMM8          S32    QASYMM8
    QASYMM8_SIGNED   QASYMM8_SIGNED   S32    QASYMM8_SIGNED
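A minimal, untested sketch of NEDirectConvolutionLayer for one F32 NCHW configuration from the table above; the 32x32x8 input, 16 output feature maps, 3x3 kernel, stride 1 and pad 1 are illustrative assumptions:

@code{.cpp}
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    // NCHW: 32x32 input with 8 channels, 16 output feature maps, 3x3 kernel.
    Tensor src, weights, bias, dst;
    src.allocator()->init(TensorInfo(TensorShape(32U, 32U, 8U), 1, DataType::F32));
    weights.allocator()->init(TensorInfo(TensorShape(3U, 3U, 8U, 16U), 1, DataType::F32));
    bias.allocator()->init(TensorInfo(TensorShape(16U), 1, DataType::F32));
    dst.allocator()->init(TensorInfo(TensorShape(32U, 32U, 16U), 1, DataType::F32));

    // Stride 1, padding 1 keeps the 32x32 spatial size.
    NEDirectConvolutionLayer conv;
    conv.configure(&src, &weights, &bias, &dst, PadStrideInfo(1, 1, 1, 1));

    src.allocator()->allocate();
    weights.allocator()->allocate();
    bias.allocator()->allocate();
    dst.allocator()->allocate();
    // Fill src/weights/bias, then:
    conv.run();
    return 0;
}
@endcode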
DirectDeconvolutionLayer: Function to run the deconvolution layer.
  Android NNAPI op: ANEURALNETWORKS_TRANSPOSE_CONV_2D
  CLDirectDeconvolutionLayer
    Data layouts: NHWC, NCHW
    src0             src1                 src2   dst
    F16              F16                  F16    F16
    F32              F32                  F32    F32
    QASYMM8          QASYMM8              S32    QASYMM8
    QASYMM8_SIGNED   QASYMM8_SIGNED       S32    QASYMM8_SIGNED
    QASYMM8          QSYMM8_PER_CHANNEL   S32    QASYMM8
    QASYMM8_SIGNED   QSYMM8_PER_CHANNEL   S32    QASYMM8_SIGNED
ElementWiseOperations: Functions to perform elementwise operations. On the CPU backend: Div, Max, Min, Pow, SquaredDiff and comparisons (Equal, greater, greater_equal, less, less_equal, not_equal). On the OpenCL backend: Add, Sub, Div, Max, Min, Pow, SquaredDiff.
  Android NNAPI ops:
    • ANEURALNETWORKS_MAXIMUM
    • ANEURALNETWORKS_MINIMUM
    • ANEURALNETWORKS_POW
    • ANEURALNETWORKS_DIV
    • ANEURALNETWORKS_ADD
    • ANEURALNETWORKS_SUB
    • ANEURALNETWORKS_EQUAL
    • ANEURALNETWORKS_GREATER
    • ANEURALNETWORKS_GREATER_EQUAL
    • ANEURALNETWORKS_LESS
    • ANEURALNETWORKS_LESS_EQUAL
    • ANEURALNETWORKS_NOT_EQUAL
  NEElementwiseMax
    Data layouts: All
    src0             src1             dst
    QASYMM8          QASYMM8          QASYMM8
    QASYMM8_SIGNED   QASYMM8_SIGNED   QASYMM8_SIGNED
    S32              S32              S32
    S16              S16              S16
    F16              F16              F16
    F32              F32              F32
  NEElementwiseMin
    Data layouts: All
    src0             src1             dst
    QASYMM8          QASYMM8          QASYMM8
    QASYMM8_SIGNED   QASYMM8_SIGNED   QASYMM8_SIGNED
    S32              S32              S32
    S16              S16              S16
    F16              F16              F16
    F32              F32              F32
  NEElementwiseSquaredDiff
    Data layouts: All
    src0             src1             dst
    QASYMM8          QASYMM8          QASYMM8
    QASYMM8_SIGNED   QASYMM8_SIGNED   QASYMM8_SIGNED
    S32              S32              S32
    S16              S16              S16
    F16              F16              F16
    F32              F32              F32
  NEElementwiseDivision
    Data layouts: All
    src0   src1   dst
    F16    F16    F16
    F32    F32    F32
  NEElementwisePower
    Data layouts: All
    src0   src1   dst
    F16    F16    F16
    F32    F32    F32
  NEElementwiseComparison
    Data layouts: All
    src0             src1             dst
    QASYMM8          QASYMM8          U8
    QASYMM8_SIGNED   QASYMM8_SIGNED   U8
    S32              S32              U8
    U8               U8               U8
    S16              S16              U8
    F16              F16              U8
    F32              F32              U8
  CLArithmeticAddition
    Data layouts: All
    src0             src1             dst
    QASYMM8          QASYMM8          QASYMM8
    QASYMM8_SIGNED   QASYMM8_SIGNED   QASYMM8_SIGNED
    QSYMM16          QSYMM16          QASYMM16
    U8               U8               U8
    U8               U8               S16
    U8               S16              S16
    S16              U8               S16
    S16              S16              S16
    S32              S32              S32
    F16              F16              F16
    F32              F32              F32
  CLArithmeticSubtraction
    Data layouts: All
    src0             src1             dst
    QASYMM8          QASYMM8          QASYMM8
    QASYMM8_SIGNED   QASYMM8_SIGNED   QASYMM8_SIGNED
    QSYMM16          QSYMM16          QASYMM16
    U8               U8               U8
    U8               U8               S16
    U8               S16              S16
    S16              U8               S16
    S16              S16              S16
    S32              S32              S32
    F16              F16              F16
    F32              F32              F32
  CLArithmeticDivision
    Data layouts: All
    src0   src1   dst
    F16    F16    F16
    F32    F32    F32
  CLElementwiseMax
    Data layouts: All
    src0             src1             dst
    QASYMM8          QASYMM8          QASYMM8
    QASYMM8_SIGNED   QASYMM8_SIGNED   QASYMM8_SIGNED
    QSYMM16          QSYMM16          QASYMM16
    U8               U8               U8
    S16              S16              S16
    S32              S32              S32
    U32              U32              U32
    F16              F16              F16
    F32              F32              F32
  CLElementwiseMin
    Data layouts: All
    src0             src1             dst
    QASYMM8          QASYMM8          QASYMM8
    QASYMM8_SIGNED   QASYMM8_SIGNED   QASYMM8_SIGNED
    QSYMM16          QSYMM16          QASYMM16
    U8               U8               U8
    S16              S16              S16
    S32              S32              S32
    U32              U32              U32
    F16              F16              F16
    F32              F32              F32
  CLElementwiseSquaredDiff
    Data layouts: All
    src0             src1             dst
    QASYMM8          QASYMM8          QASYMM8
    QASYMM8_SIGNED   QASYMM8_SIGNED   QASYMM8_SIGNED
    QSYMM16          QSYMM16          QASYMM16
    U8               U8               U8
    S16              S16              S16
    F16              F16              F16
    F32              F32              F32
  CLElementwisePower
    Data layouts: All
    src0   src1   dst
    F16    F16    F16
    F32    F32    F32
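All of the elementwise binary functions above share the same configure(input1, input2, output) pattern; a minimal, untested sketch using NEElementwiseMax on F32 tensors (the 64-element shape is an illustrative assumption):

@code{.cpp}
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/NEON/functions/NEElementwiseOperations.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    // Elementwise max of two F32 tensors; the other NE/CL elementwise functions follow the same pattern.
    Tensor a, b, out;
    const TensorShape shape(64U);
    a.allocator()->init(TensorInfo(shape, 1, DataType::F32));
    b.allocator()->init(TensorInfo(shape, 1, DataType::F32));
    out.allocator()->init(TensorInfo(shape, 1, DataType::F32));

    NEElementwiseMax max_op;
    max_op.configure(&a, &b, &out);

    a.allocator()->allocate();
    b.allocator()->allocate();
    out.allocator()->allocate();
    // Fill a and b, then:
    max_op.run();
    return 0;
}
@endcode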
ElementwiseUnaryLayer: Function to perform unary elementwise operations: Rsqrt, Exp, Neg, Log, Abs, Round, Sin.
  Android NNAPI ops:
    • ANEURALNETWORKS_ABS
    • ANEURALNETWORKS_EXP
    • ANEURALNETWORKS_LOG
    • ANEURALNETWORKS_NEG
    • ANEURALNETWORKS_RSQRT
    • ANEURALNETWORKS_SIN
  NEElementwiseUnaryLayer
    Data layouts: All
    src   dst
    F16   F16
    F32   F32
    S32   S32
  CLRsqrtLayer
    Data layouts: All
    src   dst
    F16   F16
    F32   F32
- CLDepthwiseConvolutionLayer + CLExpLayer
    -
  • NHWC -
  • NCHW +
  • All
-
src0src1src2dst -
F16F16F16F16 -
F32F32F32F32 -
QASYMM8QASYMM8S32QASYMM8 -
QASYMM8QSYMM8_PER_CHANNELS32QASYMM8 -
QASYMM8_SIGNEDQASYMM8_SIGNEDS32QASYMM8_SIGNED -
QASYMM8_SIGNEDQSYMM8_PER_CHANNELS32QASYMM8_SIGNED +
srcdst +
F16F16 +
F32F32
- DequantizationLayer - Function to dequantize the values in a tensor. - -
    -
  • ANEURALNETWORKS_DEQUANTIZE -
- NEDequantizationLayer + CLNegLayer
  • All @@ -746,14 +1240,11 @@ where N = batches, C = channels, H = height, W = width
    srcdst -
    QASYMM8F16, F32 -
    QASYMM8_SIGNEDF16, F32 -
    QSYMM8_PER_CHANNELF16, F32 -
    QSYMM8F16, F32 -
    QSYMM16F16, F32 +
    F16F16 +
    F32F32
    - CLDequantizationLayer + CLSinLayer
    • All @@ -761,45 +1252,44 @@ where N = batches, C = channels, H = height, W = width
      srcdst -
      QASYMM8F16, F32 -
      QASYMM8_SIGNEDF16, F32 -
      QSYMM8_PER_CHANNELF16, F32 -
      QSYMM8F16, F32 -
      QSYMM16F16, F32 +
      F16F16 +
      F32F32
      - DirectConvolutionLayer - Function to compute direct convolution. - + CLLogLayer +
        -
      • ANEURALNETWORKS_CONV_2D +
      • All
      - NEDirectConvolutionLayer + + +
      srcdst +
      F16F16 +
      F32F32 +
      + + CLAbsLayer
        -
      • NHWC -
      • NCHW +
      • All
      -
      src0src1src2dst -
      F16F16F16F16 -
      F32F32F32F32 +
      srcdst +
      F16F16 +
      F32F32
      - CLDirectConvolutionLayer + CLRoundLayer
        -
      • NHWC -
      • NCHW +
      • All
      -
      src0src1src2dst -
      F16F16F16F16 -
      F32F32F32F32 -
      QASYMM8QASYMM8S32QASYMM8 -
      QASYMM8_SIGNEDQASYMM8_SIGNEDS32QASYMM8_SIGNED +
      srcdst +
      F16F16 +
      F32F32
      FFT1D @@ -1009,7 +1499,7 @@ where N = batches, C = channels, H = height, W = width
      • ANEURALNETWORKS_FULLY_CONNECTED
      - NEFullyConnectedLayerReshapeWeightsManaged + NEFullyConnectedLayer
      • NHWC @@ -1024,7 +1514,7 @@ where N = batches, C = channels, H = height, W = width QASYMM8_SIGNEDQASYMM8_SIGNEDS32QASYMM8_SIGNED - CLFullyConnectedLayerReshapeWeightsManaged + CLFullyConnectedLayer
        • NHWC @@ -1118,7 +1608,7 @@ where N = batches, C = channels, H = height, W = width BFLOAT16BFLOAT16BFLOAT16BFLOAT16 - CLGEMMReshapeRHSMatrixKernelManaged + CLGEMM
          • All @@ -1129,6 +1619,27 @@ where N = batches, C = channels, H = height, W = width F32F32F32F32 F16F16F16F16 + + GEMMConv2D + General Matrix Multiplication. + +
  Android NNAPI op: ANEURALNETWORKS_CONV_2D
  NEGEMMConv2d
    Data layouts: All
    src0             src1             src2       dst
    QASYMM8          QASYMM8          S32        QASYMM8
    QASYMM8_SIGNED   QASYMM8_SIGNED   S32        QASYMM8_SIGNED
    F16              F16              F16        F16
    F32              F32              F32        F32
    BFLOAT16         BFLOAT16         BFLOAT16   BFLOAT16
            GEMMConvolutionLayer General Matrix Multiplication. @@ -1136,7 +1647,7 @@ where N = batches, C = channels, H = height, W = width
            • ANEURALNETWORKS_CONV_2D
            - NEConvolutionLayerReshapeWeights + NEGEMMConvolutionLayer
            • NHWC @@ -1154,7 +1665,7 @@ where N = batches, C = channels, H = height, W = width QASYMM8_SIGNEDQSYMM8_PER_CHANNELS32QASYMM8_SIGNED - CLConvolutionLayerReshapeWeights + CLGEMMConvolutionLayer
              • NHWC @@ -1170,6 +1681,26 @@ where N = batches, C = channels, H = height, W = width QASYMM8_SIGNEDQASYMM8_SIGNEDS32QASYMM8_SIGNED QASYMM8_SIGNEDQSYMM8_PER_CHANNELS32QASYMM8_SIGNED + + GEMMDeconvolutionLayer + General Matrix Multiplication. + +
  Android NNAPI op: ANEURALNETWORKS_TRANSPOSE_CONV_2D
  CLGEMMDeconvolutionLayer
    Data layouts: NHWC
    src0             src1             src2   dst
    F16              F16              F16    F16
    F32              F32              F32    F32
    QASYMM8          QASYMM8          S32    QASYMM8
    QASYMM8_SIGNED   QASYMM8_SIGNED   S32    QASYMM8_SIGNED
                GEMMLowpMatrixMultiplyCore General Matrix Multiplication. @@ -1222,6 +1753,38 @@ where N = batches, C = channels, H = height, W = width QASYMM8_SIGNEDQSYMM8_PER_CHANNELS32S32 QASYMM8_SIGNEDQSYMM8S32S32 + + GEMMLowpOutputStage + General Matrix Multiplication. + +
  Android NNAPI op: n/a
  NEGEMMLowpOutputStage
    Data layouts: All
    src0   src1   dst
    S32    S32    QASYMM8
    S32    S32    QASYMM8_SIGNED
    S32    S32    QSYMM16
  CLGEMMLowpOutputStage
    Data layouts: All
    src0   src1   dst
    S32    S32    QASYMM8
    S32    S32    QASYMM8_SIGNED
    S32    S32    QSYMM16
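A hedged, untested sketch of how NEGEMMLowpOutputStage is typically used to requantize S32 GEMM accumulators down to QASYMM8. The multiplier, shift, offset, bounds, shapes and output scale below are placeholders; in real code they are derived from the quantization parameters of the GEMM inputs and output:

@code{.cpp}
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    // S32 accumulators and S32 bias in, QASYMM8 out.
    Tensor acc, bias, dst;
    acc.allocator()->init(TensorInfo(TensorShape(32U, 8U), 1, DataType::S32));
    bias.allocator()->init(TensorInfo(TensorShape(32U), 1, DataType::S32));
    dst.allocator()->init(TensorInfo(TensorShape(32U, 8U), 1, DataType::QASYMM8, QuantizationInfo(0.25f, 128)));

    // Fixed-point requantization parameters (placeholders, normally computed from the scales).
    GEMMLowpOutputStageInfo info{};
    info.type                = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT;
    info.gemmlowp_multiplier = 1073741824; // placeholder fixed-point multiplier
    info.gemmlowp_shift      = 1;          // placeholder shift
    info.gemmlowp_offset     = 128;        // output zero point
    info.gemmlowp_min_bound  = 0;
    info.gemmlowp_max_bound  = 255;
    info.output_data_type    = DataType::QASYMM8;

    NEGEMMLowpOutputStage output_stage;
    output_stage.configure(&acc, &bias, &dst, info);

    acc.allocator()->allocate();
    bias.allocator()->allocate();
    dst.allocator()->allocate();
    output_stage.run();
    return 0;
}
@endcode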
                GenerateProposalsLayer Function to generate proposals for a RPN (Region Proposal Network). @@ -1318,6 +1881,96 @@ where N = batches, C = channels, H = height, W = width F16F16 F32F32 + + Logical + Function to perform: - Logical AND - Logical OR - Logical NOT + +
  Android NNAPI op: n/a
  NELogicalAnd
    Data layouts: All
    src0   src1   dst
    U8     U8     U8
  NELogicalOr
    Data layouts: All
    src0   src1   dst
    U8     U8     U8
  NELogicalNot
    Data layouts: All
    src    dst
    U8     U8
LogicalAnd: Function to perform Logical AND.
  Android NNAPI op: n/a
  CLLogicalAnd
    Data layouts: All
    src0   src1   dst
    U8     U8     U8
LogicalOr: Function to perform Logical OR.
  Android NNAPI op: n/a
  CLLogicalOr
    Data layouts: All
    src0   src1   dst
    U8     U8     U8
LogicalNot: Function to perform Logical NOT.
  Android NNAPI op: n/a
  CLLogicalNot
    Data layouts: All
    src    dst
    U8     U8
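A minimal, untested sketch of NELogicalAnd on U8 tensors; NELogicalOr and NELogicalNot (and the CL variants) follow the same configure/run pattern. The 32-element shape is an illustrative assumption:

@code{.cpp}
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/NEON/functions/NELogical.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    // Logical AND of two U8 tensors (non-zero values are treated as true).
    Tensor a, b, out;
    const TensorShape shape(32U);
    a.allocator()->init(TensorInfo(shape, 1, DataType::U8));
    b.allocator()->init(TensorInfo(shape, 1, DataType::U8));
    out.allocator()->init(TensorInfo(shape, 1, DataType::U8));

    NELogicalAnd land;
    land.configure(&a, &b, &out);

    a.allocator()->allocate();
    b.allocator()->allocate();
    out.allocator()->allocate();
    // Fill a and b, then:
    land.run();
    return 0;
}
@endcode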
                LSTMLayer Function to perform a single time step in a Long Short-Term Memory (LSTM) layer. @@ -1660,7 +2313,7 @@ where N = batches, C = channels, H = height, W = width PriorBoxLayer - Function to . + Function to compute prior boxes and clip.
                • n/a @@ -2150,6 +2803,41 @@ where N = batches, C = channels, H = height, W = width srcdst AllAll + + SoftmaxLayer + Function to compute a SoftmaxLayer and a Log SoftmaxLayer. + +
  Android NNAPI ops:
    • ANEURALNETWORKS_LOG_SOFTMAX
    • ANEURALNETWORKS_SOFTMAX
  NESoftmaxLayerGeneric
    Data layouts: All
    src              dst
    QASYMM8          QASYMM8
    QASYMM8_SIGNED   QASYMM8_SIGNED
    F16              F16
    F32              F32
  CLSoftmaxLayerGeneric
    Data layouts: All
    src              dst
    QASYMM8          QASYMM8
    QASYMM8_SIGNED   QASYMM8_SIGNED
    F16              F16
    F32              F32
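A minimal, untested sketch of NESoftmaxLayer (the non-log instantiation of NESoftmaxLayerGeneric) on F32 logits; the 1000x4 shape and the default beta/axis are illustrative assumptions:

@code{.cpp}
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/NEON/functions/NESoftmaxLayer.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    // Softmax over the first dimension of a (classes x batch) F32 tensor.
    Tensor logits, probs;
    const TensorShape shape(1000U, 4U);
    logits.allocator()->init(TensorInfo(shape, 1, DataType::F32));
    probs.allocator()->init(TensorInfo(shape, 1, DataType::F32));

    NESoftmaxLayer softmax; // use NELogSoftmaxLayer for the log-softmax variant
    softmax.configure(&logits, &probs); // beta = 1.f and axis = 0 by default

    logits.allocator()->allocate();
    probs.allocator()->allocate();
    // Fill logits, then:
    softmax.run();
    return 0;
}
@endcode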
SpaceToBatchLayer: Function to divide a tensor spatially. @@ -2410,6 +3098,25 @@ where N = batches, C = channels, H = height, W = width
    F16   F16   F16   F16
    F32   F32   F32   F32
WinogradInputTransform: Function to perform a Winograd input transform.
  Android NNAPI op: n/a
  CLWinogradInputTransform
    Data layouts: NHWC, NCHW
    src   dst
    F16   F16
    F32   F32
                  */ -- cgit v1.2.1