From 62687420901c12be609426f3cf9dee300d25746a Mon Sep 17 00:00:00 2001 From: Teresa Charlin Date: Wed, 28 Apr 2021 10:58:49 +0100 Subject: Update operator list documentation. Part 2. All data type and data layout information for the operators are stored in the function header files Signed-off-by: Teresa Charlin Change-Id: I30b564f7eda6bbd99bf3ad36ddb6639ac118eb8b Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/c/VisualCompute/ComputeLibrary/+/319829 Tested-by: bsgcomp Reviewed-by: Michele DiGiorgio Comments-Addressed: bsgcomp Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5531 Tested-by: Arm Jenkins Reviewed-by: SiCong Li Comments-Addressed: Arm Jenkins --- .../runtime/CL/functions/CLArgMinMaxLayer.h | 14 +- .../CL/functions/CLBatchNormalizationLayer.h | 12 +- .../runtime/CL/functions/CLBatchToSpaceLayer.h | 11 +- arm_compute/runtime/CL/functions/CLBitwiseAnd.h | 8 + arm_compute/runtime/CL/functions/CLBitwiseNot.h | 7 + arm_compute/runtime/CL/functions/CLBitwiseOr.h | 8 + arm_compute/runtime/CL/functions/CLBitwiseXor.h | 8 + .../runtime/CL/functions/CLBoundingBoxTransform.h | 13 +- arm_compute/runtime/CL/functions/CLCast.h | 24 +- .../runtime/CL/functions/CLChannelShuffleLayer.h | 10 +- .../runtime/CL/functions/CLConvolutionLayer.h | 31 +- arm_compute/runtime/CL/functions/CLCropResize.h | 8 + .../runtime/CL/functions/CLDeconvolutionLayer.h | 14 + .../runtime/CL/functions/CLDepthConvertLayer.h | 24 +- .../runtime/CL/functions/CLDepthToSpaceLayer.h | 11 +- .../CL/functions/CLDepthwiseConvolutionLayer.h | 14 + .../runtime/CL/functions/CLDequantizationLayer.h | 19 +- arm_compute/runtime/CL/functions/CLFillBorder.h | 10 +- arm_compute/runtime/CL/functions/CLFlattenLayer.h | 8 + .../runtime/CL/functions/CLFullyConnectedLayer.h | 12 + .../CL/functions/CLFuseBatchNormalization.h | 12 +- arm_compute/runtime/CL/functions/CLGEMM.h | 9 + .../runtime/CL/functions/CLGEMMConvolutionLayer.h | 16 +- .../CL/functions/CLGEMMLowpMatrixMultiplyCore.h | 22 +- 
arm_compute/runtime/CL/functions/CLGather.h | 10 +- .../CL/functions/CLGenerateProposalsLayer.h | 10 + .../CL/functions/CLInstanceNormalizationLayer.h | 10 + .../runtime/CL/functions/CLL2NormalizeLayer.h | 12 +- arm_compute/runtime/CL/functions/CLLSTMLayer.h | 9 + .../runtime/CL/functions/CLLSTMLayerQuantized.h | 10 +- .../runtime/CL/functions/CLMaxUnpoolingLayer.h | 12 + .../CL/functions/CLMeanStdDevNormalizationLayer.h | 12 +- .../runtime/CL/functions/CLNormalizationLayer.h | 10 + arm_compute/runtime/CL/functions/CLPadLayer.h | 9 + arm_compute/runtime/CL/functions/CLPriorBoxLayer.h | 11 +- arm_compute/runtime/CL/functions/CLQLSTMLayer.h | 8 + .../runtime/CL/functions/CLQuantizationLayer.h | 20 +- arm_compute/runtime/CL/functions/CLRNNLayer.h | 10 + arm_compute/runtime/CL/functions/CLROIAlignLayer.h | 13 +- .../runtime/CL/functions/CLROIPoolingLayer.h | 10 + arm_compute/runtime/CL/functions/CLRange.h | 18 +- arm_compute/runtime/CL/functions/CLReduceMean.h | 13 +- .../runtime/CL/functions/CLReductionOperation.h | 12 + arm_compute/runtime/CL/functions/CLRemap.h | 10 +- arm_compute/runtime/CL/functions/CLReorgLayer.h | 11 +- arm_compute/runtime/CL/functions/CLReverse.h | 10 +- arm_compute/runtime/CL/functions/CLSelect.h | 10 +- .../runtime/CL/functions/CLSpaceToBatchLayer.h | 9 + .../runtime/CL/functions/CLSpaceToDepthLayer.h | 11 +- arm_compute/runtime/CL/functions/CLSplit.h | 14 +- arm_compute/runtime/CL/functions/CLStackLayer.h | 10 +- arm_compute/runtime/CL/functions/CLTile.h | 10 +- arm_compute/runtime/CL/functions/CLUnstack.h | 10 +- .../CL/functions/CLWinogradConvolutionLayer.h | 12 +- .../runtime/NEON/functions/NEArgMinMaxLayer.h | 12 + .../NEON/functions/NEBatchNormalizationLayer.h | 12 +- .../runtime/NEON/functions/NEBatchToSpaceLayer.h | 11 +- arm_compute/runtime/NEON/functions/NEBitwiseAnd.h | 10 +- arm_compute/runtime/NEON/functions/NEBitwiseNot.h | 10 +- arm_compute/runtime/NEON/functions/NEBitwiseOr.h | 10 +- 
arm_compute/runtime/NEON/functions/NEBitwiseXor.h | 10 +- .../NEON/functions/NEBoundingBoxTransform.h | 11 + arm_compute/runtime/NEON/functions/NECast.h | 26 +- .../runtime/NEON/functions/NEChannelShuffleLayer.h | 10 +- .../runtime/NEON/functions/NEConvolutionLayer.h | 27 +- arm_compute/runtime/NEON/functions/NECropResize.h | 8 + .../runtime/NEON/functions/NEDeconvolutionLayer.h | 14 + .../runtime/NEON/functions/NEDepthConvertLayer.h | 24 +- .../runtime/NEON/functions/NEDepthToSpaceLayer.h | 11 +- .../NEON/functions/NEDepthwiseConvolutionLayer.h | 14 + .../runtime/NEON/functions/NEDequantizationLayer.h | 19 +- arm_compute/runtime/NEON/functions/NEFillBorder.h | 8 + .../runtime/NEON/functions/NEFlattenLayer.h | 10 +- .../runtime/NEON/functions/NEFullyConnectedLayer.h | 13 +- .../NEON/functions/NEFuseBatchNormalization.h | 12 +- arm_compute/runtime/NEON/functions/NEGEMM.h | 10 + .../NEON/functions/NEGEMMConvolutionLayer.h | 15 + .../NEON/functions/NEGEMMLowpMatrixMultiplyCore.h | 20 + arm_compute/runtime/NEON/functions/NEGather.h | 10 +- .../NEON/functions/NEGenerateProposalsLayer.h | 10 + .../NEON/functions/NEInstanceNormalizationLayer.h | 12 +- .../runtime/NEON/functions/NEL2NormalizeLayer.h | 12 +- arm_compute/runtime/NEON/functions/NELSTMLayer.h | 11 +- .../runtime/NEON/functions/NELSTMLayerQuantized.h | 8 + .../runtime/NEON/functions/NEMaxUnpoolingLayer.h | 12 + .../functions/NEMeanStdDevNormalizationLayer.h | 12 +- .../runtime/NEON/functions/NENormalizationLayer.h | 10 + arm_compute/runtime/NEON/functions/NEPadLayer.h | 9 + .../runtime/NEON/functions/NEPriorBoxLayer.h | 11 +- arm_compute/runtime/NEON/functions/NEQLSTMLayer.h | 8 + .../runtime/NEON/functions/NEQuantizationLayer.h | 20 +- arm_compute/runtime/NEON/functions/NERNNLayer.h | 10 + .../runtime/NEON/functions/NEROIAlignLayer.h | 15 +- .../runtime/NEON/functions/NEROIPoolingLayer.h | 9 + arm_compute/runtime/NEON/functions/NERange.h | 17 +- arm_compute/runtime/NEON/functions/NEReduceMean.h | 13 +- 
.../runtime/NEON/functions/NEReductionOperation.h | 16 +- arm_compute/runtime/NEON/functions/NERemap.h | 8 + arm_compute/runtime/NEON/functions/NEReorgLayer.h | 11 +- arm_compute/runtime/NEON/functions/NEReverse.h | 10 +- arm_compute/runtime/NEON/functions/NESelect.h | 10 +- .../runtime/NEON/functions/NESpaceToBatchLayer.h | 9 + .../runtime/NEON/functions/NESpaceToDepthLayer.h | 9 + arm_compute/runtime/NEON/functions/NESplit.h | 14 +- arm_compute/runtime/NEON/functions/NEStackLayer.h | 10 +- arm_compute/runtime/NEON/functions/NETile.h | 10 +- arm_compute/runtime/NEON/functions/NEUnstack.h | 10 +- .../NEON/functions/NEWinogradConvolutionLayer.h | 10 + arm_compute/runtime/OperatorList.h | 301 ++- docs/09_operators_list.dox | 2156 +++++++++++++++++--- src/core/CL/cl_kernels/bounding_box_transform.cl | 4 +- .../cl_kernels/bounding_box_transform_quantized.cl | 4 +- src/core/CL/cl_kernels/crop_tensor.cl | 4 +- src/core/CL/cl_kernels/depth_to_space.cl | 10 +- src/core/NEON/kernels/NEReductionOperationKernel.h | 4 +- 115 files changed, 3206 insertions(+), 601 deletions(-) diff --git a/arm_compute/runtime/CL/functions/CLArgMinMaxLayer.h b/arm_compute/runtime/CL/functions/CLArgMinMaxLayer.h index c254284cd7..a971163c45 100644 --- a/arm_compute/runtime/CL/functions/CLArgMinMaxLayer.h +++ b/arm_compute/runtime/CL/functions/CLArgMinMaxLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -64,6 +64,18 @@ public: /** Default destructor */ ~CLArgMinMaxLayer(); /** Set the input and output tensors. + * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:---------| + * |QASYMM8 |U32, S32 | + * |QASYMM8_SIGNED |U32, S32 | + * |S32 |U32, S32 | + * |F16 |U32, S32 | + * |F32 |U32, S32 | * * @param[in] input Input source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/S32/F16/F32. * @param[in] axis Axis to find max/min index. 
diff --git a/arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h b/arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h index c8acf9fc6b..fcfeb5ea3b 100644 --- a/arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h +++ b/arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -59,6 +59,16 @@ public: /** Default destructor */ ~CLBatchNormalizationLayer(); /** Set the input and output tensors. + * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |F32 |F32 | + * |F16 |F16 | * * @note If the output tensor is a nullptr or is equal to the input, the batch normalization function will be performed in-place * diff --git a/arm_compute/runtime/CL/functions/CLBatchToSpaceLayer.h b/arm_compute/runtime/CL/functions/CLBatchToSpaceLayer.h index bdb58531d0..f6ba2b0b02 100644 --- a/arm_compute/runtime/CL/functions/CLBatchToSpaceLayer.h +++ b/arm_compute/runtime/CL/functions/CLBatchToSpaceLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -54,6 +54,15 @@ public: /** Default destructor */ ~CLBatchToSpaceLayer(); /** Set the input and output tensors. + * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src0 |src1 |dst | + * |:---------|:---------|:----------| + * |All |S32 |All | * * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. * @param[in] block_shape 1-D tensor with shape [M]. 
Data types supported: S32 diff --git a/arm_compute/runtime/CL/functions/CLBitwiseAnd.h b/arm_compute/runtime/CL/functions/CLBitwiseAnd.h index a703242875..b30be9b24f 100644 --- a/arm_compute/runtime/CL/functions/CLBitwiseAnd.h +++ b/arm_compute/runtime/CL/functions/CLBitwiseAnd.h @@ -40,6 +40,14 @@ class CLBitwiseAnd : public ICLSimpleFunction { public: /** Initialize the function + * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |U8 |U8 | * * @param[in] input1 Input tensor. Data types supported: U8. * @param[in] input2 Input tensor. Data types supported: U8. diff --git a/arm_compute/runtime/CL/functions/CLBitwiseNot.h b/arm_compute/runtime/CL/functions/CLBitwiseNot.h index 6f65749d9f..1456ebe57e 100644 --- a/arm_compute/runtime/CL/functions/CLBitwiseNot.h +++ b/arm_compute/runtime/CL/functions/CLBitwiseNot.h @@ -40,6 +40,13 @@ class CLBitwiseNot : public ICLSimpleFunction { public: /** Initialize the function + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |U8 |U8 | * * @param[in] input Input tensor. Data types supported: U8. * @param[out] output Output tensor. Data types supported: U8. diff --git a/arm_compute/runtime/CL/functions/CLBitwiseOr.h b/arm_compute/runtime/CL/functions/CLBitwiseOr.h index 3c904fb903..ff0a1f0d73 100644 --- a/arm_compute/runtime/CL/functions/CLBitwiseOr.h +++ b/arm_compute/runtime/CL/functions/CLBitwiseOr.h @@ -40,6 +40,14 @@ class CLBitwiseOr : public ICLSimpleFunction { public: /** Initialize the function + * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |U8 |U8 | * * @param[in] input1 Input tensor. Data types supported: U8. * @param[in] input2 Input tensor. Data types supported: U8. 
diff --git a/arm_compute/runtime/CL/functions/CLBitwiseXor.h b/arm_compute/runtime/CL/functions/CLBitwiseXor.h index a33a64ad71..0cd9d073b4 100644 --- a/arm_compute/runtime/CL/functions/CLBitwiseXor.h +++ b/arm_compute/runtime/CL/functions/CLBitwiseXor.h @@ -40,6 +40,14 @@ class CLBitwiseXor : public ICLSimpleFunction { public: /** Initialize the function + * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |U8 |U8 | * * @param[in] input1 Input tensor. Data types supported: U8. * @param[in] input2 Input tensor. Data types supported: U8. diff --git a/arm_compute/runtime/CL/functions/CLBoundingBoxTransform.h b/arm_compute/runtime/CL/functions/CLBoundingBoxTransform.h index d6409106da..d3499c3949 100644 --- a/arm_compute/runtime/CL/functions/CLBoundingBoxTransform.h +++ b/arm_compute/runtime/CL/functions/CLBoundingBoxTransform.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -44,6 +44,17 @@ class CLBoundingBoxTransform : public ICLSimpleFunction { public: /** Set the input and output tensors. + * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src0 |src1 |dst | + * |:--------------|:--------------|:--------------| + * |QASYMM16 |QASYMM8 |QASYMM16 | + * |F16 |F16 |F16 | + * |F32 |F32 |F32 | * * @param[in] boxes Source tensor. Bounding box proposals in pixel coordinates. Size(M, 4), format [x1, y1, x2, y2]. Data types supported: QASYMM16/F16/F32. * @param[out] pred_boxes Destination tensor. Pixel coordinates of the transformed bounding boxes. Size (M, 4*K), format [x1, y1, x2, y2]. 
Data types supported: Same as @p input diff --git a/arm_compute/runtime/CL/functions/CLCast.h b/arm_compute/runtime/CL/functions/CLCast.h index bd333d4e72..6e4cf62547 100644 --- a/arm_compute/runtime/CL/functions/CLCast.h +++ b/arm_compute/runtime/CL/functions/CLCast.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -41,17 +41,21 @@ class CLCast : public ICLSimpleFunction public: /** Initialize the function's source, destination * - * Input data type must be different than output data type. + * Valid data layouts: + * - All * - * Valid conversions Input -> Output : + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------------------------------| + * |U8 | S8, U16, S16, U32, S32, F16, F32 | + * |U16 | U8, S8, S16, U32, S32, F16, F32 | + * |S16 | U8, S8, U16, U32, S32, F16, F32 | + * |U32 | U8, S8, U16, S16, S32, F16, F32 | + * |S32 | U8, S8, U16, S16, U32, F16, F32 | + * |F16 | U8, S8, U16, S16, U32, F32 | + * |F32 | U8, S8, U16, S16, U32, F16 | * - * - U8 -> S8, U16, S16, U32, S32, F16, F32 - * - U16 -> U8, S8, S16, U32, S32, F16, F32 - * - S16 -> U8, S8, U16, U32, S32, F16, F32 - * - U32 -> U8, S8, U16, S16, S32, F16, F32 - * - S32 -> U8, S8, U16, S16, U32, F16, F32 - * - F16 -> U8, S8, U16, S16, U32, F32 - * - F32 -> U8, S8, U16, S16, U32, F16 + * Input data type must be different than output data type. * * @param[in] input The input tensor to convert. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32. * @param[out] output The output tensor. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32. diff --git a/arm_compute/runtime/CL/functions/CLChannelShuffleLayer.h b/arm_compute/runtime/CL/functions/CLChannelShuffleLayer.h index 54cf59f59a..d60548d9cc 100644 --- a/arm_compute/runtime/CL/functions/CLChannelShuffleLayer.h +++ b/arm_compute/runtime/CL/functions/CLChannelShuffleLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. 
+ * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -43,6 +43,14 @@ class CLChannelShuffleLayer : public ICLSimpleFunction { public: /** Initialize the function + * + * Valid data layouts: + * - NCHW + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |All |All | * * @param[in] input Input tensor. Data types supported: All. * @param[out] output Output tensor. Data type supported: Same as @p input diff --git a/arm_compute/runtime/CL/functions/CLConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLConvolutionLayer.h index d1de721193..6884754d83 100644 --- a/arm_compute/runtime/CL/functions/CLConvolutionLayer.h +++ b/arm_compute/runtime/CL/functions/CLConvolutionLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -84,14 +84,28 @@ public: /** Default move assignment operator */ CLConvolutionLayer &operator=(CLConvolutionLayer &&) = default; /** Set the input and output tensors. + * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src0 |src1 |src2 |dst | + * |:--------------|:------------------|:------|:--------------| + * |F16 |F16 |F16 |F16 | + * |F32 |F32 |F32 |F32 | + * |QASYMM8 |QASYMM8 |S32 |QASYMM8 | + * |QASYMM8 |QSYMM8_PER_CHANNEL |S32 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |QASYMM8_SIGNED | + * |QASYMM8_SIGNED |QSYMM8_PER_CHANNEL |S32 |QASYMM8_SIGNED | * * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM], * while every optional dimension from 4 and above represent a batch of inputs. * Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. - * Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8. 
+ * Data type supported: Same as @p input, also could be QSYMM8_PER_CHANNEL if input is QASYMM8/QASYMM8_SIGNED. * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. - * Data type supported: Should match @p input data type, except for input of QASYMM8 type where biases should be of S32 type. + * Data type supported: Same as @p input, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type. * @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs. * Data types supported: Same as @p input. * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. @@ -111,9 +125,9 @@ public: * while every optional dimension from 4 and above represent a batch of inputs. * Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. - * Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8. + * Data type supported: Same as @p input, also could be QSYMM8_PER_CHANNEL if input is QASYMM8/QASYMM8_SIGNED. * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. - * Data type supported: Should match @p input data type, except for input of QASYMM8 type where biases should be of S32 type. + * Data type supported: Same as @p input, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type. * @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs. * Data types supported: Same as @p input. * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. @@ -133,8 +147,9 @@ public: * while every optional dimension from 4 and above represent a batch of inputs. 
* Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. - * Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8. - * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. Data type supported:Same as @p input. + * Data type supported: Same as @p input, also could be QSYMM8_PER_CHANNEL if input is QASYMM8/QASYMM8_SIGNED. + * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. + * Data type supported: Same as @p input, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type. * @param[in] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs. * Data types supported: Same as @p input. * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. @@ -156,7 +171,7 @@ public: * while every optional dimension from 4 and above represent a batch of inputs. * Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. - * Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8. + * Data type supported: Same as @p input, also could be QSYMM8_PER_CHANNEL if input is QASYMM8/QASYMM8_SIGNED. * @param[in] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs. * Data types supported: Same as @p input. * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. 
diff --git a/arm_compute/runtime/CL/functions/CLCropResize.h b/arm_compute/runtime/CL/functions/CLCropResize.h index 0dc3c48b32..5c60c2879c 100644 --- a/arm_compute/runtime/CL/functions/CLCropResize.h +++ b/arm_compute/runtime/CL/functions/CLCropResize.h @@ -60,6 +60,14 @@ public: ~CLCropResize(); /** Configure kernel + * + * Valid data layouts: + * - NHWC + * + * Valid data type configurations: + * |src0 |src1 |src2 |dst | + * |:--------|:--------|:------|:--------| + * |All |F32 |F32 |F32 | * * @note Supported tensor rank: up to 4 * @note Box indices may be outside of the bounds, in which case @p extrapolation_value is used. diff --git a/arm_compute/runtime/CL/functions/CLDeconvolutionLayer.h b/arm_compute/runtime/CL/functions/CLDeconvolutionLayer.h index 4be8c17835..2dd4cd4bf5 100644 --- a/arm_compute/runtime/CL/functions/CLDeconvolutionLayer.h +++ b/arm_compute/runtime/CL/functions/CLDeconvolutionLayer.h @@ -45,6 +45,20 @@ public: CLDeconvolutionLayer(std::shared_ptr memory_manager = nullptr); /** Set the input, weights, biases and output tensors. + * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src0 |src1 |src2 |dst | + * |:--------------|:------------------|:------|:--------------| + * |F16 |F16 |F16 |F16 | + * |F32 |F32 |F32 |F32 | + * |QASYMM8 |QASYMM8 |S32 |QASYMM8 | + * |QASYMM8 |QSYMM8_PER_CHANNEL |S32 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |QASYMM8_SIGNED | + * |QASYMM8_SIGNED |QSYMM8_PER_CHANNEL |S32 |QASYMM8_SIGNED | * * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32. * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. Data type supported: Same as @p input or QSYMM8_PER_CHANNEL if @p input is QASYMM8/QASYMM8_SIGNED. 
diff --git a/arm_compute/runtime/CL/functions/CLDepthConvertLayer.h b/arm_compute/runtime/CL/functions/CLDepthConvertLayer.h index b0f297aec5..34dfdd7f3a 100644 --- a/arm_compute/runtime/CL/functions/CLDepthConvertLayer.h +++ b/arm_compute/runtime/CL/functions/CLDepthConvertLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020 Arm Limited. + * Copyright (c) 2016-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -41,17 +41,21 @@ class CLDepthConvertLayer : public ICLSimpleFunction public: /** Initialize the function's source, destination * - * Input data type must be different than output data type. + * Valid data layouts: + * - All * - * Valid conversions Input -> Output : + * Valid data type configurations: + * |src |dst | + * |:--------------|:-------------------------------------| + * |U8 | S8, U16, S16, U32, S32, F16, F32 | + * |U16 | U8, S8, S16, U32, S32, F16, F32 | + * |S16 | U8, S8, U16, U32, S32, F16, F32 | + * |U32 | U8, S8, U16, S16, S32, F16, F32 | + * |S32 | U8, S8, U16, S16, U32, F16, F32 | + * |F16 | U8, S8, U16, S16, U32, F32 | + * |F32 | U8, S8, U16, S16, U32, F16 | * - * - U8 -> S8, U16, S16, U32, S32, F16, F32 - * - U16 -> U8, S8, S16, U32, S32, F16, F32 - * - S16 -> U8, S8, U16, U32, S32, F16, F32 - * - U32 -> U8, S8, U16, S16, S32, F16, F32 - * - S32 -> U8, S8, U16, S16, U32, F16, F32 - * - F16 -> U8, S8, U16, S16, U32, F32 - * - F32 -> U8, S8, U16, S16, U32, F16 + * Input data type must be different than output data type. * * @param[in] input The input tensor to convert. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32. * @param[out] output The output tensor. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32. 
diff --git a/arm_compute/runtime/CL/functions/CLDepthToSpaceLayer.h b/arm_compute/runtime/CL/functions/CLDepthToSpaceLayer.h index a0aa288dbf..0026cc2b67 100644 --- a/arm_compute/runtime/CL/functions/CLDepthToSpaceLayer.h +++ b/arm_compute/runtime/CL/functions/CLDepthToSpaceLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020 Arm Limited. + * Copyright (c) 2019-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -38,6 +38,15 @@ class CLDepthToSpaceLayer : public ICLSimpleFunction { public: /** Set the input and output tensors. + * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |All |All | * * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. * @param[out] output Tensor output. Data types supported: same as @p input diff --git a/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h index 1af9e1dc6f..f31a17d9cb 100644 --- a/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h +++ b/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h @@ -57,6 +57,20 @@ public: /** Default destructor */ ~CLDepthwiseConvolutionLayer(); /** Initialize the function's source, destination, weights and convolution information. + * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src0 |src1 |src2 |dst | + * |:--------------|:------------------|:------|:--------------| + * |F16 |F16 |F16 |F16 | + * |F32 |F32 |F32 |F32 | + * |QASYMM8 |QASYMM8 |S32 |QASYMM8 | + * |QASYMM8 |QSYMM8_PER_CHANNEL |S32 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |QASYMM8_SIGNED | + * |QASYMM8_SIGNED |QSYMM8_PER_CHANNEL |S32 |QASYMM8_SIGNED | * * @param[in, out] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/FP16/FP32. Data layout supported: NHWC, NCHW * @param[in] weights Weights tensor. 
These are 3D tensors with shape [kernel_x, kernel_y, IFM]. diff --git a/arm_compute/runtime/CL/functions/CLDequantizationLayer.h b/arm_compute/runtime/CL/functions/CLDequantizationLayer.h index 4a5c3a3203..601c13d0e4 100644 --- a/arm_compute/runtime/CL/functions/CLDequantizationLayer.h +++ b/arm_compute/runtime/CL/functions/CLDequantizationLayer.h @@ -58,18 +58,13 @@ public: * - All * * Valid data type configurations: - * |src |dst | - * |:------------------|:--------------| - * |QASYMM8 |F16 | - * |QASYMM8 |F32 | - * |QASYMM8_SIGNED |F16 | - * |QASYMM8_SIGNED |F32 | - * |QSYMM8_PER_CHANNEL |F16 | - * |QSYMM8_PER_CHANNEL |F32 | - * |QSYMM8 |F16 | - * |QSYMM8 |F32 | - * |QSYMM16 |F16 | - * |QSYMM16 |F32 | + * |src |dst | + * |:------------------|:---------| + * |QASYMM8 |F16, F32 | + * |QASYMM8_SIGNED |F16, F32 | + * |QSYMM8_PER_CHANNEL |F16, F32 | + * |QSYMM8 |F16, F32 | + * |QSYMM16 |F16, F32 | * * @param[in] input Source tensor with at least 3 dimensions. The dimensions over the third will be interpreted as batches. * Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16. diff --git a/arm_compute/runtime/CL/functions/CLFillBorder.h b/arm_compute/runtime/CL/functions/CLFillBorder.h index a4ad82dfd4..20f2e15b72 100644 --- a/arm_compute/runtime/CL/functions/CLFillBorder.h +++ b/arm_compute/runtime/CL/functions/CLFillBorder.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020 Arm Limited. + * Copyright (c) 2016-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -38,6 +38,14 @@ class CLFillBorder : public ICLSimpleFunction { public: /** Initialize the function + * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |All |All | * * @param[in,out] tensor Source tensor. Data types supported: U8/QASYMM8/S8/QASYMM8_SIGNED/U16/S16/U32/S32/F16/F32. 
* @param[in] border_width The border width diff --git a/arm_compute/runtime/CL/functions/CLFlattenLayer.h b/arm_compute/runtime/CL/functions/CLFlattenLayer.h index 8dedd7458d..d2c37b1c22 100644 --- a/arm_compute/runtime/CL/functions/CLFlattenLayer.h +++ b/arm_compute/runtime/CL/functions/CLFlattenLayer.h @@ -43,6 +43,14 @@ class CLFlattenLayer : public IFunction { public: /** Initialise the kernel's input and output. + * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |All |All | * * @param[in] input First input tensor to flatten with at least 3 dimensions. * The dimensions above the third will be interpreted as batches. Data types supported: All. diff --git a/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h b/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h index 75cb2dc1fa..eec01bcebe 100644 --- a/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h +++ b/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h @@ -116,6 +116,18 @@ public: /** Default move assignment operator */ CLFullyConnectedLayer &operator=(CLFullyConnectedLayer &&) = default; /** Set the input and output tensors. + * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src0 |src1 |src2 |dst | + * |:--------------|:------------------|:------|:--------------| + * |F16 |F16 |F16 |F16 | + * |F32 |F32 |F32 |F32 | + * |QASYMM8 |QASYMM8 |S32 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |QASYMM8_SIGNED | * * @param[in] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32. * @param[in] weights Weights tensor. The weights must be 2 dimensional. 
diff --git a/arm_compute/runtime/CL/functions/CLFuseBatchNormalization.h b/arm_compute/runtime/CL/functions/CLFuseBatchNormalization.h index e35905fcf1..cd75270392 100644 --- a/arm_compute/runtime/CL/functions/CLFuseBatchNormalization.h +++ b/arm_compute/runtime/CL/functions/CLFuseBatchNormalization.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -54,6 +54,16 @@ public: /** Default destructor */ ~CLFuseBatchNormalization(); /** Set the input and output tensors. + * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |F32 |F32 | + * |F16 |F16 | * * @param[in] input_weights Input weights tensor for convolution or depthwise convolution layer. Data type supported: F16/F32. Data layout supported: NCHW, NHWC * @param[in] bn_mean Batch normalization layer mean tensor. Same as @p input_weights diff --git a/arm_compute/runtime/CL/functions/CLGEMM.h b/arm_compute/runtime/CL/functions/CLGEMM.h index 8a210a2ba5..1e2ae7be64 100644 --- a/arm_compute/runtime/CL/functions/CLGEMM.h +++ b/arm_compute/runtime/CL/functions/CLGEMM.h @@ -125,6 +125,15 @@ public: /** Default destructor */ ~CLGEMM(); /** Initialise the kernel's inputs and output + * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 |src2 |dst | + * |:------------|:-----------|:---------|:--------------| + * |F32 |F32 |F32 |F32 | + * |F16 |F16 |F16 |F16 | * * @note GEMM: General Matrix Multiply - [alpha * A * B + beta * C]. * diff --git a/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h index 4dbd0f828a..082b481047 100644 --- a/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h +++ b/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. 
+ * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -193,6 +193,20 @@ public: /**Default destructor */ ~CLGEMMConvolutionLayer(); /** Set the input and output tensors. + * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src0 |src1 |src2 |dst | + * |:--------------|:------------------|:--------|:--------------| + * |F16 |F16 |F16 |F16 | + * |F32 |F32 |F32 |F32 | + * |QASYMM8 |QASYMM8 |S32 |QASYMM8 | + * |QASYMM8 |QSYMM8_PER_CHANNEL |S32 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |QASYMM8_SIGNED | + * |QASYMM8_SIGNED |QSYMM8_PER_CHANNEL |S32 |QASYMM8_SIGNED | * * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM], * while every optional dimension from 4 and above represent a batch of inputs. diff --git a/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h b/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h index 4cc8899690..e7f4cb9d01 100644 --- a/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h +++ b/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2021 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -60,6 +60,26 @@ public: /** Default destructor */ ~CLGEMMLowpMatrixMultiplyCore(); /** Initialise the kernel's inputs, output + * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src0 |src1 |src2 |dst | + * |:--------------|:------------------|:--------|:--------------| + * |QASYMM8 |QASYMM8 |S32 |QASYMM8 | + * |QASYMM8 |QSYMM8_PER_CHANNEL |S32 |QASYMM8 | + * |QASYMM8 |QSYMM8 |S32 |QASYMM8 | + * |QASYMM8 |QASYMM8 |S32 |S32 | + * |QASYMM8 |QSYMM8_PER_CHANNEL |S32 |S32 | + * |QASYMM8 |QSYMM8 |S32 |S32 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |QASYMM8_SIGNED | + * |QASYMM8_SIGNED |QSYMM8_PER_CHANNEL |S32 |QASYMM8_SIGNED | + * |QASYMM8_SIGNED |QSYMM8 |S32 |QASYMM8_SIGNED | + * |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |S32 | + * |QASYMM8_SIGNED |QSYMM8_PER_CHANNEL |S32 |S32 | + * |QASYMM8_SIGNED |QSYMM8 |S32 |S32 | * * @note GEMMLowp: low precision GEMM kernel. [A * B + C] * This kernel performs the following computations: diff --git a/arm_compute/runtime/CL/functions/CLGather.h b/arm_compute/runtime/CL/functions/CLGather.h index 9c659be6fc..7a57c7358c 100644 --- a/arm_compute/runtime/CL/functions/CLGather.h +++ b/arm_compute/runtime/CL/functions/CLGather.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -39,6 +39,14 @@ class CLGather : public ICLSimpleFunction { public: /** Initialise the kernel's inputs and outputs + * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |All |All | * * @param[in] input Source tensor. Supported tensor rank: up to 4. Data type supported: All. * @param[in] indices Indices tensor. Supported tensor rank: up to 1. Must be one of the following types: U32/S32. 
Each value must be in range [0, input.shape[@p axis]) diff --git a/arm_compute/runtime/CL/functions/CLGenerateProposalsLayer.h b/arm_compute/runtime/CL/functions/CLGenerateProposalsLayer.h index bea470712c..aec5cdf1a8 100644 --- a/arm_compute/runtime/CL/functions/CLGenerateProposalsLayer.h +++ b/arm_compute/runtime/CL/functions/CLGenerateProposalsLayer.h @@ -76,6 +76,16 @@ public: ~CLGenerateProposalsLayer(); /** Set the input and output tensors. + * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 |src2 |dst | + * |:--------------|:------------------|:--------|:--------------| + * |F16 |F16 |F16 |F16 | + * |F32 |F32 |F32 |F32 | + * |QASYMM8 |QSYMM8 |QSYMM16 |QASYMM8 | * * @param[in] scores Scores from convolution layer of size (W, H, A), where H and W are the height and width of the feature map, and A is the number of anchors. * Data types supported: QASYMM8/F16/F32 diff --git a/arm_compute/runtime/CL/functions/CLInstanceNormalizationLayer.h b/arm_compute/runtime/CL/functions/CLInstanceNormalizationLayer.h index a6e5b1622b..985a6a75f7 100644 --- a/arm_compute/runtime/CL/functions/CLInstanceNormalizationLayer.h +++ b/arm_compute/runtime/CL/functions/CLInstanceNormalizationLayer.h @@ -64,6 +64,16 @@ public: ~CLInstanceNormalizationLayer(); /** Set the input and output tensors. + * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src |dst | + * |:--------|:---------| + * |F16 |F16 | + * |F32 |F32 | * * @param[in, out] input Source tensor. In case of @p output tensor = nullptr this tensor will store the result of the normalization. * Data types supported: F16/F32. 
Data layout supported: NHWC, NCHW diff --git a/arm_compute/runtime/CL/functions/CLL2NormalizeLayer.h b/arm_compute/runtime/CL/functions/CLL2NormalizeLayer.h index 401d249eb4..4dc5c778d2 100644 --- a/arm_compute/runtime/CL/functions/CLL2NormalizeLayer.h +++ b/arm_compute/runtime/CL/functions/CLL2NormalizeLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -64,6 +64,16 @@ public: CLL2NormalizeLayer &operator=(CLL2NormalizeLayer &&) = default; /** Set the input and output tensors. + * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src |dst | + * |:--------|:---------| + * |F16 |F16 | + * |F32 |F32 | * * @param[in] input Source tensor. Data types supported: F16/F32. Data layouts supported: NCHW/NHWC. * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p input. diff --git a/arm_compute/runtime/CL/functions/CLLSTMLayer.h b/arm_compute/runtime/CL/functions/CLLSTMLayer.h index 38a24d030b..d26b4c5595 100644 --- a/arm_compute/runtime/CL/functions/CLLSTMLayer.h +++ b/arm_compute/runtime/CL/functions/CLLSTMLayer.h @@ -74,6 +74,15 @@ public: /** Default destructor */ ~CLLSTMLayer(); /** Initialize function's tensors. + * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 - src13 | dst0 - dst3 | + * |:------------|:------------| + * |F16 |F16 | + * |F32 |F32 | * * @param[in] input Source tensor. Input is a 2D tensor with dimensions [input_size, batch_size]. Data types supported: F16/F32. * @param[in] input_to_forget_weights 2D weights tensor with dimensions [input_size, num_units]. Data type supported: Same as @p input. 
diff --git a/arm_compute/runtime/CL/functions/CLLSTMLayerQuantized.h b/arm_compute/runtime/CL/functions/CLLSTMLayerQuantized.h index 0829052384..2ef7427a5a 100644 --- a/arm_compute/runtime/CL/functions/CLLSTMLayerQuantized.h +++ b/arm_compute/runtime/CL/functions/CLLSTMLayerQuantized.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020 Arm Limited. + * Copyright (c) 2019-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -72,6 +72,14 @@ public: /** Default move assignment operator */ CLLSTMLayerQuantized &operator=(CLLSTMLayerQuantized &&) = default; /** Initialize function's tensors. + * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 - src8 |src9 - src12 |src13 |src14 |dst0 |dst1 | + * |:-----------|:------------|:-------|:------|:------|:------| + * |QASYMM8 |S32 |QSYMM16 |QASYMM8|QSYMM16|QASYMM8| * * @param[in] input Source tensor. Input is a 2D tensor with dimensions [input_size, batch_size]. Data types supported: QASYMM8. * @param[in] input_to_input_weights 2D weights tensor with dimensions [input_size, output_size]. Data type supported: Same as @p input. diff --git a/arm_compute/runtime/CL/functions/CLMaxUnpoolingLayer.h b/arm_compute/runtime/CL/functions/CLMaxUnpoolingLayer.h index 24d620d372..f7ff1234f6 100644 --- a/arm_compute/runtime/CL/functions/CLMaxUnpoolingLayer.h +++ b/arm_compute/runtime/CL/functions/CLMaxUnpoolingLayer.h @@ -55,6 +55,18 @@ public: /** Default destructor */ ~CLMaxUnpoolingLayer(); /** Set the input and output tensors. + * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |QASYMM8 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED | + * |F16 |F16 | + * |F32 |F32 | * * @note Output shape must be equal to the shape of the original input to pool. 
* diff --git a/arm_compute/runtime/CL/functions/CLMeanStdDevNormalizationLayer.h b/arm_compute/runtime/CL/functions/CLMeanStdDevNormalizationLayer.h index cfe59eac09..68a7df24e6 100644 --- a/arm_compute/runtime/CL/functions/CLMeanStdDevNormalizationLayer.h +++ b/arm_compute/runtime/CL/functions/CLMeanStdDevNormalizationLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020 Arm Limited. + * Copyright (c) 2019-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -38,6 +38,16 @@ class CLMeanStdDevNormalizationLayer : public ICLSimpleFunction { public: /** Initialise the function's input and outputs. + * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src |dst | + * |:--------|:---------| + * |F32 |F32 | + * |F16 |F16 | * * @note If the output tensor is a nullptr, the normalization will be performed in-place. * diff --git a/arm_compute/runtime/CL/functions/CLNormalizationLayer.h b/arm_compute/runtime/CL/functions/CLNormalizationLayer.h index 706cb6f152..15406f7728 100644 --- a/arm_compute/runtime/CL/functions/CLNormalizationLayer.h +++ b/arm_compute/runtime/CL/functions/CLNormalizationLayer.h @@ -60,6 +60,16 @@ public: /** Default destructor */ ~CLNormalizationLayer(); /** Set the input and output tensors. + * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src |dst | + * |:--------|:---------| + * |F32 |F32 | + * |F16 |F16 | * * @param[in, out] input Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM], * and an optional 4th dimension for batch of inputs. Data types supported: F16/F32 (Written to by the border handler). 
diff --git a/arm_compute/runtime/CL/functions/CLPadLayer.h b/arm_compute/runtime/CL/functions/CLPadLayer.h index dae95f63e6..7f950bcfb3 100644 --- a/arm_compute/runtime/CL/functions/CLPadLayer.h +++ b/arm_compute/runtime/CL/functions/CLPadLayer.h @@ -58,6 +58,15 @@ public: ~CLPadLayer(); /** Initialize the function + * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src |dst | + * |:--------|:---------| + * |All |All | * * @param[in] input Source tensor. Data types supported: All. * @param[out] output Output tensor. Data type supported: same as @p input diff --git a/arm_compute/runtime/CL/functions/CLPriorBoxLayer.h b/arm_compute/runtime/CL/functions/CLPriorBoxLayer.h index 9129bfd064..9b36c9e433 100644 --- a/arm_compute/runtime/CL/functions/CLPriorBoxLayer.h +++ b/arm_compute/runtime/CL/functions/CLPriorBoxLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -42,6 +42,15 @@ public: /** Constructor */ CLPriorBoxLayer(); /** Set the input and output tensors. + * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src0 |src1 |dst | + * |:--------|:--------|:--------| + * |F32 |F32 |F32 | * * @param[in] input1 First source tensor. Data types supported: F32. Data layouts supported: NCHW/NHWC. * @param[in] input2 Second source tensor. Data types and layouts supported: same as @p input1 diff --git a/arm_compute/runtime/CL/functions/CLQLSTMLayer.h b/arm_compute/runtime/CL/functions/CLQLSTMLayer.h index 954f224424..bd00d56468 100644 --- a/arm_compute/runtime/CL/functions/CLQLSTMLayer.h +++ b/arm_compute/runtime/CL/functions/CLQLSTMLayer.h @@ -73,6 +73,14 @@ public: /** Default destructor */ ~CLQLSTMLayer(); /** Initialize function's tensors. 
+ * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 - src6 |src7 -src9 |src10 |src11 |dst0 |dst1 - dst2 | + * |:-------------|:------------|:------------|:------|:-------------|:------|:-----------------| + * |QASYMM8_SIGNED|QASYMM8 |S32 |QSYMM16|QASYMM8_SIGNED|QSYMM16|QASYMM8_SIGNED | * * @param[in] input Source tensor. Input is a 2D tensor with dimensions [input_size, batch_size]. Data types supported: QASYMM8_SIGNED. * @param[in] input_to_forget_weights 2D weights tensor with dimensions [input_size, num_units]. Data type supported: QSYMM8. diff --git a/arm_compute/runtime/CL/functions/CLQuantizationLayer.h b/arm_compute/runtime/CL/functions/CLQuantizationLayer.h index 6a44a226d4..a61735cb97 100644 --- a/arm_compute/runtime/CL/functions/CLQuantizationLayer.h +++ b/arm_compute/runtime/CL/functions/CLQuantizationLayer.h @@ -63,20 +63,12 @@ public: * - All * * Valid data type configurations: - * |src |dst | - * |:------------------|:--------------| - * |QASYMM8 |QASYMM8 | - * |QASYMM8 |QASYMM8_SIGNED | - * |QASYMM8 |QASYMM16 | - * |QASYMM8_SIGNED |QASYMM8 | - * |QASYMM8_SIGNED |QASYMM8_SIGNED | - * |QASYMM8_SIGNED |QASYMM16 | - * |F16 |QASYMM8 | - * |F16 |QASYMM8_SIGNED | - * |F16 |QASYMM16 | - * |F32 |QASYMM8 | - * |F32 |QASYMM8_SIGNED | - * |F32 |QASYMM16 | + * |src |dst | + * |:------------------|:----------------------------------| + * |QASYMM8 |QASYMM8, QASYMM8_SIGNED, QASYMM16 | + * |QASYMM8_SIGNED |QASYMM8, QASYMM8_SIGNED, QASYMM16 | + * |F16 |QASYMM8, QASYMM8_SIGNED, QASYMM16 | + * |F32 |QASYMM8, QASYMM8_SIGNED, QASYMM16 | * * @param[in] input Source tensor. The dimensions over the third will be interpreted as batches. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/32. * @param[out] output Destination tensor with the same dimensions of input. Data types supported: QASYMM8/QASYMM8_SIGNED/QASYMM16. 
diff --git a/arm_compute/runtime/CL/functions/CLRNNLayer.h b/arm_compute/runtime/CL/functions/CLRNNLayer.h index 50575daaa3..2b3b35e37d 100644 --- a/arm_compute/runtime/CL/functions/CLRNNLayer.h +++ b/arm_compute/runtime/CL/functions/CLRNNLayer.h @@ -50,6 +50,16 @@ public: /** Default destructor */ ~CLRNNLayer(); /** Initialize the function + * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src0 |src1 |src2 |src3 |dst0 |dst1 | + * |:------|:------|:------|:------|:------|:------| + * |F16 |F16 |F16 |F16 |F16 |F16 | + * |F32 |F32 |F32 |F32 |F32 |F32 | * * @param[in] input Input is a 2-D tensor of shape [input_size, batch_size]. Data types supported: F16/F32 * @param[in] weights Weights tensor of shape [input_size, num_units] that multiplies the input. Data types supported: Same as @p input diff --git a/arm_compute/runtime/CL/functions/CLROIAlignLayer.h b/arm_compute/runtime/CL/functions/CLROIAlignLayer.h index b4cd5560ef..1eaea1b297 100644 --- a/arm_compute/runtime/CL/functions/CLROIAlignLayer.h +++ b/arm_compute/runtime/CL/functions/CLROIAlignLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -44,6 +44,17 @@ class CLROIAlignLayer : public ICLSimpleFunction { public: /** Set the input and output tensors. + * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 |dst | + * |:--------------|:--------------|:--------------| + * |F16 |F16 |F16 | + * |F32 |F32 |F32 | + * |QASYMM8 |QASYMM16 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM16 |QASYMM8_SIGNED | * * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. 
* @param[in] rois ROIs tensor, it is a 2D tensor of size [5, N] (where N is the number of ROIs) containing top left and bottom right corner diff --git a/arm_compute/runtime/CL/functions/CLROIPoolingLayer.h b/arm_compute/runtime/CL/functions/CLROIPoolingLayer.h index a4c5c76f2e..151586a1f6 100644 --- a/arm_compute/runtime/CL/functions/CLROIPoolingLayer.h +++ b/arm_compute/runtime/CL/functions/CLROIPoolingLayer.h @@ -44,6 +44,16 @@ class CLROIPoolingLayer : public ICLSimpleFunction { public: /** Set the input and output tensors. + * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 |dst | + * |:--------------|:--------------|:--------------| + * |F16 |U16 |F16 | + * |F32 |U16 |F32 | + * |QASYMM8 |U16 |QASYMM8 | * * @param[in] input Source tensor. Data types supported: F16/F32/QASYMM8 * @param[in] rois ROIs tensor, it is a 2D tensor of size [5, N] (where N is the number of ROIs) containing top left and bottom right corner diff --git a/arm_compute/runtime/CL/functions/CLRange.h b/arm_compute/runtime/CL/functions/CLRange.h index e11e740861..fbce05162c 100644 --- a/arm_compute/runtime/CL/functions/CLRange.h +++ b/arm_compute/runtime/CL/functions/CLRange.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -42,6 +42,22 @@ class CLRange : public ICLSimpleFunction { public: /** Initialize the kernel's start, end, step and output tensor. + * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |dst | + * |:---------| + * |U8 | + * |S8 | + * |QASYMM8 | + * |U16 | + * |S16 | + * |U32 | + * |S32 | + * |F16 | + * |F32 | * * @param[out] output Output tensor. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32. * @param[in] start The starting value of the sequence. 
diff --git a/arm_compute/runtime/CL/functions/CLReduceMean.h b/arm_compute/runtime/CL/functions/CLReduceMean.h index c37ee8c5ab..1ce088b2ce 100644 --- a/arm_compute/runtime/CL/functions/CLReduceMean.h +++ b/arm_compute/runtime/CL/functions/CLReduceMean.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -44,6 +44,17 @@ public: /** Default constructor */ CLReduceMean(std::shared_ptr memory_manager = nullptr); /** Configure kernel + * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |QASYMM8 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED | + * |F16 |F16 | + * |F32 |F32 | * * @note Supported tensor rank: up to 4 * diff --git a/arm_compute/runtime/CL/functions/CLReductionOperation.h b/arm_compute/runtime/CL/functions/CLReductionOperation.h index 58164fdcb3..2245735b62 100644 --- a/arm_compute/runtime/CL/functions/CLReductionOperation.h +++ b/arm_compute/runtime/CL/functions/CLReductionOperation.h @@ -61,6 +61,18 @@ public: CLReductionOperation &operator=(CLReductionOperation &&) = default; /** Set the input and output tensors. + * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |QASYMM8 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED | + * |F16 |F16 | + * |F32 |F32 | + * |S32 |S32 | * * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32/S32. * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p input. diff --git a/arm_compute/runtime/CL/functions/CLRemap.h b/arm_compute/runtime/CL/functions/CLRemap.h index 87d5f9fec7..39ee13b8d0 100644 --- a/arm_compute/runtime/CL/functions/CLRemap.h +++ b/arm_compute/runtime/CL/functions/CLRemap.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2021 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -46,6 +46,14 @@ class CLRemap : public ICLSimpleFunction { public: /** Initialise the function's sources, destination, interpolation policy and border mode. + * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 |src2 |dst | + * |:------|:------|:------|:------| + * |U8 |F32 |F32 |U8 | * * @param[in,out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED) * @param[in] map_x Map for X coords. Data types supported: F32. diff --git a/arm_compute/runtime/CL/functions/CLReorgLayer.h b/arm_compute/runtime/CL/functions/CLReorgLayer.h index 0840fd13fd..976b8f6213 100644 --- a/arm_compute/runtime/CL/functions/CLReorgLayer.h +++ b/arm_compute/runtime/CL/functions/CLReorgLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -37,6 +37,15 @@ class CLReorgLayer : public ICLSimpleFunction { public: /** Initialise the function's source and destination. + * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |All |All | * * @param[in] input Source tensor. Data types supported: All. * @param[out] output Destination tensor with tensor shape: diff --git a/arm_compute/runtime/CL/functions/CLReverse.h b/arm_compute/runtime/CL/functions/CLReverse.h index 81fa04b1f5..94c63ca92d 100644 --- a/arm_compute/runtime/CL/functions/CLReverse.h +++ b/arm_compute/runtime/CL/functions/CLReverse.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021 Arm Limited.
* * SPDX-License-Identifier: MIT * @@ -38,6 +38,14 @@ class CLReverse : public ICLSimpleFunction { public: /** Initialize the function + * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 |dst | + * |:--------------|:--------------|:--------------| + * |All |U32 |All | * * @param[in] input Input tensor. Data types supported: All. * @param[out] output Output tensor. Data type supported: Same as @p input diff --git a/arm_compute/runtime/CL/functions/CLSelect.h b/arm_compute/runtime/CL/functions/CLSelect.h index 7fd52312fb..8b1e6b2019 100644 --- a/arm_compute/runtime/CL/functions/CLSelect.h +++ b/arm_compute/runtime/CL/functions/CLSelect.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -39,6 +39,14 @@ class CLSelect : public ICLSimpleFunction { public: /** Initialise the kernel's inputs and output. + * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 |src2 |dst | + * |:--------------|:--------------|:------|:--------------| + * |U8 |All |All |All | * * @param[in] c Condition input tensor. Data types supported: U8. * @param[in] x First input tensor. Data types supported: All. diff --git a/arm_compute/runtime/CL/functions/CLSpaceToBatchLayer.h b/arm_compute/runtime/CL/functions/CLSpaceToBatchLayer.h index dc02fa1363..304a74137e 100644 --- a/arm_compute/runtime/CL/functions/CLSpaceToBatchLayer.h +++ b/arm_compute/runtime/CL/functions/CLSpaceToBatchLayer.h @@ -59,6 +59,15 @@ public: /** Default destructor */ ~CLSpaceToBatchLayer(); /** Set the input and output tensors. + * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src0 |src1 |src2 |dst | + * |:---------|:---------|:---------|:---------| + * |All |S32 |S32 |All | * * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. * @param[in] block_shape 1-D tensor with shape [M]. 
Supported M: 2. Data types supported: S32 diff --git a/arm_compute/runtime/CL/functions/CLSpaceToDepthLayer.h b/arm_compute/runtime/CL/functions/CLSpaceToDepthLayer.h index 9e476fe7bd..8a47e95f9d 100644 --- a/arm_compute/runtime/CL/functions/CLSpaceToDepthLayer.h +++ b/arm_compute/runtime/CL/functions/CLSpaceToDepthLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020 Arm Limited. + * Copyright (c) 2019-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -53,6 +53,15 @@ public: /** Default destructor */ ~CLSpaceToDepthLayer(); /** Set the input and output tensors. + * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |All |All | * * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. * @param[out] output Tensor output. Data types supported: same as @p input diff --git a/arm_compute/runtime/CL/functions/CLSplit.h b/arm_compute/runtime/CL/functions/CLSplit.h index 2931203765..86c7bdde7d 100644 --- a/arm_compute/runtime/CL/functions/CLSplit.h +++ b/arm_compute/runtime/CL/functions/CLSplit.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -40,6 +40,18 @@ namespace arm_compute class CLSplit : public CPPSplit { public: + /** CLSplit + * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |All |All | + * + */ + // Inherited methods overridden: void run() override; }; diff --git a/arm_compute/runtime/CL/functions/CLStackLayer.h b/arm_compute/runtime/CL/functions/CLStackLayer.h index 3861fd299a..54c903a706 100644 --- a/arm_compute/runtime/CL/functions/CLStackLayer.h +++ b/arm_compute/runtime/CL/functions/CLStackLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -58,6 +58,14 @@ public: /** Default destructor */ ~CLStackLayer(); /** Initialise the kernel's inputs vector and output. + * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |All |All | * * @note Supported input tensor rank: up to 4 * diff --git a/arm_compute/runtime/CL/functions/CLTile.h b/arm_compute/runtime/CL/functions/CLTile.h index 69743693ff..c266adbbd4 100644 --- a/arm_compute/runtime/CL/functions/CLTile.h +++ b/arm_compute/runtime/CL/functions/CLTile.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -38,6 +38,14 @@ class CLTile : public ICLSimpleFunction { public: /** Set the source, destination of the kernel + * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |All |All | * * @param[in] input Source tensor. Data type supported: All. * @param[in] multiples Contains the number of times the input tensor should be replicated on the given dimension. diff --git a/arm_compute/runtime/CL/functions/CLUnstack.h b/arm_compute/runtime/CL/functions/CLUnstack.h index 5d4d5710ab..32ad439b70 100644 --- a/arm_compute/runtime/CL/functions/CLUnstack.h +++ b/arm_compute/runtime/CL/functions/CLUnstack.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -47,6 +47,14 @@ public: /** Default constructor */ CLUnstack(); /** Set the input, output and unstacking axis. + * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |All |All | * * @param[in] input A tensor to be unstacked. Data type supported: All. * @param[in,out] output_vector A vector of tensors. Data types supported: same as @p input. 
diff --git a/arm_compute/runtime/CL/functions/CLWinogradConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLWinogradConvolutionLayer.h index 9ced69c1bb..7b42932f82 100644 --- a/arm_compute/runtime/CL/functions/CLWinogradConvolutionLayer.h +++ b/arm_compute/runtime/CL/functions/CLWinogradConvolutionLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -61,6 +61,16 @@ public: /** Default destructor */ ~CLWinogradConvolutionLayer(); /** Set the input and output tensors. + * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src0 |src1 |src2 |dst | + * |:--------------|:--------------|:------|:--------------| + * |F16 |F16 |F16 |F16 | + * |F32 |F32 |F32 |F32 | * * @note: This function only works with 3x3,3x1,1x3,5x5,5x1,1x5,7x1 and 1x7 kernels along with unit strides for both NCHW and NHWC data layout * @note Some Winograd configurations (i.e. F(4x4, 5x5)) are supported only with enable_fast_math = true diff --git a/arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h b/arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h index cbf1d5b444..4392de7b28 100644 --- a/arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h +++ b/arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h @@ -63,6 +63,18 @@ public: /** Default destructor */ ~NEArgMinMaxLayer(); /** Set the input and output tensors. + * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:----------| + * |QASYMM8 |U32, S32 | + * |QASYMM8_SIGNED |U32, S32 | + * |S32 |U32, S32 | + * |F16 |U32, S32 | + * |F32 |U32, S32 | * * @param[in] input Input source tensor. Data types supported: QASYMM8_SIGNED/QASYMM8/S32/F16/F32. * @param[in] axis Axis to find max/min index. 
diff --git a/arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h b/arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h index 6d56a267a7..ec00fbdbf2 100644 --- a/arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h +++ b/arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -57,6 +57,16 @@ public: /** Default destructor */ ~NEBatchNormalizationLayer(); /** Set the input and output tensors. + * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |F32 |F32 | + * |F16 |F16 | * * @note If the output tensor is a nullptr or is equal to the input, the batch normalization function will be performed in-place * diff --git a/arm_compute/runtime/NEON/functions/NEBatchToSpaceLayer.h b/arm_compute/runtime/NEON/functions/NEBatchToSpaceLayer.h index c2fd26d34c..810bf81a22 100644 --- a/arm_compute/runtime/NEON/functions/NEBatchToSpaceLayer.h +++ b/arm_compute/runtime/NEON/functions/NEBatchToSpaceLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020 Arm Limited. + * Copyright (c) 2019-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -51,6 +51,15 @@ public: /** Default destructor */ ~NEBatchToSpaceLayer() = default; /** Set the input and output tensors. + * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src0 |src1 |dst | + * |:---------|:---------|:----------| + * |All |S32 |All | * * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. * @param[in] block_shape 1-D tensor with shape [M].
Data types supported: S32 diff --git a/arm_compute/runtime/NEON/functions/NEBitwiseAnd.h b/arm_compute/runtime/NEON/functions/NEBitwiseAnd.h index 3203d2b9a7..1f95f193d3 100644 --- a/arm_compute/runtime/NEON/functions/NEBitwiseAnd.h +++ b/arm_compute/runtime/NEON/functions/NEBitwiseAnd.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020 Arm Limited. + * Copyright (c) 2016-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -47,6 +47,14 @@ public: /** Default destructor */ ~NEBitwiseAnd() = default; /** Initialise the kernel's inputs and output + * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |U8 |U8 | * * @param[in] input1 First tensor input. Data type supported: U8. * @param[in] input2 Second tensor input. Data type supported: U8. diff --git a/arm_compute/runtime/NEON/functions/NEBitwiseNot.h b/arm_compute/runtime/NEON/functions/NEBitwiseNot.h index 9fa0d38caf..c66bebf7cc 100644 --- a/arm_compute/runtime/NEON/functions/NEBitwiseNot.h +++ b/arm_compute/runtime/NEON/functions/NEBitwiseNot.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -35,6 +35,14 @@ class NEBitwiseNot : public INESimpleFunctionNoBorder { public: /** Initialise the kernel's input and output + * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |U8 |U8 | * * @param[in] input Input tensor. Data type supported: U8. * @param[out] output Output tensor. Data type supported: U8. diff --git a/arm_compute/runtime/NEON/functions/NEBitwiseOr.h b/arm_compute/runtime/NEON/functions/NEBitwiseOr.h index fba6b784de..183df212e4 100644 --- a/arm_compute/runtime/NEON/functions/NEBitwiseOr.h +++ b/arm_compute/runtime/NEON/functions/NEBitwiseOr.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2021 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -35,6 +35,14 @@ class NEBitwiseOr : public INESimpleFunctionNoBorder { public: /** Initialise the kernel's inputs and output + * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |U8 |U8 | * * @param[in] input1 First tensor input. Data type supported: U8. * @param[in] input2 Second tensor input. Data type supported: U8. diff --git a/arm_compute/runtime/NEON/functions/NEBitwiseXor.h b/arm_compute/runtime/NEON/functions/NEBitwiseXor.h index c6cb584284..126aaa6ddd 100644 --- a/arm_compute/runtime/NEON/functions/NEBitwiseXor.h +++ b/arm_compute/runtime/NEON/functions/NEBitwiseXor.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -35,6 +35,14 @@ class NEBitwiseXor : public INESimpleFunctionNoBorder { public: /** Initialise the kernel's inputs and output + * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |U8 |U8 | * * @param[in] input1 First tensor input. Data type supported: U8. * @param[in] input2 Second tensor input. Data type supported: U8. diff --git a/arm_compute/runtime/NEON/functions/NEBoundingBoxTransform.h b/arm_compute/runtime/NEON/functions/NEBoundingBoxTransform.h index c377520a12..2a196a2de5 100644 --- a/arm_compute/runtime/NEON/functions/NEBoundingBoxTransform.h +++ b/arm_compute/runtime/NEON/functions/NEBoundingBoxTransform.h @@ -37,6 +37,17 @@ class NEBoundingBoxTransform : public INESimpleFunctionNoBorder { public: /** Set the input and output tensors. + * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src0 |src1 |dst | + * |:--------------|:--------------|:--------------| + * |QASYMM16 |QASYMM8 |QASYMM16 | + * |F16 |F16 |F16 | + * |F32 |F32 |F32 | * * @param[in] boxes Source tensor. 
Bounding box proposals in pixel coordinates. Size(M, 4), format [x1, y1, x2, y2]. Data types supported: QASYMM16/F16/F32. * @param[out] pred_boxes Destination tensor. Pixel coordinates of the transformed bounding boxes. Size (M, 4*K), format [x1, y1, x2, y2]. Data types supported: Same as @p input diff --git a/arm_compute/runtime/NEON/functions/NECast.h b/arm_compute/runtime/NEON/functions/NECast.h index e536317660..eb7de1fadb 100644 --- a/arm_compute/runtime/NEON/functions/NECast.h +++ b/arm_compute/runtime/NEON/functions/NECast.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020 Arm Limited. + * Copyright (c) 2019-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -40,18 +40,22 @@ class NECast : public INESimpleFunctionNoBorder public: /** Initialize the function's source, destination * - * Input data type must be different than output data type. + * Valid data layouts: + * - All * - * Valid conversions Input -> Output : + * Valid data type configurations: + * |src |dst | + * |:--------------|:-----------------------------------------------| + * |QASYMM8_SIGNED | S16, S32, F32, F16 | + * |QASYMM8 | U16, S16, S32, F32, F16 | + * |U8 | U16, S16, S32, F32, F16 | + * |U16 | U8, U32 | + * |S16 | QASYMM8_SIGNED, U8, S32 | + * |F16 | QASYMM8_SIGNED, QASYMM8, F32, S32, U8 | + * |S32 | QASYMM8_SIGNED, QASYMM8, F16, F32, U8 | + * |F32 | QASYMM8_SIGNED, QASYMM8, BFLOAT16, F16, S32, U8| * - * - QASYMM8_SIGNED -> S16, S32, F32, F16 - * - QASYMM8 -> U16, S16, S32, F32, F16 - * - U8 -> U16, S16, S32, F32, F16 - * - U16 -> U8, U32 - * - S16 -> QASYMM8_SIGNED, U8, S32 - * - F16 -> QASYMM8_SIGNED, QASYMM8, F32, S32, U8 - * - S32 -> QASYMM8_SIGNED, QASYMM8, F16, F32, U8 - * - F32 -> QASYMM8_SIGNED, QASYMM8, BFLOAT16, F16, S32, U8 + * Input data type must be different than output data type. * * @param[in] input The input tensor to convert. Data types supported: QASYMM8_SIGNED/QASYMM8/U8/U16/S16/F16/S32/F32. * @param[out] output The output tensor. 
Data types supported: QASYMM8_SIGNED/QASYMM8/U8/S8/U16/S16/U32/S32/BFLOAT16/F16/F32. diff --git a/arm_compute/runtime/NEON/functions/NEChannelShuffleLayer.h b/arm_compute/runtime/NEON/functions/NEChannelShuffleLayer.h index aa11396c20..8888efec4f 100644 --- a/arm_compute/runtime/NEON/functions/NEChannelShuffleLayer.h +++ b/arm_compute/runtime/NEON/functions/NEChannelShuffleLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -43,6 +43,14 @@ class NEChannelShuffleLayer : public INESimpleFunctionNoBorder { public: /** Initialize the function + * + * Valid data layouts: + * - NCHW + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |All |All | * * @param[in] input Input tensor. Data types supported: All * @param[out] output Output tensor. Data type supported: Same as @p input diff --git a/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h index b1e85523c5..f19aa8008b 100644 --- a/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h +++ b/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h @@ -85,13 +85,28 @@ public: /** Default destructor */ ~NEConvolutionLayer() = default; /** Set the input and output tensors. + * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src0 |src1 |src2 |dst | + * |:--------------|:------------------|:------|:--------------| + * |F16 |F16 |F16 |F16 | + * |F32 |F32 |F32 |F32 | + * |QASYMM8 |QASYMM8 |S32 |QASYMM8 | + * |QASYMM8 |QSYMM8_PER_CHANNEL |S32 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |QASYMM8_SIGNED | + * |QASYMM8_SIGNED |QSYMM8_PER_CHANNEL |S32 |QASYMM8_SIGNED | * * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM], * while every optional dimension from 4 and above represent a batch of inputs. 
* Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: Same as @p input, also could be QSYMM8_PER_CHANNEL if input is QASYMM8/QASYMM8_SIGNED. + * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. + * Data type supported: Same as @p input, also could be QSYMM8_PER_CHANNEL if input is QASYMM8/QASYMM8_SIGNED. * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. - * Data type supported: Should match @p input data type, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type. + * Data type supported: Same as @p input, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type. * @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs. * Data types supported: Same as @p input. * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. @@ -110,9 +125,10 @@ public: * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM], * while every optional dimension from 4 and above represent a batch of inputs. * Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported:Same as @p input, also could be QSYMM8_PER_CHANNEL if input is QASYMM8/QASYMM8_SIGNED. + * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. + * Data type supported: Same as @p input, also could be QSYMM8_PER_CHANNEL if input is QASYMM8/QASYMM8_SIGNED. * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. 
- * Data type supported: Should match @p input data type, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type. + * Data type supported: Same as @p input, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type. * @param[in] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs. * Data types supported: Same as @p input. * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. @@ -134,7 +150,8 @@ public: * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM], * while every optional dimension from 4 and above represent a batch of inputs. * Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported:Same as @p input, also could be QSYMM8_PER_CHANNEL if input is QASYMM8/QASYMM8_SIGNED. + * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. + * Data type supported: Same as @p input, also could be QSYMM8_PER_CHANNEL if input is QASYMM8/QASYMM8_SIGNED. * @param[in] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs. * Data types supported: Same as @p input. * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. 
diff --git a/arm_compute/runtime/NEON/functions/NECropResize.h b/arm_compute/runtime/NEON/functions/NECropResize.h index 7dcf925650..143bbbc6f1 100644 --- a/arm_compute/runtime/NEON/functions/NECropResize.h +++ b/arm_compute/runtime/NEON/functions/NECropResize.h @@ -53,6 +53,14 @@ public: ~NECropResize(); /** Configure kernel + * + * Valid data layouts: + * - NHWC + * + * Valid data type configurations: + * |src0 |src1 |src2 |dst | + * |:--------|:--------|:------|:--------| + * |All |F32 |F32 |F32 | * * @note Supported tensor rank: up to 4 * @note Box indices may be outside of the bounds, in which case @p extrapolation_value is used. diff --git a/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h index c16cf26095..34ab0707c2 100644 --- a/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h +++ b/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h @@ -89,6 +89,20 @@ public: virtual ~NEDeconvolutionLayer() = default; /** Set the input, weights, biases and output tensors. + * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src0 |src1 |src2 |dst | + * |:--------------|:------------------|:------|:--------------| + * |F16 |F16 |F16 |F16 | + * |F32 |F32 |F32 |F32 | + * |QASYMM8 |QASYMM8 |S32 |QASYMM8 | + * |QASYMM8 |QSYMM8_PER_CHANNEL |S32 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |QASYMM8_SIGNED | + * |QASYMM8_SIGNED |QSYMM8_PER_CHANNEL |S32 |QASYMM8_SIGNED | * * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. Data types supported: F32/F16/QASYMM8/QASYMM8_SIGNED. * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. Data type supported: Same as @p input, also could be QSYMM8_PER_CHANNEL if input is QASYMM8/QASYMM8_SIGNED. 
diff --git a/arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h b/arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h index c9817a63c1..17cf539717 100644 --- a/arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h +++ b/arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020 Arm Limited. + * Copyright (c) 2016-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -48,15 +48,21 @@ public: ~NEDepthConvertLayer() = default; /** Initialize the function's source, destination * - * Valid conversions Input -> Output : + * Valid data layouts: + * - All * - * - QASYMM8 -> F16, F32 - * - U8 -> U16, S16, S32 - * - U16 -> U8, U32 - * - S16 -> U8, S32 - * - BFLOAT16 -> F32 - * - F16 -> QASYMM8, F32 - * - F32 -> QASYMM8, F16, BFLOAT16 + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------------------| + * |QASYMM8 | F16, F32 | + * |U8 | U16, S16, S32 | + * |U16 | U8, U32 | + * |S16 | U8, S32 | + * |BFLOAT16 | F32 | + * |F16 | QASYMM8, F32 | + * |F32 | QASYMM8, F16, BFLOAT16 | + * + * Input data type must be different than output data type. * * @param[in] input The input tensor to convert. Data types supported: QASYMM8/U8/U16/S16/BFLOAT16/F16/F32. * @param[out] output The output tensor. Data types supported: QASYMM8/U8/U16/S16/U32/S32/BFLOAT16/F16/F32. diff --git a/arm_compute/runtime/NEON/functions/NEDepthToSpaceLayer.h b/arm_compute/runtime/NEON/functions/NEDepthToSpaceLayer.h index 51f7ff7770..b9bdcd1f11 100644 --- a/arm_compute/runtime/NEON/functions/NEDepthToSpaceLayer.h +++ b/arm_compute/runtime/NEON/functions/NEDepthToSpaceLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020 Arm Limited. + * Copyright (c) 2019-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -52,6 +52,15 @@ public: /** Default destructor */ ~NEDepthToSpaceLayer() = default; /** Set the input and output tensors. 
+ * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |All |All | * * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All * @param[out] output Tensor output. Data types supported: same as @p input diff --git a/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h index c74b2a93ee..2f541758f4 100644 --- a/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h +++ b/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h @@ -54,6 +54,20 @@ public: /** Default destructor */ ~NEDepthwiseConvolutionLayer(); /** Initialize the function's source, destination, weights and convolution information. + * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src0 |src1 |src2 |dst | + * |:--------------|:------------------|:------|:--------------| + * |F16 |F16 |F16 |F16 | + * |F32 |F32 |F32 |F32 | + * |QASYMM8 |QASYMM8 |S32 |QASYMM8 | + * |QASYMM8 |QSYMM8_PER_CHANNEL |S32 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |QASYMM8_SIGNED | + * |QASYMM8_SIGNED |QSYMM8_PER_CHANNEL |S32 |QASYMM8_SIGNED | * * @param[in, out] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32 * @param[out] output Destination tensor. Data type supported: same as @p input. 
diff --git a/arm_compute/runtime/NEON/functions/NEDequantizationLayer.h b/arm_compute/runtime/NEON/functions/NEDequantizationLayer.h index dfec835f45..91ed056cf3 100644 --- a/arm_compute/runtime/NEON/functions/NEDequantizationLayer.h +++ b/arm_compute/runtime/NEON/functions/NEDequantizationLayer.h @@ -57,18 +57,13 @@ public: * - All * * Valid data type configurations: - * |src |dst | - * |:------------------|:--------------| - * |QASYMM8 |F16 | - * |QASYMM8 |F32 | - * |QASYMM8_SIGNED |F16 | - * |QASYMM8_SIGNED |F32 | - * |QSYMM8_PER_CHANNEL |F16 | - * |QSYMM8_PER_CHANNEL |F32 | - * |QSYMM8 |F16 | - * |QSYMM8 |F32 | - * |QSYMM16 |F16 | - * |QSYMM16 |F32 | + * |src |dst | + * |:------------------|:-----------| + * |QASYMM8 |F16, F32 | + * |QASYMM8_SIGNED |F16, F32 | + * |QSYMM8_PER_CHANNEL |F16, F32 | + * |QSYMM8 |F16, F32 | + * |QSYMM16 |F16, F32 | * * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16. * @param[out] output Destination tensor with the same dimensions of input. Data type supported: F16/F32. diff --git a/arm_compute/runtime/NEON/functions/NEFillBorder.h b/arm_compute/runtime/NEON/functions/NEFillBorder.h index 8a8a0c7dc2..ab77c28839 100644 --- a/arm_compute/runtime/NEON/functions/NEFillBorder.h +++ b/arm_compute/runtime/NEON/functions/NEFillBorder.h @@ -41,6 +41,14 @@ class NEFillBorder : public IFunction public: NEFillBorder(); /** Initialize the function's source, destination and border_mode. + * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |All |All | * * @note This function fills the borders within the XY-planes. 
* diff --git a/arm_compute/runtime/NEON/functions/NEFlattenLayer.h b/arm_compute/runtime/NEON/functions/NEFlattenLayer.h index 1104aac77f..e688e918d9 100644 --- a/arm_compute/runtime/NEON/functions/NEFlattenLayer.h +++ b/arm_compute/runtime/NEON/functions/NEFlattenLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -38,6 +38,14 @@ class NEFlattenLayer : public IFunction { public: /** Initialise the kernel's input and output. + * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |All |All | * * @param[in] input First input tensor to flatten with at least 3 dimensions. The dimensions over the third will be interpreted as batches. Data types supported: All * @param[out] output Output tensor with shape [w*h*d, input_batches] where: diff --git a/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h b/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h index bc45e58b4b..9727e108a5 100644 --- a/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h +++ b/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h @@ -36,7 +36,6 @@ namespace arm_compute { - namespace weights_transformations { /** Basic function to manage the reshape weights generated from @ref NETranspose */ @@ -101,6 +100,18 @@ public: /** Default destructor */ ~NEFullyConnectedLayer(); /** Set the input and output tensors. + * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src0 |src1 |src2 |dst | + * |:--------------|:------------------|:------|:--------------| + * |F16 |F16 |F16 |F16 | + * |F32 |F32 |F32 |F32 | + * |QASYMM8 |QASYMM8 |S32 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |QASYMM8_SIGNED | * * @param[in] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32. * @param[in] weights Weights tensor. The weights must be 2 dimensional. 
diff --git a/arm_compute/runtime/NEON/functions/NEFuseBatchNormalization.h b/arm_compute/runtime/NEON/functions/NEFuseBatchNormalization.h index 5dc804e240..3dd7f49044 100644 --- a/arm_compute/runtime/NEON/functions/NEFuseBatchNormalization.h +++ b/arm_compute/runtime/NEON/functions/NEFuseBatchNormalization.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -51,6 +51,16 @@ public: /** Default destructor */ ~NEFuseBatchNormalization(); /** Set the input and output tensors. + * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |F32 |F32 | + * |F16 |F16 | * * @param[in] input_weights Input weights tensor for convolution or depthwise convolution layer. Data type supported: F16/F32. Data layout supported: NCHW, NHWC * @param[in] bn_mean Batch normalization layer mean tensor. Same as @p input_weights diff --git a/arm_compute/runtime/NEON/functions/NEGEMM.h b/arm_compute/runtime/NEON/functions/NEGEMM.h index a6c3436656..d4a9f68beb 100644 --- a/arm_compute/runtime/NEON/functions/NEGEMM.h +++ b/arm_compute/runtime/NEON/functions/NEGEMM.h @@ -75,6 +75,16 @@ public: /** Default destructor */ ~NEGEMM(); /** Initialise the kernel's inputs, output + * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 |src2 |dst | + * |:------------|:-----------|:---------|:--------------| + * |F32 |F32 |F32 |F32 | + * |F16 |F16 |F16 |F16 | + * |BFLOAT16 |BFLOAT16 |BFLOAT16 |BFLOAT16 | * * @note GEMM: General Matrix Multiply - [alpha * A * B + beta * C]. * @note GEMM: The tensors a, b, c, d must have the same data type. You should not mix data types when calling this function. 
diff --git a/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h index 9897bf1d4d..e89eae1d31 100644 --- a/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h +++ b/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h @@ -176,6 +176,21 @@ public: /** Default destructor */ ~NEGEMMConvolutionLayer(); /** Set the input and output tensors. + * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src0 |src1 |src2 |dst | + * |:--------------|:------------------|:--------|:--------------| + * |F16 |F16 |F16 |F16 | + * |F32 |F32 |F32 |F32 | + * |BFLOAT16 |BFLOAT16 |BFLOAT16 |BFLOAT16 | + * |QASYMM8 |QASYMM8 |S32 |QASYMM8 | + * |QASYMM8 |QSYMM8_PER_CHANNEL |S32 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |QASYMM8_SIGNED | + * |QASYMM8_SIGNED |QSYMM8_PER_CHANNEL |S32 |QASYMM8_SIGNED | * * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM], * while every optional dimension from 4 and above represent a batch of inputs. 
diff --git a/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h b/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h index b2b77bd406..780723e752 100644 --- a/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h +++ b/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h @@ -75,6 +75,26 @@ public: /** Default destructor */ ~NEGEMMLowpMatrixMultiplyCore(); /** Initialise the kernel's inputs, output + * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src0 |src1 |src2 |dst | + * |:--------------|:------------------|:--------|:--------------| + * |QASYMM8 |QASYMM8 |S32 |QASYMM8 | + * |QASYMM8 |QSYMM8_PER_CHANNEL |S32 |QASYMM8 | + * |QASYMM8 |QSYMM8 |S32 |QASYMM8 | + * |QASYMM8 |QASYMM8 |S32 |S32 | + * |QASYMM8 |QSYMM8_PER_CHANNEL |S32 |S32 | + * |QASYMM8 |QSYMM8 |S32 |S32 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |QASYMM8_SIGNED | + * |QASYMM8_SIGNED |QSYMM8_PER_CHANNEL |S32 |QASYMM8_SIGNED | + * |QASYMM8_SIGNED |QSYMM8 |S32 |QASYMM8_SIGNED | + * |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |S32 | + * |QASYMM8_SIGNED |QSYMM8_PER_CHANNEL |S32 |S32 | + * |QASYMM8_SIGNED |QSYMM8 |S32 |S32 | * * @note GEMM_LOWP: low precision GEMM kernel * This kernel performs the following computations: diff --git a/arm_compute/runtime/NEON/functions/NEGather.h b/arm_compute/runtime/NEON/functions/NEGather.h index a5e0461227..393a38ee4d 100644 --- a/arm_compute/runtime/NEON/functions/NEGather.h +++ b/arm_compute/runtime/NEON/functions/NEGather.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020 Arm Limited. + * Copyright (c) 2019-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -39,6 +39,14 @@ class NEGather : public INESimpleFunctionNoBorder { public: /** Initialise the kernel's inputs and outputs + * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |All |All | * * @param[in] input Source tensor. 
Supported tensor rank: up to 4. Data type supported: All * @param[in] indices Indices tensor. Supported tensor rank: up to 1. Must be one of the following type: U32/S32. Each value Must be in range [0, input.shape[@p axis]) diff --git a/arm_compute/runtime/NEON/functions/NEGenerateProposalsLayer.h b/arm_compute/runtime/NEON/functions/NEGenerateProposalsLayer.h index 22c6ba2ed6..3b683382ec 100644 --- a/arm_compute/runtime/NEON/functions/NEGenerateProposalsLayer.h +++ b/arm_compute/runtime/NEON/functions/NEGenerateProposalsLayer.h @@ -71,6 +71,16 @@ public: ~NEGenerateProposalsLayer(); /** Set the input and output tensors. + * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 |src2 |dst | + * |:--------------|:------------------|:--------|:--------------| + * |F16 |F16 |F16 |F16 | + * |F32 |F32 |F32 |F32 | + * |QASYMM8 |QSYMM8 |QSYMM16 |QASYMM8 | * * @param[in] scores Scores from convolution layer of size (W, H, A), where H and W are the height and width of the feature map, and A is the number of anchors. * Data types supported: QASYMM8/F16/F32 diff --git a/arm_compute/runtime/NEON/functions/NEInstanceNormalizationLayer.h b/arm_compute/runtime/NEON/functions/NEInstanceNormalizationLayer.h index 57165c94b4..bb0697072b 100644 --- a/arm_compute/runtime/NEON/functions/NEInstanceNormalizationLayer.h +++ b/arm_compute/runtime/NEON/functions/NEInstanceNormalizationLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020 Arm Limited. + * Copyright (c) 2019-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -59,6 +59,16 @@ public: /** Default destructor */ ~NEInstanceNormalizationLayer(); /** Set the input and output tensors. + * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src |dst | + * |:--------|:---------| + * |F16 |F16 | + * |F32 |F32 | * * @param[in, out] input Source tensor. In case of @p output tensor = nullptr this tensor will store the result of the normalization. 
* Data types supported: F16/F32. Data layout supported: NHWC, NCHW diff --git a/arm_compute/runtime/NEON/functions/NEL2NormalizeLayer.h b/arm_compute/runtime/NEON/functions/NEL2NormalizeLayer.h index 173b9d2141..7f1a5e785e 100644 --- a/arm_compute/runtime/NEON/functions/NEL2NormalizeLayer.h +++ b/arm_compute/runtime/NEON/functions/NEL2NormalizeLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -59,6 +59,16 @@ public: /** Default destructor */ ~NEL2NormalizeLayer(); /** Set the input and output tensors. + * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src |dst | + * |:--------|:---------| + * |F16 |F16 | + * |F32 |F32 | * * @param[in, out] input Source tensor. Data types supported: F16/F32. (Written to only for border_size != 0) * @param[out] output Destination tensor. Data types and data layouts supported: same as @p input. diff --git a/arm_compute/runtime/NEON/functions/NELSTMLayer.h b/arm_compute/runtime/NEON/functions/NELSTMLayer.h index ef8defb827..075fb4530a 100644 --- a/arm_compute/runtime/NEON/functions/NELSTMLayer.h +++ b/arm_compute/runtime/NEON/functions/NELSTMLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -59,6 +59,15 @@ public: /** Default destructor */ ~NELSTMLayer(); /** Initialize function's tensors. + * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 - src13 | dst0 - dst3 | + * |:------------|:------------| + * |F16 |F16 | + * |F32 |F32 | * * @param[in] input Source tensor. Input is a 2D tensor with dimensions [input_size, batch_size]. Data types supported: F16/F32. * @param[in] input_to_forget_weights 2D weights tensor with dimensions [input_size, num_units]. Data type supported: Same as @p input. 
diff --git a/arm_compute/runtime/NEON/functions/NELSTMLayerQuantized.h b/arm_compute/runtime/NEON/functions/NELSTMLayerQuantized.h index 53a024ae04..2f0c753691 100644 --- a/arm_compute/runtime/NEON/functions/NELSTMLayerQuantized.h +++ b/arm_compute/runtime/NEON/functions/NELSTMLayerQuantized.h @@ -76,6 +76,14 @@ public: /** Default destructor */ ~NELSTMLayerQuantized(); /** Initialize function's tensors. + * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 - src8 |src9 - src12 |src13 |src14 |dst0 |dst1 | + * |:-----------|:------------|:-------|:------|:------|:------| + * |QASYMM8 |S32 |QSYMM16 |QASYMM8|QSYMM16|QASYMM8| * * @param[in] input Source tensor. Input is a 2D tensor with dimensions [input_size, batch_size]. Data types supported: QASYMM8. * @param[in] input_to_input_weights 2D weights tensor with dimensions [input_size, output_size]. Data type supported: Same as @p input. diff --git a/arm_compute/runtime/NEON/functions/NEMaxUnpoolingLayer.h b/arm_compute/runtime/NEON/functions/NEMaxUnpoolingLayer.h index fae26b3c93..41ea040457 100644 --- a/arm_compute/runtime/NEON/functions/NEMaxUnpoolingLayer.h +++ b/arm_compute/runtime/NEON/functions/NEMaxUnpoolingLayer.h @@ -56,6 +56,18 @@ public: /** Default destructor */ ~NEMaxUnpoolingLayer(); /** Set the input and output tensors. 
+ * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |QASYMM8 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED | + * |F16 |F16 | + * |F32 |F32 | * * @note Only supported pool size 2 * diff --git a/arm_compute/runtime/NEON/functions/NEMeanStdDevNormalizationLayer.h b/arm_compute/runtime/NEON/functions/NEMeanStdDevNormalizationLayer.h index 31e376191c..41aa81946b 100644 --- a/arm_compute/runtime/NEON/functions/NEMeanStdDevNormalizationLayer.h +++ b/arm_compute/runtime/NEON/functions/NEMeanStdDevNormalizationLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020 Arm Limited. + * Copyright (c) 2019-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -49,6 +49,16 @@ public: /** Default destructor */ ~NEMeanStdDevNormalizationLayer(); /** Initialise the function's input and outputs. + * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src |dst | + * |:--------|:---------| + * |F32 |F32 | + * |F16 |F16 | * * @note If the output tensor is a nullptr, the normalization will be performed in-place. * diff --git a/arm_compute/runtime/NEON/functions/NENormalizationLayer.h b/arm_compute/runtime/NEON/functions/NENormalizationLayer.h index 8c4ad1516e..fbe000445c 100644 --- a/arm_compute/runtime/NEON/functions/NENormalizationLayer.h +++ b/arm_compute/runtime/NEON/functions/NENormalizationLayer.h @@ -62,6 +62,16 @@ public: /** Default destructor */ ~NENormalizationLayer(); /** Set the input and output tensors. + * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src |dst | + * |:--------|:---------| + * |F32 |F32 | + * |F16 |F16 | * * @param[in] input Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM], * and an optional 4th dimension for batch of inputs. Data type supported: F16/F32. Data layouts supported: NCHW/NHWC. 
diff --git a/arm_compute/runtime/NEON/functions/NEPadLayer.h b/arm_compute/runtime/NEON/functions/NEPadLayer.h index 76ff0643a0..4aa6725496 100644 --- a/arm_compute/runtime/NEON/functions/NEPadLayer.h +++ b/arm_compute/runtime/NEON/functions/NEPadLayer.h @@ -64,6 +64,15 @@ public: /** Default destructor */ ~NEPadLayer(); /** Initialize the function + * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src |dst | + * |:--------|:---------| + * |All |All | * * @param[in] input Source tensor. Data types supported: All. * @param[out] output Output tensor. Data type supported: same as @p input diff --git a/arm_compute/runtime/NEON/functions/NEPriorBoxLayer.h b/arm_compute/runtime/NEON/functions/NEPriorBoxLayer.h index 3cc79fa28e..38e0c9f3ad 100644 --- a/arm_compute/runtime/NEON/functions/NEPriorBoxLayer.h +++ b/arm_compute/runtime/NEON/functions/NEPriorBoxLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -37,6 +37,15 @@ class NEPriorBoxLayer : public INESimpleFunctionNoBorder { public: /** Set the input and output tensors. + * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src0 |src1 |dst | + * |:--------|:--------|:--------| + * |F32 |F32 |F32 | * * @param[in] input1 First source tensor. Data types supported: F32. Data layouts supported: NCHW/NHWC. * @param[in] input2 Second source tensor. Data types and layouts supported: same as @p input1 diff --git a/arm_compute/runtime/NEON/functions/NEQLSTMLayer.h b/arm_compute/runtime/NEON/functions/NEQLSTMLayer.h index e706179415..7c2e9bc5a1 100644 --- a/arm_compute/runtime/NEON/functions/NEQLSTMLayer.h +++ b/arm_compute/runtime/NEON/functions/NEQLSTMLayer.h @@ -75,6 +75,14 @@ public: /** Default destructor */ ~NEQLSTMLayer(); /** Initialize function's tensors. 
+ * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 - src6 |src7 - src9 |src10 |src11 |dst0 |dst1 - dst2 | + * |:-------------|:------------|:------------|:------|:-------------|:------|:-----------------| + * |QASYMM8_SIGNED|QASYMM8 |S32 |QSYMM16|QASYMM8_SIGNED|QSYMM16|QASYMM8_SIGNED | * * @param[in] input Source tensor. Input is a 2D tensor with dimensions [input_size, batch_size]. Data types supported: QASYMM8_SIGNED. * @param[in] input_to_forget_weights 2D weights tensor with dimensions [input_size, num_units]. Data type supported: QSYMM8. diff --git a/arm_compute/runtime/NEON/functions/NEQuantizationLayer.h b/arm_compute/runtime/NEON/functions/NEQuantizationLayer.h index a7fadfc7cd..eeca2bb1db 100644 --- a/arm_compute/runtime/NEON/functions/NEQuantizationLayer.h +++ b/arm_compute/runtime/NEON/functions/NEQuantizationLayer.h @@ -56,20 +56,12 @@ public: * - All * * Valid data type configurations: - * |src |dst | - * |:------------------|:--------------| - * |QASYMM8 |QASYMM8 | - * |QASYMM8 |QASYMM8_SIGNED | - * |QASYMM8 |QASYMM16 | - * |QASYMM8_SIGNED |QASYMM8 | - * |QASYMM8_SIGNED |QASYMM8_SIGNED | - * |QASYMM8_SIGNED |QASYMM16 | - * |F16 |QASYMM8 | - * |F16 |QASYMM8_SIGNED | - * |F16 |QASYMM16 | - * |F32 |QASYMM8 | - * |F32 |QASYMM8_SIGNED | - * |F32 |QASYMM16 | + * |src |dst | + * |:------------------|:--------------------------------------| + * |QASYMM8 |QASYMM8, QASYMM8_SIGNED, QASYMM16 | + * |QASYMM8_SIGNED |QASYMM8, QASYMM8_SIGNED, QASYMM16 | + * |F16 |QASYMM8, QASYMM8_SIGNED, QASYMM16 | + * |F32 |QASYMM8, QASYMM8_SIGNED, QASYMM16 | * * @param[in] input Source tensor. The dimensions over the third will be interpreted as batches. Data types supported: QASYMM8/QASYMM8_SIGNED/F32/F16. * @param[out] output Destination tensor with the same dimensions of input.
Data types supported: QASYMM8/QASYMM8_SIGNED/QASYMM16 diff --git a/arm_compute/runtime/NEON/functions/NERNNLayer.h b/arm_compute/runtime/NEON/functions/NERNNLayer.h index 66f7f2ea3f..667d3144ac 100644 --- a/arm_compute/runtime/NEON/functions/NERNNLayer.h +++ b/arm_compute/runtime/NEON/functions/NERNNLayer.h @@ -53,6 +53,16 @@ public: /** Default destructor */ ~NERNNLayer(); /** Initialize the function + * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src0 |src1 |src2 |src3 |dst0 |dst1 | + * |:------|:------|:------|:------|:------|:------| + * |F16 |F16 |F16 |F16 |F16 |F16 | + * |F32 |F32 |F32 |F32 |F32 |F32 | * * @param[in] input Input is a 2-D tensor of shape [input_size, batch_size]. Data types supported: F16/F32 * @param[in] weights Weights tensor of shape [input_size, num_units] that multiplies the input. Data types supported: Same as @p input diff --git a/arm_compute/runtime/NEON/functions/NEROIAlignLayer.h b/arm_compute/runtime/NEON/functions/NEROIAlignLayer.h index c72cd494d2..ea1af4daea 100644 --- a/arm_compute/runtime/NEON/functions/NEROIAlignLayer.h +++ b/arm_compute/runtime/NEON/functions/NEROIAlignLayer.h @@ -37,11 +37,22 @@ class NEROIAlignLayer : public INESimpleFunctionNoBorder { public: /** Set the input and output tensors. + * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 |dst | + * |:--------------|:--------------|:--------------| + * |F16 |F16 |F16 | + * |F32 |F32 |F32 | + * |QASYMM8 |QASYMM16 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM16 |QASYMM8_SIGNED | * * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. * @param[in] rois ROIs tensor, it is a 2D tensor of size [5, N] (where N is the number of ROIs) containing top left and bottom right corner * as coordinate of an image and batch_id of ROI [ batch_id, x1, y1, x2, y2 ]. 
- * Data types supported: QASYMM16 with scale of 0.125 and 0 offset if @p input is QASYMM8, otherwise same as @p input + * Data types supported: QASYMM16 with scale of 0.125 and 0 offset if @p input is QASYMM8/QASYMM8_SIGNED, otherwise same as @p input * @param[out] output Destination tensor. Data types supported: Same as @p input. * @param[in] pool_info Contains pooling operation information described in @ref ROIPoolingLayerInfo. * @@ -54,7 +65,7 @@ public: /** Static function to check if given info will lead to a valid configuration of @ref NEROIAlignLayerKernel * * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] rois ROIs tensor info. Data types supported: QASYMM16 with scale of 0.125 and 0 offset if @p input is QASYMM8, + * @param[in] rois ROIs tensor info. Data types supported: QASYMM16 with scale of 0.125 and 0 offset if @p input is QASYMM8/QASYMM8_SIGNED, * otherwise same as @p input * @param[in] output Destination tensor info. Data types supported: Same as @p input. * @param[in] pool_info Contains pooling operation information described in @ref ROIPoolingLayerInfo. diff --git a/arm_compute/runtime/NEON/functions/NEROIPoolingLayer.h b/arm_compute/runtime/NEON/functions/NEROIPoolingLayer.h index 214dd43402..2992b3eb95 100644 --- a/arm_compute/runtime/NEON/functions/NEROIPoolingLayer.h +++ b/arm_compute/runtime/NEON/functions/NEROIPoolingLayer.h @@ -52,6 +52,15 @@ public: /** Default destructor */ ~NEROIPoolingLayer(); /** Set the input and output tensors. + * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 |dst | + * |:--------------|:--------------|:--------------| + * |F32 |U16 |F32 | + * |QASYMM8 |U16 |QASYMM8 | * * @param[in] input Source tensor. 
Data types supported: QASYMM8/F32 * @param[in] rois ROIs tensor, it is a 2D tensor of size [5, N] (where N is the number of ROIs) containing top left and bottom right corner diff --git a/arm_compute/runtime/NEON/functions/NERange.h b/arm_compute/runtime/NEON/functions/NERange.h index 28976001d7..cb14c8fdde 100644 --- a/arm_compute/runtime/NEON/functions/NERange.h +++ b/arm_compute/runtime/NEON/functions/NERange.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -56,6 +56,21 @@ public: /** Default destructor */ ~NERange(); /** Initialize the kernel's start, end, step and output tensor. + * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |dst | + * |:---------| + * |U8 | + * |S8 | + * |U16 | + * |S16 | + * |U32 | + * |S32 | + * |F16 | + * |F32 | * * @param[out] output Output tensor. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32. * @param[in] start The starting value of the sequence. diff --git a/arm_compute/runtime/NEON/functions/NEReduceMean.h b/arm_compute/runtime/NEON/functions/NEReduceMean.h index 89cd09812b..7512115a3f 100644 --- a/arm_compute/runtime/NEON/functions/NEReduceMean.h +++ b/arm_compute/runtime/NEON/functions/NEReduceMean.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -53,6 +53,17 @@ public: /** Default destructor */ ~NEReduceMean(); /** Configure kernel + * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |QASYMM8 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED | + * |F16 |F16 | + * |F32 |F32 | * * @note Supported tensor rank: up to 4 * diff --git a/arm_compute/runtime/NEON/functions/NEReductionOperation.h b/arm_compute/runtime/NEON/functions/NEReductionOperation.h index b96b70926c..533c10adcf 100644 --- a/arm_compute/runtime/NEON/functions/NEReductionOperation.h +++ b/arm_compute/runtime/NEON/functions/NEReductionOperation.h @@ -58,7 +58,19 @@ public: ~NEReductionOperation(); /** Set the input and output tensors. * - * @param[in, out] input Source tensor. Data type supported: QASYMM8_SIGNED/QASYMM8/F16/F32/S32. Data layouts supported: NCHW. (Written to only for border_size != 0) + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |QASYMM8 |QASYMM8 | + * |QASYMM8_SIGNED |QASYMM8_SIGNED | + * |F16 |F16 | + * |F32 |F32 | + * |S32 |S32 | + * + * @param[in, out] input Source tensor. Data type supported: QASYMM8_SIGNED/QASYMM8/F16/F32/S32. (Written to only for border_size != 0) * @param[out] output Destination tensor. Data types and data layouts supported: same as @p input. * @param[in] axis Dimension along which to reduce. Supported reduction axis : 0 * @param[in] op Reduction operation to perform. @@ -68,7 +80,7 @@ public: /** Static function to check if given info will lead to a valid configuration of @ref NEReductionOperation. * - * @param[in] input Source tensor info. Data type supported: QASYMM8_SIGNED/QASYMM8/F16/F32/S32. Data layouts supported: NCHW. + * @param[in] input Source tensor info. Data type supported: QASYMM8_SIGNED/QASYMM8/F16/F32/S32. * @param[in] output Destination tensor info. 
Data types and data layouts supported: same as @p input. * @param[in] axis Dimension along which to reduce. Supported reduction axis : 0 * @param[in] op Reduction operation to perform. diff --git a/arm_compute/runtime/NEON/functions/NERemap.h b/arm_compute/runtime/NEON/functions/NERemap.h index 835ebfab7e..1693078f66 100644 --- a/arm_compute/runtime/NEON/functions/NERemap.h +++ b/arm_compute/runtime/NEON/functions/NERemap.h @@ -43,6 +43,14 @@ class NERemap : public INESimpleFunction { public: /** Initialise the function's sources, destination, interpolation policy and border mode. + * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 |src2 |dst | + * |:------|:------|:------|:------| + * |U8 |F32 |F32 |U8 | * * @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) * @param[in] map_x Map for X coordinates. Data type supported: F32. diff --git a/arm_compute/runtime/NEON/functions/NEReorgLayer.h b/arm_compute/runtime/NEON/functions/NEReorgLayer.h index f76d1d252c..0a7d824d10 100644 --- a/arm_compute/runtime/NEON/functions/NEReorgLayer.h +++ b/arm_compute/runtime/NEON/functions/NEReorgLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -38,6 +38,15 @@ class NEReorgLayer : public INESimpleFunctionNoBorder { public: /** Initialise the kernel's inputs and outputs + * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |All |All | * * @param[in] input First tensor input. Data type supported: All * @param[out] output Output tensor.
Data type supported: Same as @p input diff --git a/arm_compute/runtime/NEON/functions/NEReverse.h b/arm_compute/runtime/NEON/functions/NEReverse.h index 2048dafcb5..c02fff54a5 100644 --- a/arm_compute/runtime/NEON/functions/NEReverse.h +++ b/arm_compute/runtime/NEON/functions/NEReverse.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -38,6 +38,14 @@ class NEReverse : public INESimpleFunctionNoBorder { public: /** Initialize the function + * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 |dst | + * |:--------------|:--------------|:--------------| + * |All |U32 |All | * * @param[in] input Input tensor. Data types supported: All * @param[out] output Output tensor. Data type supported: Same as @p input diff --git a/arm_compute/runtime/NEON/functions/NESelect.h b/arm_compute/runtime/NEON/functions/NESelect.h index c66fbfa7d4..c8e5a204dd 100644 --- a/arm_compute/runtime/NEON/functions/NESelect.h +++ b/arm_compute/runtime/NEON/functions/NESelect.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -38,6 +38,14 @@ class NESelect : public INESimpleFunctionNoBorder { public: /** Initialise the kernel's inputs and output. + * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 |src2 |dst | + * |:--------------|:--------------|:------|:--------------| + * |U8 |All |All |All | * * @param[in] c Condition input tensor. Data types supported: U8. * @param[in] x First input tensor. Data types supported: All. 
diff --git a/arm_compute/runtime/NEON/functions/NESpaceToBatchLayer.h b/arm_compute/runtime/NEON/functions/NESpaceToBatchLayer.h index 27c1ddf8e3..ad8c1467d0 100644 --- a/arm_compute/runtime/NEON/functions/NESpaceToBatchLayer.h +++ b/arm_compute/runtime/NEON/functions/NESpaceToBatchLayer.h @@ -57,6 +57,15 @@ public: /** Default destructor */ ~NESpaceToBatchLayer(); /** Set the input and output tensors. + * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src0 |src1 |src2 |dst | + * |:---------|:---------|:---------|:---------| + * |All |S32 |S32 |All | * * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. * @param[in] block_shape 1-D tensor with shape [M]. Supported M: 2. Data types supported: S32 diff --git a/arm_compute/runtime/NEON/functions/NESpaceToDepthLayer.h b/arm_compute/runtime/NEON/functions/NESpaceToDepthLayer.h index 73c228d8ee..1820cb8f6b 100644 --- a/arm_compute/runtime/NEON/functions/NESpaceToDepthLayer.h +++ b/arm_compute/runtime/NEON/functions/NESpaceToDepthLayer.h @@ -52,6 +52,15 @@ public: /** Default destructor */ ~NESpaceToDepthLayer(); /** Set the input and output tensors. + * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |All |All | * * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. * @param[out] output Tensor output. Data types supported: same as @p input diff --git a/arm_compute/runtime/NEON/functions/NESplit.h b/arm_compute/runtime/NEON/functions/NESplit.h index ede5ecf65a..206f299c06 100644 --- a/arm_compute/runtime/NEON/functions/NESplit.h +++ b/arm_compute/runtime/NEON/functions/NESplit.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -40,6 +40,18 @@ namespace arm_compute class NESplit : public CPPSplit { public: + /** NESplit + * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |All |All | + * + */ + // Inherited methods overridden: void run() override; }; diff --git a/arm_compute/runtime/NEON/functions/NEStackLayer.h b/arm_compute/runtime/NEON/functions/NEStackLayer.h index f6fa4f2eb3..ae4e468f21 100644 --- a/arm_compute/runtime/NEON/functions/NEStackLayer.h +++ b/arm_compute/runtime/NEON/functions/NEStackLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -57,6 +57,14 @@ public: /** Default destructor */ ~NEStackLayer(); /** Initialise the kernel's inputs vector and output. + * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |All |All | * * @note Supported input tensor rank: up to 4 * diff --git a/arm_compute/runtime/NEON/functions/NETile.h b/arm_compute/runtime/NEON/functions/NETile.h index d5ce76c9cf..915e5aa1da 100644 --- a/arm_compute/runtime/NEON/functions/NETile.h +++ b/arm_compute/runtime/NEON/functions/NETile.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -38,6 +38,14 @@ class NETile : public INESimpleFunctionNoBorder { public: /** Set the source, destination of the kernel + * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |All |All | * * @param[in] input Source tensor. Data type supported: All. * @param[out] output Destination tensor. 
Same as @p input diff --git a/arm_compute/runtime/NEON/functions/NEUnstack.h b/arm_compute/runtime/NEON/functions/NEUnstack.h index c8e85115f7..079fee5b9e 100644 --- a/arm_compute/runtime/NEON/functions/NEUnstack.h +++ b/arm_compute/runtime/NEON/functions/NEUnstack.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -56,6 +56,14 @@ public: /** Default destructor */ ~NEUnstack() = default; /** Set the input, output and unstacking axis. + * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |All |All | * * @param[in] input A tensor to be unstacked. Data type supported: All. * @param[in,out] output_vector A vector of tensors. Data types supported: same as @p input. diff --git a/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h index 3367b10a96..77f9093ed4 100644 --- a/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h +++ b/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h @@ -65,6 +65,16 @@ public: ~NEWinogradConvolutionLayer() = default; /** Set the input and output tensors. + * + * Valid data layouts: + * - NHWC + * - NCHW + * + * Valid data type configurations: + * |src0 |src1 |src2 |dst | + * |:--------------|:--------------|:------|:--------------| + * |F16 |F16 |F16 |F16 | + * |F32 |F32 |F32 |F32 | * * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM], * while every optional dimension from 4 and above represent a batch of inputs. 
diff --git a/arm_compute/runtime/OperatorList.h b/arm_compute/runtime/OperatorList.h index 8c43c68b90..a659a79423 100644 --- a/arm_compute/runtime/OperatorList.h +++ b/arm_compute/runtime/OperatorList.h @@ -40,7 +40,7 @@ * */ -/** ArgMinMaxLayer (not ported) +/** ArgMinMaxLayer * * Description: * Function to calculate the index of the minimum or maximum values in a tensor based on an axis. @@ -71,27 +71,27 @@ * */ -/** BatchNormalizationLayer (not ported) +/** BatchNormalizationLayer * * Description: - * @f[ out_i = \gamma * (\frac{in_i - \mu_{B}}{\sqrt{\sigma^2_{B} + \epsilon}}) + \beta \equiv BN_{\gamma,\beta}(in_i) @f] + * Function to perform batch normalization. * * Equivalent Android NNAPI Op: - * None + * n/a * */ -/** BatchToSpaceLayer (not ported) +/** BatchToSpaceLayer * * Description: - * Rearranges (permutes) data from batch into blocks of spatial data, followed by cropping. It is the reverse transformation of SpaceToBatch (from TF website) + * Batch to space transformation. * * Equivalent Android NNAPI Op: * ANEURALNETWORKS_BATCH_TO_SPACE_ND * */ -/** BitwiseAnd (not ported) +/** BitwiseAnd * * Description: * Function to performe bitwise AND between 2 tensors. @@ -101,7 +101,7 @@ * */ -/** BitwiseNot (not ported) +/** BitwiseNot * * Description: * Function to performe bitwise NOT. @@ -111,7 +111,7 @@ * */ -/** BitwiseOr (not ported) +/** BitwiseOr * * Description: * Function to performe bitwise OR between 2 tensors. @@ -121,27 +121,27 @@ * */ -/** BitwiseXor (not ported) +/** BitwiseXor * * Description: * Function to performe bitwise XOR between 2 tensors. * * Equivalent Android NNAPI Op: - * None + * n/a * */ -/** BoundingBoxTransform (not ported) +/** BoundingBoxTransform * * Description: - * Function to . + * Transform proposal bounding boxes to target bounding box using bounding box deltas. * * Equivalent Android NNAPI Op: - * ? + * n/a * */ -/** Cast (not ported) +/** Cast * * Description: * Function to cast a tensor. 
@@ -151,20 +151,20 @@ * */ -/** ChannelShuffelLayer (not ported) +/** ChannelShuffleLayer * * Description: - * Function to cast a tensor. + * Function to shuffle the channels of the input tensor. * * Equivalent Android NNAPI Op: * ANEURALNETWORKS_CHANNEL_SHUFFLE * */ -/** Comparison (not ported) (only CL) +/** Comparison (only CL) * * Description: - * Function to cast a tensor. + * Function to compare 2 tensors. * * Equivalent Android NNAPI Op: * ANEURALNETWORKS_EQUAL @@ -192,11 +192,11 @@ * Function to tranpose the wieghts for the fully connected layer. * * Equivalent Android NNAPI Op: - * None + * n/a * */ -/** ConvolutionLayer (not ported) +/** ConvolutionLayer * * Description: * Function to compute a convolution layer. @@ -212,74 +212,74 @@ * Function to copy a tensor. * * Equivalent Android NNAPI Op: - * None + * n/a * */ /** Crop (only CL) * * Description: - * Function to . + * Performs a copy of input tensor to the output tensor. * * Equivalent Android NNAPI Op: - * ? + * n/a * */ -/** CropResize (not ported) +/** CropResize * * Description: - * Function to . + * Function to perform cropping and resizing. * * Equivalent Android NNAPI Op: - * ? + * n/a * */ -/** DeconvolutionLayer (not ported) +/** DeconvolutionLayer * * Description: - * Function to . + * Function to compute a deconvolution or tranpose convolution. * * Equivalent Android NNAPI Op: * ANEURALNETWORKS_TRANSPOSE_CONV_2D * */ -/** DeconvolutionLayerUpsample (only CL) (not ported) +/** DeconvolutionLayerUpsample (only CL) * * Description: - * Function to . + * Function to execute deconvolution upsample on OpenCL. * * Equivalent Android NNAPI Op: * ANEURALNETWORKS_TRANSPOSE_CONV_2D * */ -/** DepthConverterLayer (not ported) +/** DepthConvertLayer * * Description: - * Function to . + * Performs a down-scaling depth conversion. * * Equivalent Android NNAPI Op: - * None + * n/a * */ -/** DepthToSpaceLayer (not ported) +/** DepthToSpaceLayer * * Description: - * Function to . 
+ * Depth to Space transformation. * * Equivalent Android NNAPI Op: * ANEURALNETWORKS_DEPTH_TO_SPACE * */ -/** DepthwiseConvolutionLayer (not ported) +/** DepthwiseConvolutionLayer * * Description: - * Function to perform depthwise separable convolution + * Function to perform depthwise separable convolution. * * Equivalent Android NNAPI Op: * ANEURALNETWORKS_DEPTHWISE_CONV_2D @@ -289,17 +289,17 @@ /** DequantizationLayer * * Description: - * Function to dequantize the values in a tensor + * Function to dequantize the values in a tensor. * * Equivalent Android NNAPI Op: * ANEURALNETWORKS_DEQUANTIZE * */ -/** DetectionPostProcessLayer (not ported) (no CL) +/** DetectionPostProcessLayer (no CL) * * Description: - * Function to generate the detection output based on center size encoded boxes, class prediction and anchors by doing non maximum suppression (NMS) + * Function to generate the detection output based on center size encoded boxes, class prediction and anchors by doing non maximum suppression (NMS). * * Equivalent Android NNAPI Op: * ANEURALNETWORKS_DETECTION_POSTPROCESSING @@ -309,7 +309,7 @@ /** DirectConvolutionLayer * * Description: - * Function to + * Function to compute direct convolution. * * Equivalent Android NNAPI Op: * ANEURALNETWORKS_CONV_2D @@ -319,7 +319,7 @@ /** DirectDeconvolutionLayer (only CL) * * Description: - * Function to + * Function to run the deconvolution layer. * * Equivalent Android NNAPI Op: * ANEURALNETWORKS_TRANSPOSE_CONV_2D @@ -387,27 +387,27 @@ /** FFT1D * * Description: - * Fast Fourier Transform 1D + * Fast Fourier Transform 1D. * * Equivalent Android NNAPI Op: - * None + * n/a * */ /** FFT2D * * Description: - * Fast Fourier Transform 2D + * Fast Fourier Transform 2D. * * Equivalent Android NNAPI Op: - * None + * n/a * */ /** FFTConvolutionLayer * * Description: - * Fast Fourier Transform Convolution + * Fast Fourier Transform Convolution. 
* * Equivalent Android NNAPI Op: * ANEURALNETWORKS_CONV_2D @@ -417,24 +417,24 @@ /** Fill * * Description: - * Set the values of a tensor with a given value + * Set the values of a tensor with a given value. * * Equivalent Android NNAPI Op: * ANEURALNETWORKS_FILL * */ -/** FillBorder (not ported) +/** FillBorder * * Description: - * + * Function to . * * Equivalent Android NNAPI Op: - * ? + * n/a * */ -/** FlattenLayer (not ported) +/** FlattenLayer * * Description: * Reshape a tensor to be 1D @@ -447,104 +447,104 @@ /** Floor * * Description: - * Round the value to the lowest number + * Round the value to the lowest number. * * Equivalent Android NNAPI Op: * ANEURALNETWORKS_FLOOR * */ -/** FullyConnectedLayer (not ported) +/** FullyConnectedLayer * * Description: - * Function to perform a fully connected / dense layer + * Function to perform a fully connected / dense layer. * * Equivalent Android NNAPI Op: * ANEURALNETWORKS_FULLY_CONNECTED * */ -/** FuseBatchNormalization (not ported) +/** FuseBatchNormalization * * Description: - * Function to . + * Function to fuse the batch normalization node to a preceding convolution node. * * Equivalent Android NNAPI Op: - * None + * n/a * */ -/** Gather (not ported) +/** Gather * * Description: - * Function to . + * Performs the Gather operation along the chosen axis. * * Equivalent Android NNAPI Op: * ANEURALNETWORKS_GATHER * */ -/** GEMM (not ported) +/** GEMM * * Description: * General Matrix Multiplication. * * Equivalent Android NNAPI Op: - * None + * n/a * */ -/** GEMMConv2D (not ported) (no CL) +/** GEMMConv2D (no CL) * * Description: * General Matrix Multiplication. * * Equivalent Android NNAPI Op: - * None + * ANEURALNETWORKS_CONV_2D * */ -/** GEMMConvolutionLayer (not ported) +/** GEMMConvolutionLayer * * Description: * General Matrix Multiplication. 
* * Equivalent Android NNAPI Op: - * None + * ANEURALNETWORKS_CONV_2D * */ -/** GEMMDeconvolutionLayer (not ported) (only CL) +/** GEMMDeconvolutionLayer (only CL) * * Description: * General Matrix Multiplication. * * Equivalent Android NNAPI Op: - * None + * ANEURALNETWORKS_TRANSPOSE_CONV_2D * */ -/** GEMMLowpMatrixMultiplyCore (not ported) +/** GEMMLowpMatrixMultiplyCore * * Description: * General Matrix Multiplication. * * Equivalent Android NNAPI Op: - * None + * n/a * */ -/** GEMMLowpOutputStage (not ported) +/** GEMMLowpOutputStage * * Description: * General Matrix Multiplication. * * Equivalent Android NNAPI Op: - * None + * n/a * */ -/** GenerateProposalsLayer (not ported) +/** GenerateProposalsLayer * * Description: * Function to generate proposals for a RPN (Region Proposal Network). @@ -554,7 +554,7 @@ * */ -/** InstanceNormalizationLayer (not ported) +/** InstanceNormalizationLayer * * Description: * Function to perform a Instance normalization on a given axis. @@ -564,7 +564,7 @@ * */ -/** L2NormalizationLayer (not ported) +/** L2NormalizeLayer * * Description: * Function to perform a L2 normalization on a given axis. @@ -583,102 +583,92 @@ * - Logical NOT * * Equivalent Android NNAPI Op: - * None? + * n/a * */ /** LogicalAnd (only CL) * * Description: - * Function to perform Logical AND + * Function to perform Logical AND. * * Equivalent Android NNAPI Op: - * None? + * n/a * */ /** LogicalOr (only CL) * * Description: - * Function to perform Logical OR + * Function to perform Logical OR. * * Equivalent Android NNAPI Op: - * None? + * n/a * */ /** LogicalNot (only CL) * * Description: - * Function to perform Logical NOT + * Function to perform Logical NOT. * * Equivalent Android NNAPI Op: - * None? + * n/a * */ -/** LSTMLayer (not ported) +/** LSTMLayer * * Description: - * Function to perform LSTM + * Function to perform a single time step in a Long Short-Term Memory (LSTM) layer. 
* * Equivalent Android NNAPI Op: * ANEURALNETWORKS_LSTM * */ -/** LSTMLayerQuantized (not ported) +/** LSTMLayerQuantized * * Description: - * Function to perform LSTM + * Function to perform quantized LSTM (Long Short-Term Memory) * * Equivalent Android NNAPI Op: * ANEURALNETWORKS_QUANTIZED_LSTM - * ANEURALNETWORKS_QUANTIZED_16BIT_LSTM ? + * ANEURALNETWORKS_QUANTIZED_16BIT_LSTM * */ -/** MaxUnpoolingLayer (not ported) +/** MaxUnpoolingLayer * * Description: - * Function to perform MaxUnpooling + * Function to perform MaxUnpooling. * * Equivalent Android NNAPI Op: - * ? + * n/a * */ -/** MeanStdDevNormalizationLayer (not ported) +/** MeanStdDevNormalizationLayer * * Description: * Function to execute mean and standard deviation normalization. * * Equivalent Android NNAPI Op: - * None ? + * n/a * */ -/** MeanStdDevNormalizationLayer (not ported) - * - * Description: - * Function to execute mean and standard deviation normalization. - * - * Equivalent Android NNAPI Op: - * None ? - * - */ - -/** NormalizationLayer (not ported) +/** NormalizationLayer * * Description: * Function to compute normalization layer. * * Equivalent Android NNAPI Op: - * None ? + * ANEURALNETWORKS_LOCAL_RESPONSE_NORMALIZATION * */ -/** PadLayer (not ported) +/** PadLayer * * Description: * Function to pad a tensor. @@ -731,24 +721,24 @@ * */ -/** PriorBoxLayer (not ported) +/** PriorBoxLayer * * Description: - * Function to compute the activation layer with the PRELU activation function. + * Function to . * * Equivalent Android NNAPI Op: - * ? + * n/a * */ -/** QLSTMLayer (not ported) +/** QLSTMLayer * * Description: - * Function to perform LSTM + * Function to perform quantized LSTM (Long Short-Term Memory). * * Equivalent Android NNAPI Op: * ANEURALNETWORKS_QUANTIZED_LSTM - * ANEURALNETWORKS_QUANTIZED_16BIT_LSTM ? + * ANEURALNETWORKS_QUANTIZED_16BIT_LSTM * */ @@ -762,17 +752,17 @@ * */ -/** Range (not ported) +/** Range * * Description: - * Function to . 
+ * Function to generates a sequence of numbers starting from START and extends by increments of 'STEP' up to but not including 'END'. * * Equivalent Android NNAPI Op: - * none? + * n/a * */ -/** RecudeMean (not ported) +/** ReduceMean * * Description: * Function to performe reduce mean operation. @@ -782,22 +772,7 @@ * */ -/** RecudeOperation (not ported) - * - * Description: - * Function to performe reduce mean operation. - * - * Equivalent Android NNAPI Op: - * ANEURALNETWORKS_REDUCE_ALL - * ANEURALNETWORKS_REDUCE_ANY - * ANEURALNETWORKS_REDUCE_MAX - * ANEURALNETWORKS_REDUCE_MIN - * ANEURALNETWORKS_REDUCE_PROD - * ANEURALNETWORKS_REDUCE_SUM - * - */ - -/** RecudeOperation (not ported) +/** ReductionOperation * * Description: * Function to performe reduce with the following operations @@ -820,20 +795,20 @@ * */ -/** ReorgLayer (not ported) +/** ReorgLayer * * Description: - * Function to performe reorg + * Performs a reorganization layer of input tensor to the output tensor. * * Equivalent Android NNAPI Op: - * None? + * n/a * */ /** ReshapeLayer * * Description: - * Fucntion to reshape a tensor + * Function to reshape a tensor. * * Equivalent Android NNAPI Op: * ANEURALNETWORKS_RESHAPE @@ -841,40 +816,40 @@ * */ -/** ReverseLayer (not ported) +/** Reverse * * Description: - * Fucntion to . + * Function to reverse tensor according to axis. * * Equivalent Android NNAPI Op: - * None? + * n/a * */ -/** RNNLayer (not ported) +/** RNNLayer * * Description: - * Fucntion to perform RNN . + * Function to perform recurrent neural network layer. * * Equivalent Android NNAPI Op: * ANEURALNETWORKS_RNN * */ -/** ROIAligmentLayer (not ported) +/** ROIAlignLayer * * Description: - * Fucntion to perform RNN . + * Function to perform ROI alignment. * * Equivalent Android NNAPI Op: * ANEURALNETWORKS_ROI_ALIGN * */ -/** ROIPoolingLayer (not ported) +/** ROIPoolingLayer * * Description: - * Fucntion to perform RNN . + * Function to perform ROI pooling. 
* * Equivalent Android NNAPI Op: * ANEURALNETWORKS_ROI_POOLING @@ -884,8 +859,8 @@ /** Scale * * Description: - * Fucntion to perform resize a tensor using to interpolate: - * - Bilenear + * Function to perform resize a tensor using to interpolate: + * - Bilinear * - Nearest neighbor * * Equivalent Android NNAPI Op: @@ -894,10 +869,10 @@ * */ -/** Select (not ported) +/** Select * * Description: - * Fucntion to select values from 2 tensors depending on an input tensor of booleans. + * Function to select values from 2 tensors depending on an input tensor of booleans. * * Equivalent Android NNAPI Op: * ANEURALNETWORKS_SELECT @@ -925,7 +900,7 @@ * */ -/** SpaceToBatchLayer (not ported) +/** SpaceToBatchLayer * * Description: * Function to divide a tensor spatially. @@ -935,7 +910,7 @@ * */ -/** SpaceToDepthLayer (not ported) +/** SpaceToDepthLayer * * Description: * Function to rearrange blocks of spatial data into depth. @@ -945,7 +920,7 @@ * */ -/** Split (not ported) +/** Split * * Description: * Function to split a tensor along a given axis. @@ -955,13 +930,13 @@ * */ -/** StackLayer (not ported) +/** StackLayer * * Description: * Function to stack tensors along an axis. * * Equivalent Android NNAPI Op: - * none + * n/a * */ @@ -975,7 +950,7 @@ * */ -/** Tile (not ported) +/** Tile * * Description: * Function to construct a tensor by tiling a given tensor. @@ -988,40 +963,40 @@ /** Transpose * * Description: - * Function to transpose an 2D tensor. + * Function to transpose a 2D tensor. * * Equivalent Android NNAPI Op: * ANEURALNETWORKS_TRANSPOSE * */ -/** Unstack (not ported) +/** Unstack * * Description: * Function to unpack a rank-R tensor into rank-(R-1) tensors. * * Equivalent Android NNAPI Op: - * none + * n/a * */ -/** WinogradConvolutionLayer (not ported) +/** WinogradConvolutionLayer * * Description: - * Function to. + * Function to do Winograd Convolution. 
* * Equivalent Android NNAPI Op: - * None + * ANEURALNETWORKS_CONV_2D * */ -/** WinogradInputTransform (not ported) (only CL) +/** WinogradInputTransform (only CL) * * Description: * Function to. * * Equivalent Android NNAPI Op: - * None + * n/a * */ diff --git a/docs/09_operators_list.dox b/docs/09_operators_list.dox index 82a127bbd3..244f292f82 100644 --- a/docs/09_operators_list.dox +++ b/docs/09_operators_list.dox @@ -107,14 +107,1531 @@ where N = batches, C = channels, H = height, W = width F16F16 F32F32 + + ArgMinMaxLayer + Function to calculate the index of the minimum or maximum values in a tensor based on an axis. + +
    +
  • ANEURALNETWORKS_ARGMAX +
  • ANEURALNETWORKS_ARGMIN +
+ NEArgMinMaxLayer + +
    +
  • All +
+ + +
srcdst +
QASYMM8U32, S32 +
QASYMM8_SIGNEDU32, S32 +
S32U32, S32 +
F16U32, S32 +
F32U32, S32 +
+ + CLArgMinMaxLayer + +
    +
  • All +
+ + +
srcdst +
QASYMM8U32, S32 +
QASYMM8_SIGNEDU32, S32 +
S32U32, S32 +
F16U32, S32 +
F32U32, S32 +
+ + BatchNormalizationLayer + Function to perform batch normalization. + +
    +
  • n/a +
+ NEBatchNormalizationLayer + +
    +
  • NHWC +
  • NCHW +
+ + +
srcdst +
F32F32 +
F16F16 +
+ + CLBatchNormalizationLayer + +
    +
  • NHWC +
  • NCHW +
+ + +
srcdst +
F32F32 +
F16F16 +
+ + BatchToSpaceLayer + Batch to space transformation. + +
    +
  • ANEURALNETWORKS_BATCH_TO_SPACE_ND +
+ NEBatchToSpaceLayer + +
    +
  • NHWC +
  • NCHW +
+ + +
src0src1dst +
AllS32All +
+ + CLBatchToSpaceLayer + +
    +
  • NHWC +
  • NCHW +
+ + +
src0src1dst +
AllS32All +
+ + BitwiseAnd + Function to perform bitwise AND between 2 tensors. + +
    +
  • ANEURALNETWORKS_LOGICAL_AND +
+ NEBitwiseAnd + +
    +
  • All +
+ + +
srcdst +
U8U8 +
+ + CLBitwiseAnd + +
    +
  • All +
+ + +
srcdst +
U8U8 +
+ + BitwiseNot + Function to perform bitwise NOT. + +
    +
  • ANEURALNETWORKS_LOGICAL_NOT +
+ NEBitwiseNot + +
    +
  • All +
+ + +
srcdst +
U8U8 +
+ + CLBitwiseNot + +
    +
  • All +
+ + +
srcdst +
U8U8 +
+ + BitwiseOr + Function to perform bitwise OR between 2 tensors. + +
    +
  • ANEURALNETWORKS_LOGICAL_OR +
+ NEBitwiseOr + +
    +
  • All +
+ + +
srcdst +
U8U8 +
+ + CLBitwiseOr + +
    +
  • All +
+ + +
srcdst +
U8U8 +
+ + BitwiseXor + Function to perform bitwise XOR between 2 tensors. + +
    +
  • n/a +
+ NEBitwiseXor + +
    +
  • All +
+ + +
srcdst +
U8U8 +
+ + CLBitwiseXor + +
    +
  • All +
+ + +
srcdst +
U8U8 +
+ + BoundingBoxTransform + Transform proposal bounding boxes to target bounding box using bounding box deltas. + +
    +
  • n/a +
+ NEBoundingBoxTransform + +
    +
  • NHWC +
  • NCHW +
+ + +
src0src1dst +
QASYMM16QASYMM8QASYMM16 +
F16F16F16 +
F32F32F32 +
+ + CLBoundingBoxTransform + +
    +
  • NHWC +
  • NCHW +
+ + +
src0src1dst +
QASYMM16QASYMM8QASYMM16 +
F16F16F16 +
F32F32F32 +
+ + Cast + Function to cast a tensor. + +
    +
  • ANEURALNETWORKS_CAST +
+ NECast + +
    +
  • All +
+ + +
srcdst +
QASYMM8_SIGNEDS16, S32, F32, F16 +
QASYMM8U16, S16, S32, F32, F16 +
U8U16, S16, S32, F32, F16 +
U16U8, U32 +
S16QASYMM8_SIGNED, U8, S32 +
F16QASYMM8_SIGNED, QASYMM8, F32, S32, U8 +
S32QASYMM8_SIGNED, QASYMM8, F16, F32, U8 +
F32QASYMM8_SIGNED, QASYMM8, BFLOAT16, F16, S32, U8 +
+ + CLCast + +
    +
  • All +
+ + +
srcdst +
U8S8, U16, S16, U32, S32, F16, F32 +
U16U8, S8, S16, U32, S32, F16, F32 +
S16U8, S8, U16, U32, S32, F16, F32 +
U32U8, S8, U16, S16, S32, F16, F32 +
S32U8, S8, U16, S16, U32, F16, F32 +
F16U8, S8, U16, S16, U32, F32 +
F32U8, S8, U16, S16, U32, F16 +
+ + ChannelShuffleLayer + Function to shuffle the channels of the input tensor. + +
    +
  • ANEURALNETWORKS_CHANNEL_SHUFFLE +
+ NEChannelShuffleLayer + +
    +
  • NCHW +
+ + +
srcdst +
AllAll +
+ + CLChannelShuffleLayer + +
    +
  • NCHW +
+ + +
srcdst +
AllAll +
ConcatenateLayer Function to concatenate tensors along a given axis.
    -
  • ANEURALNETWORKS_CONCATENATION +
  • ANEURALNETWORKS_CONCATENATION +
+ NEConcatenateLayer + +
    +
  • All +
+ + +
srcdst +
QASYMM8QASYMM8 +
QASYMM8_SIGNEDQASYMM8_SIGNED +
F16F16 +
F32F32 +
+ + CLConcatenateLayer + +
    +
  • All +
+ + +
srcdst +
QASYMM8QASYMM8 +
QASYMM8_SIGNEDQASYMM8_SIGNED +
F16F16 +
F32F32 +
+ + ConvertFullyConnectedWeights + Function to transpose the weights for the fully connected layer. + +
    +
  • n/a +
+ NEConvertFullyConnectedWeights + +
    +
  • NHWC +
  • NCHW +
+ + +
srcdst +
AllAll +
+ + CLConvertFullyConnectedWeights + +
    +
  • NHWC +
  • NCHW +
+ + +
srcdst +
AllAll +
+ + ConvolutionLayer + Function to compute a convolution layer. + +
    +
  • ANEURALNETWORKS_CONV_2D +
+ NEConvolutionLayer + +
    +
  • NHWC +
  • NCHW +
+ + +
src0src1src2dst +
F16F16F16F16 +
F32F32F32F32 +
QASYMM8QASYMM8S32QASYMM8 +
QASYMM8QSYMM8_PER_CHANNELS32QASYMM8 +
QASYMM8_SIGNEDQASYMM8_SIGNEDS32QASYMM8_SIGNED +
QASYMM8_SIGNEDQSYMM8_PER_CHANNELS32QASYMM8_SIGNED +
+ + CLConvolutionLayer + +
    +
  • NHWC +
  • NCHW +
+ + +
src0src1src2dst +
F16F16F16F16 +
F32F32F32F32 +
QASYMM8QASYMM8S32QASYMM8 +
QASYMM8QSYMM8_PER_CHANNELS32QASYMM8 +
QASYMM8_SIGNEDQASYMM8_SIGNEDS32QASYMM8_SIGNED +
QASYMM8_SIGNEDQSYMM8_PER_CHANNELS32QASYMM8_SIGNED +
+ + Copy + Function to copy a tensor. + +
    +
  • n/a +
+ NECopy + +
    +
  • All +
+ + +
srcdst +
AllAll +
+ + CLCopy + +
    +
  • All +
+ + +
srcdst +
AllAll +
+ + CropResize + Function to perform cropping and resizing. + +
    +
  • n/a +
+ NECropResize + +
    +
  • NHWC +
+ + +
src0src1src2dst +
AllF32F32F32 +
+ + CLCropResize + +
    +
  • NHWC +
+ + +
src0src1src2dst +
AllF32F32F32 +
+ + DeconvolutionLayer + Function to compute a deconvolution or transpose convolution. + +
    +
  • ANEURALNETWORKS_TRANSPOSE_CONV_2D +
+ NEDeconvolutionLayer + +
    +
  • NHWC +
  • NCHW +
+ + +
src0src1src2dst +
F16F16F16F16 +
F32F32F32F32 +
QASYMM8QASYMM8S32QASYMM8 +
QASYMM8QSYMM8_PER_CHANNELS32QASYMM8 +
QASYMM8_SIGNEDQASYMM8_SIGNEDS32QASYMM8_SIGNED +
QASYMM8_SIGNEDQSYMM8_PER_CHANNELS32QASYMM8_SIGNED +
+ + CLDeconvolutionLayer + +
    +
  • NHWC +
  • NCHW +
+ + +
src0src1src2dst +
F16F16F16F16 +
F32F32F32F32 +
QASYMM8QASYMM8S32QASYMM8 +
QASYMM8QSYMM8_PER_CHANNELS32QASYMM8 +
QASYMM8_SIGNEDQASYMM8_SIGNEDS32QASYMM8_SIGNED +
QASYMM8_SIGNEDQSYMM8_PER_CHANNELS32QASYMM8_SIGNED +
+ + DepthConvertLayer + Performs a down-scaling depth conversion. + +
    +
  • n/a +
+ NEDepthConvertLayer + +
    +
  • All +
+ + +
srcdst +
QASYMM8F16, F32 +
U8U16, S16, S32 +
U16U8, U32 +
S16U8, S32 +
BFLOAT16F32 +
F16QASYMM8, F32 +
F32QASYMM8, F16, BFLOAT16 +
+ + CLDepthConvertLayer + +
    +
  • All +
+ + +
srcdst +
U8S8, U16, S16, U32, S32, F16, F32 +
U16U8, S8, S16, U32, S32, F16, F32 +
S16U8, S8, U16, U32, S32, F16, F32 +
U32U8, S8, U16, S16, S32, F16, F32 +
S32U8, S8, U16, S16, U32, F16, F32 +
F16U8, S8, U16, S16, U32, F32 +
F32U8, S8, U16, S16, U32, F16 +
+ + DepthToSpaceLayer + Depth to Space transformation. + +
    +
  • ANEURALNETWORKS_DEPTH_TO_SPACE +
+ NEDepthToSpaceLayer + +
    +
  • NHWC +
  • NCHW +
+ + +
srcdst +
AllAll +
+ + CLDepthToSpaceLayer + +
    +
  • NHWC +
  • NCHW +
+ + +
srcdst +
AllAll +
+ + DepthwiseConvolutionLayer + Function to perform depthwise separable convolution. + +
    +
  • ANEURALNETWORKS_DEPTHWISE_CONV_2D +
+ NEDepthwiseConvolutionLayer + +
    +
  • NHWC +
  • NCHW +
+ + +
src0src1src2dst +
F16F16F16F16 +
F32F32F32F32 +
QASYMM8QASYMM8S32QASYMM8 +
QASYMM8QSYMM8_PER_CHANNELS32QASYMM8 +
QASYMM8_SIGNEDQASYMM8_SIGNEDS32QASYMM8_SIGNED +
QASYMM8_SIGNEDQSYMM8_PER_CHANNELS32QASYMM8_SIGNED +
+ + CLDepthwiseConvolutionLayer + +
    +
  • NHWC +
  • NCHW +
+ + +
src0src1src2dst +
F16F16F16F16 +
F32F32F32F32 +
QASYMM8QASYMM8S32QASYMM8 +
QASYMM8QSYMM8_PER_CHANNELS32QASYMM8 +
QASYMM8_SIGNEDQASYMM8_SIGNEDS32QASYMM8_SIGNED +
QASYMM8_SIGNEDQSYMM8_PER_CHANNELS32QASYMM8_SIGNED +
+ + DequantizationLayer + Function to dequantize the values in a tensor. + +
    +
  • ANEURALNETWORKS_DEQUANTIZE +
+ NEDequantizationLayer + +
    +
  • All +
+ + +
srcdst +
QASYMM8F16, F32 +
QASYMM8_SIGNEDF16, F32 +
QSYMM8_PER_CHANNELF16, F32 +
QSYMM8F16, F32 +
QSYMM16F16, F32 +
+ + CLDequantizationLayer + +
    +
  • All +
+ + +
srcdst +
QASYMM8F16, F32 +
QASYMM8_SIGNEDF16, F32 +
QSYMM8_PER_CHANNELF16, F32 +
QSYMM8F16, F32 +
QSYMM16F16, F32 +
+ + DirectConvolutionLayer + Function to compute direct convolution. + +
    +
  • ANEURALNETWORKS_CONV_2D +
+ NEDirectConvolutionLayer + +
    +
  • NHWC +
  • NCHW +
+ + +
src0src1src2dst +
F16F16F16F16 +
F32F32F32F32 +
+ + CLDirectConvolutionLayer + +
    +
  • NHWC +
  • NCHW +
+ + +
src0src1src2dst +
F16F16F16F16 +
F32F32F32F32 +
QASYMM8QASYMM8S32QASYMM8 +
QASYMM8_SIGNEDQASYMM8_SIGNEDS32QASYMM8_SIGNED +
+ + FFT1D + Fast Fourier Transform 1D. + +
    +
  • n/a +
+ NEFFT1D + +
    +
  • All +
+ + +
srcdst +
F32F32 +
+ + CLFFT1D + +
    +
  • All +
+ + +
srcdst +
F32F32 +
F16F16 +
+ + FFT2D + Fast Fourier Transform 2D. + +
    +
  • n/a +
+ NEFFT2D + +
    +
  • All +
+ + +
srcdst +
F32F32 +
+ + CLFFT2D + +
    +
  • All +
+ + +
srcdst +
F32F32 +
F16F16 +
+ + FFTConvolutionLayer + Fast Fourier Transform Convolution. + +
    +
  • ANEURALNETWORKS_CONV_2D +
+ NEFFTConvolutionLayer + +
    +
  • All +
+ + +
srcdst +
F32F32 +
+ + CLFFTConvolutionLayer + +
    +
  • All +
+ + +
srcdst +
F32F32 +
F16F16 +
+ + Fill + Set the values of a tensor with a given value. + +
    +
  • ANEURALNETWORKS_FILL +
+ NEFill + +
    +
  • All +
+ + +
srcdst +
AllAll +
+ + CLFill + +
    +
  • All +
+ + +
srcdst +
AllAll +
+ + FillBorder + Function to fill the borders within the XY-plane. + +
    +
  • n/a +
+ NEFillBorder + +
    +
  • All +
+ + +
srcdst +
AllAll +
+ + CLFillBorder + +
    +
  • All +
+ + +
srcdst +
AllAll +
+ + FlattenLayer + Reshape a tensor to be 1D. + +
    +
  • ANEURALNETWORKS_RESHAPE +
+ NEFlattenLayer + +
    +
  • All +
+ + +
srcdst +
AllAll +
+ + CLFlattenLayer + +
    +
  • All +
+ + +
srcdst +
AllAll +
+ + Floor + Round the value to the lowest number. + +
    +
  • ANEURALNETWORKS_FLOOR +
+ NEFloor + +
    +
  • All +
+ + +
srcdst +
F32F32 +
F16F16 +
+ + CLFloor + +
    +
  • All +
+ + +
srcdst +
F32F32 +
F16F16 +
+ + FullyConnectedLayer + Function to perform a fully connected / dense layer. + +
    +
  • ANEURALNETWORKS_FULLY_CONNECTED +
+ NEFullyConnectedLayerReshapeWeightsManaged + +
    +
  • NHWC +
  • NCHW +
+ + +
src0src1src2dst +
F16F16F16F16 +
F32F32F32F32 +
QASYMM8QASYMM8S32QASYMM8 +
QASYMM8_SIGNEDQASYMM8_SIGNEDS32QASYMM8_SIGNED +
+ + CLFullyConnectedLayerReshapeWeightsManaged + +
    +
  • NHWC +
  • NCHW +
+ + +
src0src1src2dst +
F16F16F16F16 +
F32F32F32F32 +
QASYMM8QASYMM8S32QASYMM8 +
QASYMM8_SIGNEDQASYMM8_SIGNEDS32QASYMM8_SIGNED +
+ + FuseBatchNormalization + Function to fuse the batch normalization node to a preceding convolution node. + +
    +
  • n/a +
+ NEFuseBatchNormalization + +
    +
  • NHWC +
  • NCHW +
+ + +
srcdst +
F32F32 +
F16F16 +
+ + CLFuseBatchNormalization + +
    +
  • NHWC +
  • NCHW +
+ + +
srcdst +
F32F32 +
F16F16 +
+ + Gather + Performs the Gather operation along the chosen axis. + +
    +
  • ANEURALNETWORKS_GATHER +
+ NEGather + +
    +
  • All +
+ + +
srcdst +
AllAll +
+ + CLGather + +
    +
  • All +
+ + +
srcdst +
AllAll +
+ + GEMM + General Matrix Multiplication. + +
    +
  • n/a +
+ NEGEMM + +
    +
  • All +
+ + +
src0src1src2dst +
F32F32F32F32 +
F16F16F16F16 +
BFLOAT16BFLOAT16BFLOAT16BFLOAT16 +
+ + CLGEMMReshapeRHSMatrixKernelManaged + +
    +
  • All +
+ + +
src0src1src2dst +
F32F32F32F32 +
F16F16F16F16 +
+ + GEMMConvolutionLayer + General Matrix Multiplication. + +
    +
  • ANEURALNETWORKS_CONV_2D +
+ NEConvolutionLayerReshapeWeights + +
    +
  • NHWC +
  • NCHW +
+ + +
src0src1src2dst +
F16F16F16F16 +
F32F32F32F32 +
BFLOAT16BFLOAT16BFLOAT16BFLOAT16 +
QASYMM8QASYMM8S32QASYMM8 +
QASYMM8QSYMM8_PER_CHANNELS32QASYMM8 +
QASYMM8_SIGNEDQASYMM8_SIGNEDS32QASYMM8_SIGNED +
QASYMM8_SIGNEDQSYMM8_PER_CHANNELS32QASYMM8_SIGNED +
+ + CLConvolutionLayerReshapeWeights + +
    +
  • NHWC +
  • NCHW +
+ + +
src0src1src2dst +
F16F16F16F16 +
F32F32F32F32 +
QASYMM8QASYMM8S32QASYMM8 +
QASYMM8QSYMM8_PER_CHANNELS32QASYMM8 +
QASYMM8_SIGNEDQASYMM8_SIGNEDS32QASYMM8_SIGNED +
QASYMM8_SIGNEDQSYMM8_PER_CHANNELS32QASYMM8_SIGNED +
+ + GEMMLowpMatrixMultiplyCore + General Matrix Multiplication. + +
    +
  • n/a +
+ NEGEMMLowpMatrixMultiplyCore + +
    +
  • NHWC +
  • NCHW +
+ + +
src0src1src2dst +
QASYMM8QASYMM8S32QASYMM8 +
QASYMM8QSYMM8_PER_CHANNELS32QASYMM8 +
QASYMM8QSYMM8S32QASYMM8 +
QASYMM8QASYMM8S32S32 +
QASYMM8QSYMM8_PER_CHANNELS32S32 +
QASYMM8QSYMM8S32S32 +
QASYMM8_SIGNEDQASYMM8_SIGNEDS32QASYMM8_SIGNED +
QASYMM8_SIGNEDQSYMM8_PER_CHANNELS32QASYMM8_SIGNED +
QASYMM8_SIGNEDQSYMM8S32QASYMM8_SIGNED +
QASYMM8_SIGNEDQASYMM8_SIGNEDS32S32 +
QASYMM8_SIGNEDQSYMM8_PER_CHANNELS32S32 +
QASYMM8_SIGNEDQSYMM8S32S32 +
+ + CLGEMMLowpMatrixMultiplyCore + +
    +
  • NHWC +
  • NCHW +
+ + +
src0src1src2dst +
QASYMM8QASYMM8S32QASYMM8 +
QASYMM8QSYMM8_PER_CHANNELS32QASYMM8 +
QASYMM8QSYMM8S32QASYMM8 +
QASYMM8QASYMM8S32S32 +
QASYMM8QSYMM8_PER_CHANNELS32S32 +
QASYMM8QSYMM8S32S32 +
QASYMM8_SIGNEDQASYMM8_SIGNEDS32QASYMM8_SIGNED +
QASYMM8_SIGNEDQSYMM8_PER_CHANNELS32QASYMM8_SIGNED +
QASYMM8_SIGNEDQSYMM8S32QASYMM8_SIGNED +
QASYMM8_SIGNEDQASYMM8_SIGNEDS32S32 +
QASYMM8_SIGNEDQSYMM8_PER_CHANNELS32S32 +
QASYMM8_SIGNEDQSYMM8S32S32 +
+ + GenerateProposalsLayer + Function to generate proposals for a RPN (Region Proposal Network). + +
    +
  • ANEURALNETWORKS_GENERATE_PROPOSALS +
+ NEGenerateProposalsLayer + +
    +
  • All +
+ + +
src0src1src2dst +
F16F16F16F16 +
F32F32F32F32 +
QASYMM8QSYMM8QSYMM16QASYMM8 +
+ + CLGenerateProposalsLayer + +
    +
  • All +
+ + +
src0src1src2dst +
F16F16F16F16 +
F32F32F32F32 +
QASYMM8QSYMM8QSYMM16QASYMM8 +
+ + InstanceNormalizationLayer + Function to perform a Instance normalization on a given axis. + +
    +
  • ANEURALNETWORKS_INSTANCE_NORMALIZATION +
+ NEInstanceNormalizationLayer + +
    +
  • NHWC +
  • NCHW +
+ + +
srcdst +
F16F16 +
F32F32 +
+ + CLInstanceNormalizationLayer + +
    +
  • NHWC +
  • NCHW +
+ + +
srcdst +
F16F16 +
F32F32 +
+ + L2NormalizeLayer + Function to perform a L2 normalization on a given axis. + +
    +
  • ANEURALNETWORKS_L2_NORMALIZATION +
+ NEL2NormalizeLayer + +
    +
  • NHWC +
  • NCHW +
+ + +
srcdst +
F16F16 +
F32F32 +
+ + CLL2NormalizeLayer + +
    +
  • NHWC +
  • NCHW +
+ + +
srcdst +
F16F16 +
F32F32 +
+ + LSTMLayer + Function to perform a single time step in a Long Short-Term Memory (LSTM) layer. + +
    +
  • ANEURALNETWORKS_LSTM +
+ NELSTMLayer + +
    +
  • All +
+ + +
src0 - src13dst0 - dst3 +
F16F16 +
F32F32 +
+ + CLLSTMLayer + +
    +
  • All +
+ + +
src0 - src13dst0 - dst3 +
F16F16 +
F32F32 +
+ + LSTMLayerQuantized + Function to perform quantized LSTM (Long Short-Term Memory). + +
    +
  • ANEURALNETWORKS_QUANTIZED_LSTM +
  • ANEURALNETWORKS_QUANTIZED_16BIT_LSTM +
+ NELSTMLayerQuantized + +
    +
  • All +
+ + +
src0 - src8src9 - src12src13src14dst0dst1 +
QASYMM8S32QSYMM16QASYMM8QSYMM16QASYMM8 +
+ + CLLSTMLayerQuantized + +
    +
  • All +
+ + +
src0 - src8src9 - src12src13src14dst0dst1 +
QASYMM8S32QSYMM16QASYMM8QSYMM16QASYMM8 +
+ + MaxUnpoolingLayer + Function to perform MaxUnpooling. + +
    +
  • n/a +
+ NEMaxUnpoolingLayer + +
    +
  • NHWC +
  • NCHW +
+ + +
srcdst +
QASYMM8QASYMM8 +
QASYMM8_SIGNEDQASYMM8_SIGNED +
F16F16 +
F32F32 +
+ + CLMaxUnpoolingLayer + +
    +
  • NHWC +
  • NCHW +
+ + +
srcdst +
QASYMM8QASYMM8 +
QASYMM8_SIGNEDQASYMM8_SIGNED +
F16F16 +
F32F32 +
+ + MeanStdDevNormalizationLayer + Function to execute mean and standard deviation normalization. + +
    +
  • n/a +
+ NEMeanStdDevNormalizationLayer + +
    +
  • NHWC +
  • NCHW +
+ + +
srcdst +
F32F32 +
F16F16 +
+ + CLMeanStdDevNormalizationLayer + +
    +
  • NHWC +
  • NCHW +
+ + +
srcdst +
F32F32 +
F16F16 +
+ + NormalizationLayer + Function to compute normalization layer. + +
    +
  • ANEURALNETWORKS_LOCAL_RESPONSE_NORMALIZATION +
+ NENormalizationLayer + +
    +
  • NHWC +
  • NCHW +
+ + +
srcdst +
F32F32 +
F16F16 +
+ + CLNormalizationLayer + +
    +
  • NHWC +
  • NCHW +
+ + +
srcdst +
F32F32 +
F16F16 +
+ + PadLayer + Function to pad a tensor. + +
    +
  • ANEURALNETWORKS_PAD +
  • ANEURALNETWORKS_PAD_V2 +
+ NEPadLayer + +
    +
  • NHWC +
  • NCHW +
+ + +
srcdst +
AllAll +
+ + CLPadLayer + +
    +
  • NHWC +
  • NCHW +
+ + +
srcdst +
AllAll +
+ + Permute + Function to transpose an ND tensor. + +
    +
  • ANEURALNETWORKS_TRANSPOSE +
+ NEPermute + +
    +
  • NHWC +
  • NCHW +
+ + +
srcdst +
AllAll +
+ + CLPermute + +
    +
  • NHWC +
  • NCHW +
+ + +
srcdst +
AllAll +
+ + PixelWiseMultiplication + Function to perform a multiplication. + +
    +
  • ANEURALNETWORKS_MUL +
+ NEPixelWiseMultiplication + +
    +
  • All +
+ + +
src0src1dst +
QASYMM8QASYMM8QASYMM8 +
QASYMM8_SIGNEDQASYMM8_SIGNEDQASYMM8_SIGNED +
QSYMM16QSYMM16QSYMM16 +
QSYMM16QSYMM16S32 +
U8U8U8 +
U8U8S16 +
U8S16S16 +
S16U8S16 +
S16S16S16 +
F16F16F16 +
F32S32F32 +
+ + CLPixelWiseMultiplication + +
    +
  • All +
+ + +
src0src1dst +
QASYMM8QASYMM8QASYMM8 +
QASYMM8_SIGNEDQASYMM8_SIGNEDQASYMM8_SIGNED +
QSYMM16QSYMM16QSYMM16 +
QSYMM16QSYMM16S32 +
U8U8U8 +
U8U8S16 +
U8S16S16 +
S16U8S16 +
S16S16S16 +
F16F16F16 +
F32S32F32 +
+ + PoolingLayer + Function to perform pooling with the specified pooling operation. + +
    +
  • ANEURALNETWORKS_AVERAGE_POOL_2D +
  • ANEURALNETWORKS_L2_POOL_2D +
  • ANEURALNETWORKS_MAX_POOL_2D
- NEConcatenateLayer + NEPoolingLayer + +
    +
  • NHWC +
  • NCHW +
+ + +
srcdst +
QASYMM8QASYMM8 +
QASYMM8_SIGNEDQASYMM8_SIGNED +
F16F16 +
F32F32 +
+ + CLPoolingLayer + +
    +
  • NHWC +
  • NCHW +
+ + +
srcdst +
QASYMM8QASYMM8 +
QASYMM8_SIGNEDQASYMM8_SIGNED +
F16F16 +
F32F32 +
+ + PReluLayer + Function to compute the activation layer with the PRELU activation function. + +
    +
  • ANEURALNETWORKS_PRELU +
+ NEPReluLayer
  • All @@ -128,7 +1645,7 @@ where N = batches, C = channels, H = height, W = width F32F32 - CLConcatenateLayer + CLPReluLayer
    • All @@ -142,13 +1659,224 @@ where N = batches, C = channels, H = height, W = width F32F32 - ConvertFullyConnectedWeights - Function to tranpose the wieghts for the fully connected layer. + PriorBoxLayer + Function to .
        -
      • None +
      • n/a
      - NEConvertFullyConnectedWeights + NEPriorBoxLayer + +
        +
      • NHWC +
      • NCHW +
      + + +
      src0src1dst +
      F32F32F32 +
      + + CLPriorBoxLayer + +
        +
      • NHWC +
      • NCHW +
      + + +
      src0src1dst +
      F32F32F32 +
      + + QLSTMLayer + Function to perform quantized LSTM (Long Short-Term Memory). + +
        +
      • ANEURALNETWORKS_QUANTIZED_LSTM +
      • ANEURALNETWORKS_QUANTIZED_16BIT_LSTM +
      + NEQLSTMLayer + +
        +
      • All +
      + + +
      src0src1 - src6src7 -src9src10src11dst0dst1 - dst2 +
      QASYMM8_SIGNEDQASYMM8S32QSYMM16QASYMM8_SIGNEDQSYMM16QASYMM8_SIGNED +
      + + CLQLSTMLayer + +
        +
      • All +
      + + +
      src0src1 - src6src7 -src9src10src11dst0dst1 - dst2 +
      QASYMM8_SIGNEDQASYMM8S32QSYMM16QASYMM8_SIGNEDQSYMM16QASYMM8_SIGNED +
      + + QuantizationLayer + Function to perform quantization layer + +
        +
      • ANEURALNETWORKS_QUANTIZE +
      + NEQuantizationLayer + +
        +
      • All +
      + + +
      srcdst +
      QASYMM8QASYMM8, QASYMM8_SIGNED, QASYMM16 +
      QASYMM8_SIGNEDQASYMM8, QASYMM8_SIGNED, QASYMM16 +
      F16QASYMM8, QASYMM8_SIGNED, QASYMM16 +
      F32QASYMM8, QASYMM8_SIGNED, QASYMM16 +
      + + CLQuantizationLayer + +
        +
      • All +
      + + +
      srcdst +
      QASYMM8QASYMM8, QASYMM8_SIGNED, QASYMM16 +
      QASYMM8_SIGNEDQASYMM8, QASYMM8_SIGNED, QASYMM16 +
      F16QASYMM8, QASYMM8_SIGNED, QASYMM16 +
      F32QASYMM8, QASYMM8_SIGNED, QASYMM16 +
      + + Range + Function to generate a sequence of numbers starting from START and extending by increments of 'STEP' up to but not including 'END'. + +
        +
      • n/a +
      + NERange + +
        +
      • All +
      + + +
      dst +
      U8 +
      S8 +
      U16 +
      S16 +
      U32 +
      S32 +
      F16 +
      F32 +
      + + CLRange + +
        +
      • All +
      + + +
      dst +
      U8 +
      S8 +
      QASYMM8 +
      U16 +
      S16 +
      U32 +
      S32 +
      F16 +
      F32 +
      + + ReduceMean + Function to perform reduce mean operation. + +
        +
      • ANEURALNETWORKS_MEAN +
      + NEReduceMean + +
        +
      • All +
      + + +
      srcdst +
      QASYMM8QASYMM8 +
      QASYMM8_SIGNEDQASYMM8_SIGNED +
      F16F16 +
      F32F32 +
      + + CLReduceMean + +
        +
      • All +
      + + +
      srcdst +
      QASYMM8QASYMM8 +
      QASYMM8_SIGNEDQASYMM8_SIGNED +
      F16F16 +
      F32F32 +
      + + ReductionOperation + Function to perform reduction with the following operations - ARG_IDX_MAX: Index of the max value - ARG_IDX_MIN: Index of the min value - MEAN_SUM: Mean of sum - PROD: Product - SUM_SQUARE: Sum of squares - SUM: Sum - MIN: Min - MAX: Max + +
        +
      • ANEURALNETWORKS_REDUCE_ALL +
      • ANEURALNETWORKS_REDUCE_ANY +
      • ANEURALNETWORKS_REDUCE_MAX +
      • ANEURALNETWORKS_REDUCE_MIN +
      • ANEURALNETWORKS_REDUCE_PROD +
      • ANEURALNETWORKS_REDUCE_SUM +
      + NEReductionOperation + +
        +
      • All +
      + + +
      srcdst +
      QASYMM8QASYMM8 +
      QASYMM8_SIGNEDQASYMM8_SIGNED +
      F16F16 +
      F32F32 +
      S32S32 +
      + + CLReductionOperation + +
        +
      • All +
      + + +
      srcdst +
      QASYMM8QASYMM8 +
      QASYMM8_SIGNEDQASYMM8_SIGNED +
      F16F16 +
      F32F32 +
      S32S32 +
      + + ReorgLayer + Performs a reorganization layer of input tensor to the output tensor. + +
        +
      • n/a +
      + NEReorgLayer
      • NHWC @@ -160,7 +1888,7 @@ where N = batches, C = channels, H = height, W = width AllAll - CLConvertFullyConnectedWeights + CLReorgLayer
        • NHWC @@ -172,13 +1900,14 @@ where N = batches, C = channels, H = height, W = width AllAll - Copy - Function to copy a tensor. + ReshapeLayer + Function to reshape a tensor.
            -
          • None +
          • ANEURALNETWORKS_RESHAPE +
          • ANEURALNETWORKS_SQUEEZE
          - NECopy + NEReshapeLayer
          • All @@ -189,7 +1918,7 @@ where N = batches, C = channels, H = height, W = width AllAll - CLCopy + CLReshapeLayer
            • All @@ -200,59 +1929,41 @@ where N = batches, C = channels, H = height, W = width AllAll - DequantizationLayer - Function to dequantize the values in a tensor + Reverse + Function to reverse tensor according to axis.
                -
              • ANEURALNETWORKS_DEQUANTIZE +
              • n/a
              - NEDequantizationLayer + NEReverse
              • All
              -
              srcdst -
              QASYMM8F16 -
              QASYMM8F32 -
              QASYMM8_SIGNEDF16 -
              QASYMM8_SIGNEDF32 -
              QSYMM8_PER_CHANNELF16 -
              QSYMM8_PER_CHANNELF32 -
              QSYMM8F16 -
              QSYMM8F32 -
              QSYMM16F16 -
              QSYMM16F32 +
              src0src1dst +
              AllU32All
              - CLDequantizationLayer + CLReverse
              • All
              -
              srcdst -
              QASYMM8F16 -
              QASYMM8F32 -
              QASYMM8_SIGNEDF16 -
              QASYMM8_SIGNEDF32 -
              QSYMM8_PER_CHANNELF16 -
              QSYMM8_PER_CHANNELF32 -
              QSYMM8F16 -
              QSYMM8F32 -
              QSYMM16F16 -
              QSYMM16F32 +
              src0src1dst +
              AllU32All
              - DirectConvolutionLayer - Function to + RNNLayer + Function to perform recurrent neural network layer.
                -
              • ANEURALNETWORKS_CONV_2D +
              • ANEURALNETWORKS_RNN
              - NEDirectConvolutionLayer + NERNNLayer
              • NHWC @@ -260,12 +1971,12 @@ where N = batches, C = channels, H = height, W = width
              -
              src0src1src2dst -
              F16F16F16F16 -
              F32F32F32F32 +
              src0src1src2src3dst0dst1 +
              F16F16F16F16F16F16 +
              F32F32F32F32F32F32
              - CLDirectConvolutionLayer + CLRNNLayer
              • NHWC @@ -273,135 +1984,152 @@ where N = batches, C = channels, H = height, W = width
              -
              src0src1src2dst -
              F16F16F16F16 -
              F32F32F32F32 -
              QASYMM8QASYMM8S32QASYMM8 -
              QASYMM8_SIGNEDQASYMM8_SIGNEDS32QASYMM8_SIGNED +
              src0src1src2src3dst0dst1 +
              F16F16F16F16F16F16 +
              F32F32F32F32F32F32
              - FFT1D - Fast Fourier Transform 1D + ROIAlignLayer + Function to perform ROI alignment.
                -
              • None +
              • ANEURALNETWORKS_ROI_ALIGN
              - NEFFT1D + NEROIAlignLayer
              • All
              -
              srcdst -
              F32F32 +
              src0src1dst +
              F16F16F16 +
              F32F32F32 +
              QASYMM8QASYMM16QASYMM8 +
              QASYMM8_SIGNEDQASYMM16QASYMM8_SIGNED
              - CLFFT1D + CLROIAlignLayer
              • All
              -
              srcdst -
              F32F32 -
              F16F16 +
              src0src1dst +
              F16F16F16 +
              F32F32F32 +
              QASYMM8QASYMM16QASYMM8 +
              QASYMM8_SIGNEDQASYMM16QASYMM8_SIGNED
              - FFT2D - Fast Fourier Transform 2D + ROIPoolingLayer + Function to perform ROI pooling.
                -
              • None +
              • ANEURALNETWORKS_ROI_POOLING
              - NEFFT2D + NEROIPoolingLayer
              • All
              -
              srcdst -
              F32F32 +
              src0src1dst +
              F32U16F32 +
              QASYMM8U16QASYMM8
              - CLFFT2D + CLROIPoolingLayer
              • All
              -
              srcdst -
              F32F32 -
              F16F16 +
              src0src1dst +
              F16U16F16 +
              F32U16F32 +
              QASYMM8U16QASYMM8
              - FFTConvolutionLayer - Fast Fourier Transform Convolution + Scale + Function to perform a resize of a tensor using the interpolation methods: - Bilinear - Nearest neighbor + +
                -
              • ANEURALNETWORKS_CONV_2D +
              • ANEURALNETWORKS_RESIZE_BILINEAR +
              • ANEURALNETWORKS_RESIZE_NEAREST_NEIGHBOR
              - NEFFTConvolutionLayer + NEScale
                -
              • All +
              • NHWC +
              • NCHW
              srcdst +
              QASYMM8QASYMM8 +
              QASYMM8_SIGNEDQASYMM8_SIGNED +
              F16F16
              F32F32 +
              U8U8 +
              S16S16
              - CLFFTConvolutionLayer + CLScale
                -
              • All +
              • NHWC +
              • NCHW
              srcdst -
              F32F32 +
              QASYMM8QASYMM8 +
              QASYMM8_SIGNEDQASYMM8_SIGNED
              F16F16 +
              F32F32 +
              U8U8 +
              S16S16
              - Fill - Set the values of a tensor with a given value + Select + Function to select values from 2 tensors depending on an input tensor of booleans.
                -
              • ANEURALNETWORKS_FILL +
              • ANEURALNETWORKS_SELECT
              - NEFill + NESelect
              • All
              -
              srcdst -
              AllAll +
              src0src1src2dst +
              U8AllAllAll
              - CLFill + CLSelect
              • All
              -
              srcdst -
              AllAll +
              src0src1src2dst +
              U8AllAllAll
              - Floor - Round the value to the lowest number + Slice + Function to perform tensor slicing.
                -
              • ANEURALNETWORKS_FLOOR +
              • ANEURALNETWORKS_SLICE
              - NEFloor + NESlice
              • All @@ -409,11 +2137,10 @@ where N = batches, C = channels, H = height, W = width
                srcdst -
                F32F32 -
                F16F16 +
                AllAll
                - CLFloor + CLSlice
                • All @@ -421,17 +2148,16 @@ where N = batches, C = channels, H = height, W = width
                  srcdst -
                  F32F32 -
                  F16F16 +
                  AllAll
                  - Permute - Function to transpose an ND tensor. + SpaceToBatchLayer + Function to divide a tensor spatially.
                    -
                  • ANEURALNETWORKS_TRANSPOSE +
                  • ANEURALNETWORKS_SPACE_TO_BATCH_ND
                  - NEPermute + NESpaceToBatchLayer
                  • NHWC @@ -439,11 +2165,11 @@ where N = batches, C = channels, H = height, W = width
                  -
                  srcdst -
                  AllAll +
                  src0src1src2dst +
                  AllS32S32All
                  - CLPermute + CLSpaceToBatchLayer
                  • NHWC @@ -451,67 +2177,17 @@ where N = batches, C = channels, H = height, W = width
                  -
                  srcdst -
                  AllAll -
                  - - PixelWiseMultiplication - Function to performe a multiplication. - -
                    -
                  • ANEURALNETWORKS_MUL -
                  - NEPixelWiseMultiplication - -
                    -
                  • All -
                  - - -
                  src0src1dst -
                  QASYMM8QASYMM8QASYMM8 -
                  QASYMM8_SIGNEDQASYMM8_SIGNEDQASYMM8_SIGNED -
                  QSYMM16QSYMM16QASYMM16 -
                  QSYMM16QSYMM16S32 -
                  U8U8U8 -
                  U8U8S16 -
                  U8S16S16 -
                  S16U8S16 -
                  S16S16S16 -
                  F16F16F16 -
                  F32S32F32 -
                  - - CLPixelWiseMultiplication - -
                    -
                  • All -
                  - - -
                  src0src1dst -
                  QASYMM8QASYMM8QASYMM8 -
                  QASYMM8_SIGNEDQASYMM8_SIGNEDQASYMM8_SIGNED -
                  QSYMM16QSYMM16QASYMM16 -
                  QSYMM16QSYMM16S32 -
                  U8U8U8 -
                  U8U8S16 -
                  U8S16S16 -
                  S16U8S16 -
                  S16S16S16 -
                  F16F16F16 -
                  F32S32F32 +
                  src0src1src2dst +
                  AllS32S32All
                  - PoolingLayer - Function to performe pooling with the specified pooling operation. + SpaceToDepthLayer + Function to rearrange blocks of spatial data into depth.
                    -
                  • ANEURALNETWORKS_AVERAGE_POOL_2D -
                  • ANEURALNETWORKS_L2_POOL_2D -
                  • ANEURALNETWORKS_MAX_POOL_2D +
                  • ANEURALNETWORKS_SPACE_TO_DEPTH
                  - NEPoolingLayer + NESpaceToDepthLayer
                  • NHWC @@ -520,13 +2196,10 @@ where N = batches, C = channels, H = height, W = width
                    srcdst -
                    QASYMM8QASYMM8 -
                    QASYMM8_SIGNEDQASYMM8_SIGNED -
                    F16F16 -
                    F32F32 +
                    AllAll
                    - CLPoolingLayer + CLSpaceToDepthLayer
                    • NHWC @@ -535,19 +2208,16 @@ where N = batches, C = channels, H = height, W = width
                      srcdst -
                      QASYMM8QASYMM8 -
                      QASYMM8_SIGNEDQASYMM8_SIGNED -
                      F16F16 -
                      F32F32 +
                      AllAll
                      - PReluLayer - Function to compute the activation layer with the PRELU activation function. + Split + Function to split a tensor along a given axis.
                        -
                      • ANEURALNETWORKS_PRELU +
                      • ANEURALNETWORKS_SPLIT
                      - NEPReluLayer + NESplit
                      • All @@ -555,13 +2225,10 @@ where N = batches, C = channels, H = height, W = width
                        srcdst -
                        QASYMM8QASYMM8 -
                        QASYMM8_SIGNEDQASYMM8_SIGNED -
                        F16F16 -
                        F32F32 +
                        AllAll
                        - CLPReluLayer + CLSplit
                        • All @@ -569,19 +2236,16 @@ where N = batches, C = channels, H = height, W = width
                          srcdst -
                          QASYMM8QASYMM8 -
                          QASYMM8_SIGNEDQASYMM8_SIGNED -
                          F16F16 -
                          F32F32 +
                          AllAll
                          - QuantizationLayer - Function to perform quantization layer + StackLayer + Function to stack tensors along an axis.
                            -
                          • ANEURALNETWORKS_QUANTIZE +
                          • n/a
                          - NEQuantizationLayer + NEStackLayer
                          • All @@ -589,21 +2253,10 @@ where N = batches, C = channels, H = height, W = width
                            srcdst -
                            QASYMM8QASYMM8 -
                            QASYMM8QASYMM8_SIGNED -
                            QASYMM8QASYMM16 -
                            QASYMM8_SIGNEDQASYMM8 -
                            QASYMM8_SIGNEDQASYMM8_SIGNED -
                            QASYMM8_SIGNEDQASYMM16 -
                            F16QASYMM8 -
                            F16QASYMM8_SIGNED -
                            F16QASYMM16 -
                            F32QASYMM8 -
                            F32QASYMM8_SIGNED -
                            F32QASYMM16 +
                            AllAll
                            - CLQuantizationLayer + CLStackLayer
                            • All @@ -611,28 +2264,16 @@ where N = batches, C = channels, H = height, W = width
                              srcdst -
                              QASYMM8QASYMM8 -
                              QASYMM8QASYMM8_SIGNED -
                              QASYMM8QASYMM16 -
                              QASYMM8_SIGNEDQASYMM8 -
                              QASYMM8_SIGNEDQASYMM8_SIGNED -
                              QASYMM8_SIGNEDQASYMM16 -
                              F16QASYMM8 -
                              F16QASYMM8_SIGNED -
                              F16QASYMM16 -
                              F32QASYMM8 -
                              F32QASYMM8_SIGNED -
                              F32QASYMM16 +
                              AllAll
                              - ReshapeLayer - Fucntion to reshape a tensor + StridedSlice + Function to extract a strided slice of a tensor.
                                -
                              • ANEURALNETWORKS_RESHAPE -
                              • ANEURALNETWORKS_SQUEEZE +
                              • ANEURALNETWORKS_STRIDED_SLICE
                              - NEReshapeLayer + NEStridedSlice
                              • All @@ -643,7 +2284,7 @@ where N = batches, C = channels, H = height, W = width AllAll - CLReshapeLayer + CLStridedSlice
                                • All @@ -654,54 +2295,41 @@ where N = batches, C = channels, H = height, W = width AllAll - Scale - Fucntion to perform resize a tensor using to interpolate: - Bilenear - Nearest neighbor + Tile + Function to construct a tensor by tiling a given tensor.
                                    -
                                  • ANEURALNETWORKS_RESIZE_BILINEAR -
                                  • ANEURALNETWORKS_RESIZE_NEAREST_NEIGHBOR +
                                  • ANEURALNETWORKS_TILE
                                  - NEScale + NETile
                                    -
                                  • NHWC -
                                  • NCHW +
                                  • All
                                  srcdst -
                                  QASYMM8QASYMM8 -
                                  QASYMM8_SIGNEDQASYMM8_SIGNED -
                                  F16F16 -
                                  F32F32 -
                                  U8U8 -
                                  S16S16 +
                                  AllAll
                                  - CLScale + CLTile
                                    -
                                  • NHWC -
                                  • NCHW +
                                  • All
                                  srcdst -
                                  QASYMM8QASYMM8 -
                                  QASYMM8_SIGNEDQASYMM8_SIGNED -
                                  F16F16 -
                                  F32F32 -
                                  U8U8 -
                                  S16S16 +
                                  AllAll
                                  - Slice - Function to perform tensor slicing. + Transpose + Function to transpose a 2D tensor.
                                    -
                                  • ANEURALNETWORKS_SLICE +
                                  • ANEURALNETWORKS_TRANSPOSE
                                  - NESlice + NETranspose
                                  • All @@ -712,7 +2340,7 @@ where N = batches, C = channels, H = height, W = width AllAll - CLSlice + CLTranspose
                                    • All @@ -723,13 +2351,13 @@ where N = batches, C = channels, H = height, W = width AllAll - StridedSlice - Function to extract a strided slice of a tensor. + Unstack + Function to unpack a rank-R tensor into rank-(R-1) tensors.
                                        -
                                      • ANEURALNETWORKS_STRIDED_SLICE +
                                      • n/a
                                      - NEStridedSlice + NEUnstack
                                      • All @@ -740,7 +2368,7 @@ where N = batches, C = channels, H = height, W = width AllAll - CLStridedSlice + CLUnstack
                                        • All @@ -751,32 +2379,36 @@ where N = batches, C = channels, H = height, W = width AllAll - Transpose - Function to transpose an 2D tensor. + WinogradConvolutionLayer + Function to do Winograd Convolution.
                                            -
                                          • ANEURALNETWORKS_TRANSPOSE +
                                          • ANEURALNETWORKS_CONV_2D
                                          - NETranspose + NEWinogradConvolutionLayer
                                            -
                                          • All +
                                          • NHWC +
                                          • NCHW
                                          -
                                          srcdst -
                                          AllAll +
                                          src0src1src2dst +
                                          F16F16F16F16 +
                                          F32F32F32F32
                                          - CLTranspose + CLWinogradConvolutionLayer
                                            -
                                          • All +
                                          • NHWC +
                                          • NCHW
                                          -
                                          srcdst -
                                          AllAll +
                                          src0src1src2dst +
                                          F16F16F16F16 +
                                          F32F32F32F32
                                          diff --git a/src/core/CL/cl_kernels/bounding_box_transform.cl b/src/core/CL/cl_kernels/bounding_box_transform.cl index a9b0496a6e..f2e9cb0ed0 100644 --- a/src/core/CL/cl_kernels/bounding_box_transform.cl +++ b/src/core/CL/cl_kernels/bounding_box_transform.cl @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -25,7 +25,7 @@ #if defined(DATA_TYPE) && defined(WEIGHT_X) && defined(WEIGHT_Y) && defined(WEIGHT_W) && defined(WEIGHT_H) && defined(IMG_WIDTH) && defined(IMG_HEIGHT) && defined(BOX_FIELDS) && defined(SCALE_BEFORE) // Check for compile time constants -/** Perform a padded copy of input tensor to the output tensor. Padding values are defined at compile time +/** Transform proposal bounding boxes to target bounding box using bounding box deltas. * * @attention The following variables must be passed at compile time: * -# -DDATA_TYPE= Tensor data type. Supported data types: F16/F32 diff --git a/src/core/CL/cl_kernels/bounding_box_transform_quantized.cl b/src/core/CL/cl_kernels/bounding_box_transform_quantized.cl index 9e5cee55f4..c1d45a56b9 100644 --- a/src/core/CL/cl_kernels/bounding_box_transform_quantized.cl +++ b/src/core/CL/cl_kernels/bounding_box_transform_quantized.cl @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Arm Limited. + * Copyright (c) 2019-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -25,7 +25,7 @@ #if defined(DATA_TYPE) && defined(DATA_TYPE_DELTAS) && defined(WEIGHT_X) && defined(WEIGHT_Y) && defined(WEIGHT_W) && defined(WEIGHT_H) && defined(IMG_WIDTH) && defined(IMG_HEIGHT) && defined(BOX_FIELDS) && defined(SCALE_BEFORE) && defined(OFFSET_BOXES) && defined(SCALE_BOXES) && defined(OFFSET_DELTAS) && defined(SCALE_DELTAS) && defined(OFFSET_PRED_BOXES) && defined(SCALE_PRED_BOXES) // Check for compile time constants -/** Perform a padded copy of input tensor to the output tensor for quantized data types. 
Padding values are defined at compile time +/** Transform proposal bounding boxes to target bounding box using bounding box deltas for quantized data types. * * @attention The following variables must be passed at compile time: * -# -DDATA_TYPE= Tensor data type. Supported data types: QASYMM16 for boxes and pred_boxes, QASYMM8 for for deltas diff --git a/src/core/CL/cl_kernels/crop_tensor.cl b/src/core/CL/cl_kernels/crop_tensor.cl index 62ae36ac5c..d9090dc838 100644 --- a/src/core/CL/cl_kernels/crop_tensor.cl +++ b/src/core/CL/cl_kernels/crop_tensor.cl @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020 Arm Limited. + * Copyright (c) 2019-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -25,7 +25,7 @@ #if defined(DATA_TYPE) // Compile time constants -/** Performs a copy of input tensor to the output tensor. +/** Performs a tensor cropping. * * @param[in] in_ptr Pointer to the source tensor. Supported data types: All * @param[in] in_stride_x Stride of the source tensor in X dimension (in bytes) diff --git a/src/core/CL/cl_kernels/depth_to_space.cl b/src/core/CL/cl_kernels/depth_to_space.cl index d3231a59a1..f301e64d66 100644 --- a/src/core/CL/cl_kernels/depth_to_space.cl +++ b/src/core/CL/cl_kernels/depth_to_space.cl @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020 Arm Limited. + * Copyright (c) 2019-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,10 +24,10 @@ #include "helpers.h" #if defined(DATA_TYPE) && defined(BLOCK_SHAPE) && defined(CHANNEL_SIZE) -/** Batch to space transformation. (NCHW) +/** Depth to space transformation. (NCHW) * * @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g. -DDATA_TYPE=float - * @note The input tensor batch size must be passed at compile time using -DCHANNEL_SIZE. e.g. -DCHANNEL_SIZE=2 + * @note The input tensor depth size must be passed at compile time using -DCHANNEL_SIZE. e.g. -DCHANNEL_SIZE=2 * @note The block shape must be passed at compile time using -DBLOCK_SHAPE. e.g. 
-DBLOCK_SHAPE=2 * * @param[in] input_ptr Pointer to the source tensor. Supported data types: All. @@ -66,10 +66,10 @@ __kernel void depth_to_space_nchw( *((__global DATA_TYPE *)tensor4D_offset(&out, out_x, out_y, z, batch_id)) = *((__global DATA_TYPE *)in.ptr); } -/** Batch to space transformation. (NHWC) +/** Depth to space transformation. (NHWC) * * @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g. -DDATA_TYPE=float - * @note The input tensor batch size must be passed at compile time using -DCHANNEL_SIZE. e.g. -DCHANNEL_SIZE=2 + * @note The input tensor depth size must be passed at compile time using -DCHANNEL_SIZE. e.g. -DCHANNEL_SIZE=2 * @note The block shape must be passed at compile time using -DBLOCK_SHAPE. e.g. -DBLOCK_SHAPE=2 * * @param[in] input_ptr Pointer to the source tensor. Supported data types: All. diff --git a/src/core/NEON/kernels/NEReductionOperationKernel.h b/src/core/NEON/kernels/NEReductionOperationKernel.h index 667305b3aa..08e654fd21 100644 --- a/src/core/NEON/kernels/NEReductionOperationKernel.h +++ b/src/core/NEON/kernels/NEReductionOperationKernel.h @@ -59,7 +59,7 @@ public: /** Set the source, destination of the kernel * - * @param[in] input Source tensor. Data type supported: QASYMM8_SIGNED/QASYMM8/F16/F32/S32. Data layouts supported: NCHW. + * @param[in] input Source tensor. Data type supported: QASYMM8_SIGNED/QASYMM8/F16/F32/S32. * @param[out] output Destination tensor.Data types and data layouts supported: same as @p input, S32 for ARG_MIX/ARG_MAX. * Output will have the same number of dimensions as input. * @param[in] axis Axis along which to reduce. Supported reduction axis : 0 @@ -69,7 +69,7 @@ public: /** Static function to check if given info will lead to a valid configuration of @ref NEReductionOperationKernel. * - * @param[in] input Source tensor info. Data type supported: QASYMM8_SIGNED/QASYMM8/F16/F32/S32. Data layouts supported: NCHW. + * @param[in] input Source tensor info. 
Data type supported: QASYMM8_SIGNED/QASYMM8/F16/F32/S32. * @param[in] output Destination tensor info.Data types and data layouts supported: same as @p input, S32 for ARG_MIX/ARG_MAX. * Output will have the same number of dimensions as input. * @param[in] axis Axis along which to reduce. Supported reduction axis : 0 -- cgit v1.2.1