author     Teresa Charlin <teresa.charlinreyes@arm.com>    2021-04-28 10:58:49 +0100
committer  Sheri Zhang <sheri.zhang@arm.com>                2021-04-30 17:18:15 +0000
commit     62687420901c12be609426f3cf9dee300d25746a (patch)
tree       d9eaf86763aaef2debb4b3ac8ddd6c7064468325
parent     50929ef951880469b9d579323d2f9c9f5025327d (diff)
download   ComputeLibrary-62687420901c12be609426f3cf9dee300d25746a.tar.gz
Update operator list documentation. Part 2.
All data type and data layout information for the operators is stored in the function header files.

Signed-off-by: Teresa Charlin <teresa.charlinreyes@arm.com>
Change-Id: I30b564f7eda6bbd99bf3ad36ddb6639ac118eb8b
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/c/VisualCompute/ComputeLibrary/+/319829
Tested-by: bsgcomp <bsgcomp@arm.com>
Reviewed-by: Michele DiGiorgio <michele.digiorgio@arm.com>
Comments-Addressed: bsgcomp <bsgcomp@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5531
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: SiCong Li <sicong.li@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
-rw-r--r--  arm_compute/runtime/CL/functions/CLArgMinMaxLayer.h | 14
-rw-r--r--  arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h | 12
-rw-r--r--  arm_compute/runtime/CL/functions/CLBatchToSpaceLayer.h | 11
-rw-r--r--  arm_compute/runtime/CL/functions/CLBitwiseAnd.h | 8
-rw-r--r--  arm_compute/runtime/CL/functions/CLBitwiseNot.h | 7
-rw-r--r--  arm_compute/runtime/CL/functions/CLBitwiseOr.h | 8
-rw-r--r--  arm_compute/runtime/CL/functions/CLBitwiseXor.h | 8
-rw-r--r--  arm_compute/runtime/CL/functions/CLBoundingBoxTransform.h | 13
-rw-r--r--  arm_compute/runtime/CL/functions/CLCast.h | 24
-rw-r--r--  arm_compute/runtime/CL/functions/CLChannelShuffleLayer.h | 10
-rw-r--r--  arm_compute/runtime/CL/functions/CLConvolutionLayer.h | 31
-rw-r--r--  arm_compute/runtime/CL/functions/CLCropResize.h | 8
-rw-r--r--  arm_compute/runtime/CL/functions/CLDeconvolutionLayer.h | 14
-rw-r--r--  arm_compute/runtime/CL/functions/CLDepthConvertLayer.h | 24
-rw-r--r--  arm_compute/runtime/CL/functions/CLDepthToSpaceLayer.h | 11
-rw-r--r--  arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h | 14
-rw-r--r--  arm_compute/runtime/CL/functions/CLDequantizationLayer.h | 19
-rw-r--r--  arm_compute/runtime/CL/functions/CLFillBorder.h | 10
-rw-r--r--  arm_compute/runtime/CL/functions/CLFlattenLayer.h | 8
-rw-r--r--  arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h | 12
-rw-r--r--  arm_compute/runtime/CL/functions/CLFuseBatchNormalization.h | 12
-rw-r--r--  arm_compute/runtime/CL/functions/CLGEMM.h | 9
-rw-r--r--  arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h | 16
-rw-r--r--  arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h | 22
-rw-r--r--  arm_compute/runtime/CL/functions/CLGather.h | 10
-rw-r--r--  arm_compute/runtime/CL/functions/CLGenerateProposalsLayer.h | 10
-rw-r--r--  arm_compute/runtime/CL/functions/CLInstanceNormalizationLayer.h | 10
-rw-r--r--  arm_compute/runtime/CL/functions/CLL2NormalizeLayer.h | 12
-rw-r--r--  arm_compute/runtime/CL/functions/CLLSTMLayer.h | 9
-rw-r--r--  arm_compute/runtime/CL/functions/CLLSTMLayerQuantized.h | 10
-rw-r--r--  arm_compute/runtime/CL/functions/CLMaxUnpoolingLayer.h | 12
-rw-r--r--  arm_compute/runtime/CL/functions/CLMeanStdDevNormalizationLayer.h | 12
-rw-r--r--  arm_compute/runtime/CL/functions/CLNormalizationLayer.h | 10
-rw-r--r--  arm_compute/runtime/CL/functions/CLPadLayer.h | 9
-rw-r--r--  arm_compute/runtime/CL/functions/CLPriorBoxLayer.h | 11
-rw-r--r--  arm_compute/runtime/CL/functions/CLQLSTMLayer.h | 8
-rw-r--r--  arm_compute/runtime/CL/functions/CLQuantizationLayer.h | 20
-rw-r--r--  arm_compute/runtime/CL/functions/CLRNNLayer.h | 10
-rw-r--r--  arm_compute/runtime/CL/functions/CLROIAlignLayer.h | 13
-rw-r--r--  arm_compute/runtime/CL/functions/CLROIPoolingLayer.h | 10
-rw-r--r--  arm_compute/runtime/CL/functions/CLRange.h | 18
-rw-r--r--  arm_compute/runtime/CL/functions/CLReduceMean.h | 13
-rw-r--r--  arm_compute/runtime/CL/functions/CLReductionOperation.h | 12
-rw-r--r--  arm_compute/runtime/CL/functions/CLRemap.h | 10
-rw-r--r--  arm_compute/runtime/CL/functions/CLReorgLayer.h | 11
-rw-r--r--  arm_compute/runtime/CL/functions/CLReverse.h | 10
-rw-r--r--  arm_compute/runtime/CL/functions/CLSelect.h | 10
-rw-r--r--  arm_compute/runtime/CL/functions/CLSpaceToBatchLayer.h | 9
-rw-r--r--  arm_compute/runtime/CL/functions/CLSpaceToDepthLayer.h | 11
-rw-r--r--  arm_compute/runtime/CL/functions/CLSplit.h | 14
-rw-r--r--  arm_compute/runtime/CL/functions/CLStackLayer.h | 10
-rw-r--r--  arm_compute/runtime/CL/functions/CLTile.h | 10
-rw-r--r--  arm_compute/runtime/CL/functions/CLUnstack.h | 10
-rw-r--r--  arm_compute/runtime/CL/functions/CLWinogradConvolutionLayer.h | 12
-rw-r--r--  arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h | 12
-rw-r--r--  arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h | 12
-rw-r--r--  arm_compute/runtime/NEON/functions/NEBatchToSpaceLayer.h | 11
-rw-r--r--  arm_compute/runtime/NEON/functions/NEBitwiseAnd.h | 10
-rw-r--r--  arm_compute/runtime/NEON/functions/NEBitwiseNot.h | 10
-rw-r--r--  arm_compute/runtime/NEON/functions/NEBitwiseOr.h | 10
-rw-r--r--  arm_compute/runtime/NEON/functions/NEBitwiseXor.h | 10
-rw-r--r--  arm_compute/runtime/NEON/functions/NEBoundingBoxTransform.h | 11
-rw-r--r--  arm_compute/runtime/NEON/functions/NECast.h | 26
-rw-r--r--  arm_compute/runtime/NEON/functions/NEChannelShuffleLayer.h | 10
-rw-r--r--  arm_compute/runtime/NEON/functions/NEConvolutionLayer.h | 27
-rw-r--r--  arm_compute/runtime/NEON/functions/NECropResize.h | 8
-rw-r--r--  arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h | 14
-rw-r--r--  arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h | 24
-rw-r--r--  arm_compute/runtime/NEON/functions/NEDepthToSpaceLayer.h | 11
-rw-r--r--  arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h | 14
-rw-r--r--  arm_compute/runtime/NEON/functions/NEDequantizationLayer.h | 19
-rw-r--r--  arm_compute/runtime/NEON/functions/NEFillBorder.h | 8
-rw-r--r--  arm_compute/runtime/NEON/functions/NEFlattenLayer.h | 10
-rw-r--r--  arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h | 13
-rw-r--r--  arm_compute/runtime/NEON/functions/NEFuseBatchNormalization.h | 12
-rw-r--r--  arm_compute/runtime/NEON/functions/NEGEMM.h | 10
-rw-r--r--  arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h | 15
-rw-r--r--  arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h | 20
-rw-r--r--  arm_compute/runtime/NEON/functions/NEGather.h | 10
-rw-r--r--  arm_compute/runtime/NEON/functions/NEGenerateProposalsLayer.h | 10
-rw-r--r--  arm_compute/runtime/NEON/functions/NEInstanceNormalizationLayer.h | 12
-rw-r--r--  arm_compute/runtime/NEON/functions/NEL2NormalizeLayer.h | 12
-rw-r--r--  arm_compute/runtime/NEON/functions/NELSTMLayer.h | 11
-rw-r--r--  arm_compute/runtime/NEON/functions/NELSTMLayerQuantized.h | 8
-rw-r--r--  arm_compute/runtime/NEON/functions/NEMaxUnpoolingLayer.h | 12
-rw-r--r--  arm_compute/runtime/NEON/functions/NEMeanStdDevNormalizationLayer.h | 12
-rw-r--r--  arm_compute/runtime/NEON/functions/NENormalizationLayer.h | 10
-rw-r--r--  arm_compute/runtime/NEON/functions/NEPadLayer.h | 9
-rw-r--r--  arm_compute/runtime/NEON/functions/NEPriorBoxLayer.h | 11
-rw-r--r--  arm_compute/runtime/NEON/functions/NEQLSTMLayer.h | 8
-rw-r--r--  arm_compute/runtime/NEON/functions/NEQuantizationLayer.h | 20
-rw-r--r--  arm_compute/runtime/NEON/functions/NERNNLayer.h | 10
-rw-r--r--  arm_compute/runtime/NEON/functions/NEROIAlignLayer.h | 15
-rw-r--r--  arm_compute/runtime/NEON/functions/NEROIPoolingLayer.h | 9
-rw-r--r--  arm_compute/runtime/NEON/functions/NERange.h | 17
-rw-r--r--  arm_compute/runtime/NEON/functions/NEReduceMean.h | 13
-rw-r--r--  arm_compute/runtime/NEON/functions/NEReductionOperation.h | 16
-rw-r--r--  arm_compute/runtime/NEON/functions/NERemap.h | 8
-rw-r--r--  arm_compute/runtime/NEON/functions/NEReorgLayer.h | 11
-rw-r--r--  arm_compute/runtime/NEON/functions/NEReverse.h | 10
-rw-r--r--  arm_compute/runtime/NEON/functions/NESelect.h | 10
-rw-r--r--  arm_compute/runtime/NEON/functions/NESpaceToBatchLayer.h | 9
-rw-r--r--  arm_compute/runtime/NEON/functions/NESpaceToDepthLayer.h | 9
-rw-r--r--  arm_compute/runtime/NEON/functions/NESplit.h | 14
-rw-r--r--  arm_compute/runtime/NEON/functions/NEStackLayer.h | 10
-rw-r--r--  arm_compute/runtime/NEON/functions/NETile.h | 10
-rw-r--r--  arm_compute/runtime/NEON/functions/NEUnstack.h | 10
-rw-r--r--  arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h | 10
-rw-r--r--  arm_compute/runtime/OperatorList.h | 301
-rw-r--r--  docs/09_operators_list.dox | 1742
-rw-r--r--  src/core/CL/cl_kernels/bounding_box_transform.cl | 4
-rw-r--r--  src/core/CL/cl_kernels/bounding_box_transform_quantized.cl | 4
-rw-r--r--  src/core/CL/cl_kernels/crop_tensor.cl | 4
-rw-r--r--  src/core/CL/cl_kernels/depth_to_space.cl | 10
-rw-r--r--  src/core/NEON/kernels/NEReductionOperationKernel.h | 4
115 files changed, 2999 insertions(+), 394 deletions(-)
diff --git a/arm_compute/runtime/CL/functions/CLArgMinMaxLayer.h b/arm_compute/runtime/CL/functions/CLArgMinMaxLayer.h
index c254284cd7..a971163c45 100644
--- a/arm_compute/runtime/CL/functions/CLArgMinMaxLayer.h
+++ b/arm_compute/runtime/CL/functions/CLArgMinMaxLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -65,6 +65,18 @@ public:
~CLArgMinMaxLayer();
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:---------|
+ * |QASYMM8 |U32, S32 |
+ * |QASYMM8_SIGNED |U32, S32 |
+ * |S32 |U32, S32 |
+ * |F16 |U32, S32 |
+ * |F32 |U32, S32 |
+ *
* @param[in] input Input source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/S32/F16/F32.
* @param[in] axis Axis to find max/min index.
* @param[out] output Output source tensor. Data types supported: U32/S32.
diff --git a/arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h b/arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h
index c8acf9fc6b..fcfeb5ea3b 100644
--- a/arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h
+++ b/arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -60,6 +60,16 @@ public:
~CLBatchNormalizationLayer();
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |F32 |F32 |
+ * |F16 |F16 |
+ *
* @note If the output tensor is a nullptr or is equal to the input, the batch normalization function will be performed in-place
*
* @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result.
diff --git a/arm_compute/runtime/CL/functions/CLBatchToSpaceLayer.h b/arm_compute/runtime/CL/functions/CLBatchToSpaceLayer.h
index bdb58531d0..f6ba2b0b02 100644
--- a/arm_compute/runtime/CL/functions/CLBatchToSpaceLayer.h
+++ b/arm_compute/runtime/CL/functions/CLBatchToSpaceLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -55,6 +55,15 @@ public:
~CLBatchToSpaceLayer();
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |dst |
+ * |:---------|:---------|:----------|
+ * |All |S32 |All |
+ *
* @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
* @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32
* @param[out] output Tensor output. Data types supported: same as @p input
diff --git a/arm_compute/runtime/CL/functions/CLBitwiseAnd.h b/arm_compute/runtime/CL/functions/CLBitwiseAnd.h
index a703242875..b30be9b24f 100644
--- a/arm_compute/runtime/CL/functions/CLBitwiseAnd.h
+++ b/arm_compute/runtime/CL/functions/CLBitwiseAnd.h
@@ -41,6 +41,14 @@ class CLBitwiseAnd : public ICLSimpleFunction
public:
/** Initialize the function
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |U8 |U8 |
+ *
* @param[in] input1 Input tensor. Data types supported: U8.
* @param[in] input2 Input tensor. Data types supported: U8.
* @param[out] output Output tensor. Data types supported: U8.
diff --git a/arm_compute/runtime/CL/functions/CLBitwiseNot.h b/arm_compute/runtime/CL/functions/CLBitwiseNot.h
index 6f65749d9f..1456ebe57e 100644
--- a/arm_compute/runtime/CL/functions/CLBitwiseNot.h
+++ b/arm_compute/runtime/CL/functions/CLBitwiseNot.h
@@ -40,6 +40,13 @@ class CLBitwiseNot : public ICLSimpleFunction
{
public:
/** Initialize the function
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |U8 |U8 |
*
* @param[in] input Input tensor. Data types supported: U8.
* @param[out] output Output tensor. Data types supported: U8.
diff --git a/arm_compute/runtime/CL/functions/CLBitwiseOr.h b/arm_compute/runtime/CL/functions/CLBitwiseOr.h
index 3c904fb903..ff0a1f0d73 100644
--- a/arm_compute/runtime/CL/functions/CLBitwiseOr.h
+++ b/arm_compute/runtime/CL/functions/CLBitwiseOr.h
@@ -41,6 +41,14 @@ class CLBitwiseOr : public ICLSimpleFunction
public:
/** Initialize the function
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |U8 |U8 |
+ *
* @param[in] input1 Input tensor. Data types supported: U8.
* @param[in] input2 Input tensor. Data types supported: U8.
* @param[out] output Output tensor. Data types supported: U8.
diff --git a/arm_compute/runtime/CL/functions/CLBitwiseXor.h b/arm_compute/runtime/CL/functions/CLBitwiseXor.h
index a33a64ad71..0cd9d073b4 100644
--- a/arm_compute/runtime/CL/functions/CLBitwiseXor.h
+++ b/arm_compute/runtime/CL/functions/CLBitwiseXor.h
@@ -41,6 +41,14 @@ class CLBitwiseXor : public ICLSimpleFunction
public:
/** Initialize the function
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |U8 |U8 |
+ *
* @param[in] input1 Input tensor. Data types supported: U8.
* @param[in] input2 Input tensor. Data types supported: U8.
* @param[out] output Output tensor. Data types supported: U8.
diff --git a/arm_compute/runtime/CL/functions/CLBoundingBoxTransform.h b/arm_compute/runtime/CL/functions/CLBoundingBoxTransform.h
index d6409106da..d3499c3949 100644
--- a/arm_compute/runtime/CL/functions/CLBoundingBoxTransform.h
+++ b/arm_compute/runtime/CL/functions/CLBoundingBoxTransform.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -45,6 +45,17 @@ class CLBoundingBoxTransform : public ICLSimpleFunction
public:
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |dst |
+ * |:--------------|:--------------|:--------------|
+ * |QASYMM16 |QASYMM8 |QASYMM16 |
+ * |F16 |F16 |F16 |
+ * |F32 |F32 |F32 |
+ *
* @param[in] boxes Source tensor. Bounding box proposals in pixel coordinates. Size(M, 4), format [x1, y1, x2, y2]. Data types supported: QASYMM16/F16/F32.
* @param[out] pred_boxes Destination tensor. Pixel coordinates of the transformed bounding boxes. Size (M, 4*K), format [x1, y1, x2, y2]. Data types supported: Same as @p input
* @param[in] deltas Bounding box translations and scales. Size (M, 4*K), format [dx, dy, dw, dh], K is the number of classes.
diff --git a/arm_compute/runtime/CL/functions/CLCast.h b/arm_compute/runtime/CL/functions/CLCast.h
index bd333d4e72..6e4cf62547 100644
--- a/arm_compute/runtime/CL/functions/CLCast.h
+++ b/arm_compute/runtime/CL/functions/CLCast.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -41,17 +41,21 @@ class CLCast : public ICLSimpleFunction
public:
/** Initialize the function's source, destination
*
- * Input data type must be different than output data type.
+ * Valid data layouts:
+ * - All
*
- * Valid conversions Input -> Output :
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------------------------------|
+ * |U8 | S8, U16, S16, U32, S32, F16, F32 |
+ * |U16 | U8, S8, S16, U32, S32, F16, F32 |
+ * |S16 | U8, S8, U16, U32, S32, F16, F32 |
+ * |U32 | U8, S8, U16, S16, S32, F16, F32 |
+ * |S32 | U8, S8, U16, S16, U32, F16, F32 |
+ * |F16 | U8, S8, U16, S16, U32, F32 |
+ * |F32 | U8, S8, U16, S16, U32, F16 |
*
- * - U8 -> S8, U16, S16, U32, S32, F16, F32
- * - U16 -> U8, S8, S16, U32, S32, F16, F32
- * - S16 -> U8, S8, U16, U32, S32, F16, F32
- * - U32 -> U8, S8, U16, S16, S32, F16, F32
- * - S32 -> U8, S8, U16, S16, U32, F16, F32
- * - F16 -> U8, S8, U16, S16, U32, F32
- * - F32 -> U8, S8, U16, S16, U32, F16
+ * Input data type must be different than output data type.
*
* @param[in] input The input tensor to convert. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32.
* @param[out] output The output tensor. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32.
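(Illustration, not part of this patch.) A minimal usage sketch of the CLCast::configure() documented above; the tensor shape, the U8 -> F32 conversion and the SATURATE policy are assumptions chosen for the example, and it presumes a build with OpenCL support:

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"
    #include "arm_compute/runtime/CL/functions/CLCast.h"

    using namespace arm_compute;

    int main()
    {
        CLScheduler::get().default_init(); // create the OpenCL context and queue

        CLTensor src, dst;
        src.allocator()->init(TensorInfo(TensorShape(16U, 4U), 1, DataType::U8));
        dst.allocator()->init(TensorInfo(TensorShape(16U, 4U), 1, DataType::F32));

        CLCast cast;
        cast.configure(&src, &dst, ConvertPolicy::SATURATE); // U8 -> F32, one row of the table above

        src.allocator()->allocate();
        dst.allocator()->allocate();
        // ... map src, fill it with data, unmap ...
        cast.run();
        CLScheduler::get().sync();
        return 0;
    }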
diff --git a/arm_compute/runtime/CL/functions/CLChannelShuffleLayer.h b/arm_compute/runtime/CL/functions/CLChannelShuffleLayer.h
index 54cf59f59a..d60548d9cc 100644
--- a/arm_compute/runtime/CL/functions/CLChannelShuffleLayer.h
+++ b/arm_compute/runtime/CL/functions/CLChannelShuffleLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -44,6 +44,14 @@ class CLChannelShuffleLayer : public ICLSimpleFunction
public:
/** Initialize the function
*
+ * Valid data layouts:
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |All |All |
+ *
* @param[in] input Input tensor. Data types supported: All.
* @param[out] output Output tensor. Data type supported: Same as @p input
* @param[in] num_groups Number of groups. Must be greater than 1 and the number of channels of the tensors must be a multiple of the number of groups.
diff --git a/arm_compute/runtime/CL/functions/CLConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLConvolutionLayer.h
index d1de721193..6884754d83 100644
--- a/arm_compute/runtime/CL/functions/CLConvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLConvolutionLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -85,13 +85,27 @@ public:
CLConvolutionLayer &operator=(CLConvolutionLayer &&) = default;
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |src2 |dst |
+ * |:--------------|:------------------|:------|:--------------|
+ * |F16 |F16 |F16 |F16 |
+ * |F32 |F32 |F32 |F32 |
+ * |QASYMM8 |QASYMM8 |S32 |QASYMM8 |
+ * |QASYMM8 |QSYMM8_PER_CHANNEL |S32 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |QASYMM8_SIGNED |
+ * |QASYMM8_SIGNED |QSYMM8_PER_CHANNEL |S32 |QASYMM8_SIGNED |
+ *
* @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
* while every optional dimension from 4 and above represent a batch of inputs.
* Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
* @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
- * Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
+ * Data type supported: Same as @p input, also could be QSYMM8_PER_CHANNEL if input is QASYMM8/QASYMM8_SIGNED.
* @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
- * Data type supported: Should match @p input data type, except for input of QASYMM8 type where biases should be of S32 type.
+ * Data type supported: Same as @p input, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type.
* @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
* Data types supported: Same as @p input.
* @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
@@ -111,9 +125,9 @@ public:
* while every optional dimension from 4 and above represent a batch of inputs.
* Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
* @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
- * Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
+ * Data type supported: Same as @p input, also could be QSYMM8_PER_CHANNEL if input is QASYMM8/QASYMM8_SIGNED.
* @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
- * Data type supported: Should match @p input data type, except for input of QASYMM8 type where biases should be of S32 type.
+ * Data type supported: Same as @p input, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type.
* @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
* Data types supported: Same as @p input.
* @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
@@ -133,8 +147,9 @@ public:
* while every optional dimension from 4 and above represent a batch of inputs.
* Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
* @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
- * Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
- * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. Data type supported:Same as @p input.
+ * Data type supported: Same as @p input, also could be QSYMM8_PER_CHANNEL if input is QASYMM8/QASYMM8_SIGNED.
+ * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
+ * Data type supported: Same as @p input, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type.
* @param[in] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
* Data types supported: Same as @p input.
* @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
@@ -156,7 +171,7 @@ public:
* while every optional dimension from 4 and above represent a batch of inputs.
* Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
* @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
- * Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
+ * Data type supported: Same as @p input, also could be QSYMM8_PER_CHANNEL if input is QASYMM8/QASYMM8_SIGNED.
* @param[in] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
* Data types supported: Same as @p input.
* @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
diff --git a/arm_compute/runtime/CL/functions/CLCropResize.h b/arm_compute/runtime/CL/functions/CLCropResize.h
index 0dc3c48b32..5c60c2879c 100644
--- a/arm_compute/runtime/CL/functions/CLCropResize.h
+++ b/arm_compute/runtime/CL/functions/CLCropResize.h
@@ -61,6 +61,14 @@ public:
/** Configure kernel
*
+ * Valid data layouts:
+ * - NHWC
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |src2 |dst |
+ * |:--------|:--------|:------|:--------|
+ * |All |F32 |F32 |F32 |
+ *
* @note Supported tensor rank: up to 4
* @note Box indices may be outside of the bounds, in which case @p extrapolation_value is used.
* @note Start and end indices of boxes are inclusive.
diff --git a/arm_compute/runtime/CL/functions/CLDeconvolutionLayer.h b/arm_compute/runtime/CL/functions/CLDeconvolutionLayer.h
index 4be8c17835..2dd4cd4bf5 100644
--- a/arm_compute/runtime/CL/functions/CLDeconvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLDeconvolutionLayer.h
@@ -46,6 +46,20 @@ public:
/** Set the input, weights, biases and output tensors.
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |src2 |dst |
+ * |:--------------|:------------------|:------|:--------------|
+ * |F16 |F16 |F16 |F16 |
+ * |F32 |F32 |F32 |F32 |
+ * |QASYMM8 |QASYMM8 |S32 |QASYMM8 |
+ * |QASYMM8 |QSYMM8_PER_CHANNEL |S32 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |QASYMM8_SIGNED |
+ * |QASYMM8_SIGNED |QSYMM8_PER_CHANNEL |S32 |QASYMM8_SIGNED |
+ *
* @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
* @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. Data type supported: Same as @p input or QSYMM8_PER_CHANNEL if @p input is QASYMM8/QASYMM8_SIGNED.
* @param[in] bias (Optional) The biases have one dimension. Data type supported: Should match @p input data type, except for input of QASYMM8 and QASYMM8_SIGNED type where biases should be of S32 type
diff --git a/arm_compute/runtime/CL/functions/CLDepthConvertLayer.h b/arm_compute/runtime/CL/functions/CLDepthConvertLayer.h
index b0f297aec5..34dfdd7f3a 100644
--- a/arm_compute/runtime/CL/functions/CLDepthConvertLayer.h
+++ b/arm_compute/runtime/CL/functions/CLDepthConvertLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -41,17 +41,21 @@ class CLDepthConvertLayer : public ICLSimpleFunction
public:
/** Initialize the function's source, destination
*
- * Input data type must be different than output data type.
+ * Valid data layouts:
+ * - All
*
- * Valid conversions Input -> Output :
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:-------------------------------------|
+ * |U8 | S8, U16, S16, U32, S32, F16, F32 |
+ * |U16 | U8, S8, S16, U32, S32, F16, F32 |
+ * |S16 | U8, S8, U16, U32, S32, F16, F32 |
+ * |U32 | U8, S8, U16, S16, S32, F16, F32 |
+ * |S32 | U8, S8, U16, S16, U32, F16, F32 |
+ * |F16 | U8, S8, U16, S16, U32, F32 |
+ * |F32 | U8, S8, U16, S16, U32, F16 |
*
- * - U8 -> S8, U16, S16, U32, S32, F16, F32
- * - U16 -> U8, S8, S16, U32, S32, F16, F32
- * - S16 -> U8, S8, U16, U32, S32, F16, F32
- * - U32 -> U8, S8, U16, S16, S32, F16, F32
- * - S32 -> U8, S8, U16, S16, U32, F16, F32
- * - F16 -> U8, S8, U16, S16, U32, F32
- * - F32 -> U8, S8, U16, S16, U32, F16
+ * Input data type must be different than output data type.
*
* @param[in] input The input tensor to convert. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32.
* @param[out] output The output tensor. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32.
diff --git a/arm_compute/runtime/CL/functions/CLDepthToSpaceLayer.h b/arm_compute/runtime/CL/functions/CLDepthToSpaceLayer.h
index a0aa288dbf..0026cc2b67 100644
--- a/arm_compute/runtime/CL/functions/CLDepthToSpaceLayer.h
+++ b/arm_compute/runtime/CL/functions/CLDepthToSpaceLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -39,6 +39,15 @@ class CLDepthToSpaceLayer : public ICLSimpleFunction
public:
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |All |All |
+ *
* @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
* @param[out] output Tensor output. Data types supported: same as @p input
* @param[in] block_shape Block shape value.
diff --git a/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h
index 1af9e1dc6f..f31a17d9cb 100644
--- a/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h
@@ -58,6 +58,20 @@ public:
~CLDepthwiseConvolutionLayer();
/** Initialize the function's source, destination, weights and convolution information.
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |src2 |dst |
+ * |:--------------|:------------------|:------|:--------------|
+ * |F16 |F16 |F16 |F16 |
+ * |F32 |F32 |F32 |F32 |
+ * |QASYMM8 |QASYMM8 |S32 |QASYMM8 |
+ * |QASYMM8 |QSYMM8_PER_CHANNEL |S32 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |QASYMM8_SIGNED |
+ * |QASYMM8_SIGNED |QSYMM8_PER_CHANNEL |S32 |QASYMM8_SIGNED |
+ *
* @param[in, out] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/FP16/FP32. Data layout supported: NHWC, NCHW
* @param[in] weights Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM].
* Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
diff --git a/arm_compute/runtime/CL/functions/CLDequantizationLayer.h b/arm_compute/runtime/CL/functions/CLDequantizationLayer.h
index 4a5c3a3203..601c13d0e4 100644
--- a/arm_compute/runtime/CL/functions/CLDequantizationLayer.h
+++ b/arm_compute/runtime/CL/functions/CLDequantizationLayer.h
@@ -58,18 +58,13 @@ public:
* - All
*
* Valid data type configurations:
- * |src |dst |
- * |:------------------|:--------------|
- * |QASYMM8 |F16 |
- * |QASYMM8 |F32 |
- * |QASYMM8_SIGNED |F16 |
- * |QASYMM8_SIGNED |F32 |
- * |QSYMM8_PER_CHANNEL |F16 |
- * |QSYMM8_PER_CHANNEL |F32 |
- * |QSYMM8 |F16 |
- * |QSYMM8 |F32 |
- * |QSYMM16 |F16 |
- * |QSYMM16 |F32 |
+ * |src |dst |
+ * |:------------------|:---------|
+ * |QASYMM8 |F16, F32 |
+ * |QASYMM8_SIGNED |F16, F32 |
+ * |QSYMM8_PER_CHANNEL |F16, F32 |
+ * |QSYMM8 |F16, F32 |
+ * |QSYMM16 |F16, F32 |
*
* @param[in] input Source tensor with at least 3 dimensions. The dimensions over the third will be interpreted as batches.
* Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16.
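(Illustration, not part of this patch.) A minimal sketch of the QASYMM8 -> F32 row of the table above; the shape and the scale/offset of the quantized input are illustrative assumptions:

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"
    #include "arm_compute/runtime/CL/functions/CLDequantizationLayer.h"

    using namespace arm_compute;

    int main()
    {
        CLScheduler::get().default_init();

        CLTensor src, dst;
        src.allocator()->init(TensorInfo(TensorShape(32U, 3U), 1, DataType::QASYMM8, QuantizationInfo(0.5f, 10)));
        dst.allocator()->init(TensorInfo(TensorShape(32U, 3U), 1, DataType::F32)); // same shape, dequantized values

        CLDequantizationLayer dequant;
        dequant.configure(&src, &dst);

        src.allocator()->allocate();
        dst.allocator()->allocate();
        // ... fill src with quantized data ...
        dequant.run();
        CLScheduler::get().sync();
        return 0;
    }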
diff --git a/arm_compute/runtime/CL/functions/CLFillBorder.h b/arm_compute/runtime/CL/functions/CLFillBorder.h
index a4ad82dfd4..20f2e15b72 100644
--- a/arm_compute/runtime/CL/functions/CLFillBorder.h
+++ b/arm_compute/runtime/CL/functions/CLFillBorder.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -39,6 +39,14 @@ class CLFillBorder : public ICLSimpleFunction
public:
/** Initialize the function
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |All |All |
+ *
* @param[in,out] tensor Source tensor. Data types supported: U8/QASYMM8/S8/QASYMM8_SIGNED/U16/S16/U32/S32/F16/F32.
* @param[in] border_width The border width
* @param[in] border_mode Strategy to use for borders.
diff --git a/arm_compute/runtime/CL/functions/CLFlattenLayer.h b/arm_compute/runtime/CL/functions/CLFlattenLayer.h
index 8dedd7458d..d2c37b1c22 100644
--- a/arm_compute/runtime/CL/functions/CLFlattenLayer.h
+++ b/arm_compute/runtime/CL/functions/CLFlattenLayer.h
@@ -44,6 +44,14 @@ class CLFlattenLayer : public IFunction
public:
/** Initialise the kernel's input and output.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |All |All |
+ *
* @param[in] input First input tensor to flatten with at least 3 dimensions.
* The dimensions above the third will be interpreted as batches. Data types supported: All.
* @param[out] output Output tensor with shape [w*h*d, input_batches] where:
diff --git a/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h b/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h
index 75cb2dc1fa..eec01bcebe 100644
--- a/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h
+++ b/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h
@@ -117,6 +117,18 @@ public:
CLFullyConnectedLayer &operator=(CLFullyConnectedLayer &&) = default;
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |src2 |dst |
+ * |:--------------|:------------------|:------|:--------------|
+ * |F16 |F16 |F16 |F16 |
+ * |F32 |F32 |F32 |F32 |
+ * |QASYMM8 |QASYMM8 |S32 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |QASYMM8_SIGNED |
+ *
* @param[in] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
* @param[in] weights Weights tensor. The weights must be 2 dimensional.
* If this function is called after a Convolution Layer, the (transposed) weights will have as many rows as the product of the first 3 input's dimensions.
diff --git a/arm_compute/runtime/CL/functions/CLFuseBatchNormalization.h b/arm_compute/runtime/CL/functions/CLFuseBatchNormalization.h
index e35905fcf1..cd75270392 100644
--- a/arm_compute/runtime/CL/functions/CLFuseBatchNormalization.h
+++ b/arm_compute/runtime/CL/functions/CLFuseBatchNormalization.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -55,6 +55,16 @@ public:
~CLFuseBatchNormalization();
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |F32 |F32 |
+ * |F16 |F16 |
+ *
* @param[in] input_weights Input weights tensor for convolution or depthwise convolution layer. Data type supported: F16/F32. Data layout supported: NCHW, NHWC
* @param[in] bn_mean Batch normalization layer mean tensor. Same as @p input_weights
* @param[in] bn_var Batch normalization layer variance tensor. Same as @p input_weights
diff --git a/arm_compute/runtime/CL/functions/CLGEMM.h b/arm_compute/runtime/CL/functions/CLGEMM.h
index 8a210a2ba5..1e2ae7be64 100644
--- a/arm_compute/runtime/CL/functions/CLGEMM.h
+++ b/arm_compute/runtime/CL/functions/CLGEMM.h
@@ -126,6 +126,15 @@ public:
~CLGEMM();
/** Initialise the kernel's inputs and output
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |src2 |dst |
+ * |:------------|:-----------|:---------|:--------------|
+ * |F32 |F32 |F32 |F32 |
+ * |F16 |F16 |F16 |F16 |
+ *
* @note GEMM: General Matrix Multiply - [alpha * A * B + beta * C].
*
* @note All tensors must have the same data type.
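(Illustration, not part of this patch.) A minimal F32 sketch of the GEMM documented above, dst = alpha * A * B + beta * C; the M/N/K sizes are arbitrary and the C operand is omitted here (nullptr) with beta = 0:

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"
    #include "arm_compute/runtime/CL/functions/CLGEMM.h"

    using namespace arm_compute;

    int main()
    {
        CLScheduler::get().default_init();

        // A: M x K, B: K x N, dst: M x N. TensorShape is (width, height), i.e. (columns, rows).
        const unsigned int M = 4, N = 8, K = 16;
        CLTensor a, b, dst;
        a.allocator()->init(TensorInfo(TensorShape(K, M), 1, DataType::F32));
        b.allocator()->init(TensorInfo(TensorShape(N, K), 1, DataType::F32));
        dst.allocator()->init(TensorInfo(TensorShape(N, M), 1, DataType::F32));

        CLGEMM gemm;
        gemm.configure(&a, &b, nullptr, &dst, 1.f, 0.f); // alpha = 1, beta = 0, no C tensor

        a.allocator()->allocate();
        b.allocator()->allocate();
        dst.allocator()->allocate();
        // ... fill a and b ...
        gemm.run();
        CLScheduler::get().sync();
        return 0;
    }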
diff --git a/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h
index 4dbd0f828a..082b481047 100644
--- a/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -194,6 +194,20 @@ public:
~CLGEMMConvolutionLayer();
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |src2 |dst |
+ * |:--------------|:------------------|:--------|:--------------|
+ * |F16 |F16 |F16 |F16 |
+ * |F32 |F32 |F32 |F32 |
+ * |QASYMM8 |QASYMM8 |S32 |QASYMM8 |
+ * |QASYMM8 |QSYMM8_PER_CHANNEL |S32 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |QASYMM8_SIGNED |
+ * |QASYMM8_SIGNED |QSYMM8_PER_CHANNEL |S32 |QASYMM8_SIGNED |
+ *
* @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
* while every optional dimension from 4 and above represent a batch of inputs.
* Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
diff --git a/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h b/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h
index 4cc8899690..e7f4cb9d01 100644
--- a/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h
+++ b/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -61,6 +61,26 @@ public:
~CLGEMMLowpMatrixMultiplyCore();
/** Initialise the kernel's inputs, output
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |src2 |dst |
+ * |:--------------|:------------------|:--------|:--------------|
+ * |QASYMM8 |QASYMM8 |S32 |QASYMM8 |
+ * |QASYMM8 |QSYMM8_PER_CHANNEL |S32 |QASYMM8 |
+ * |QASYMM8 |QSYMM8 |S32 |QASYMM8 |
+ * |QASYMM8 |QASYMM8 |S32 |S32 |
+ * |QASYMM8 |QSYMM8_PER_CHANNEL |S32 |S32 |
+ * |QASYMM8 |QSYMM8 |S32 |S32 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |QASYMM8_SIGNED |
+ * |QASYMM8_SIGNED |QSYMM8_PER_CHANNEL |S32 |QASYMM8_SIGNED |
+ * |QASYMM8_SIGNED |QSYMM8 |S32 |QASYMM8_SIGNED |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |S32 |
+ * |QASYMM8_SIGNED |QSYMM8_PER_CHANNEL |S32 |S32 |
+ * |QASYMM8_SIGNED |QSYMM8 |S32 |S32 |
+ *
* @note GEMMLowp: low precision GEMM kernel. [A * B + C]
* This kernel performs the following computations:
*
diff --git a/arm_compute/runtime/CL/functions/CLGather.h b/arm_compute/runtime/CL/functions/CLGather.h
index 9c659be6fc..7a57c7358c 100644
--- a/arm_compute/runtime/CL/functions/CLGather.h
+++ b/arm_compute/runtime/CL/functions/CLGather.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -40,6 +40,14 @@ class CLGather : public ICLSimpleFunction
public:
/** Initialise the kernel's inputs and outputs
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |All |All |
+ *
* @param[in] input Source tensor. Supported tensor rank: up to 4. Data type supported: All.
* @param[in] indices Indices tensor. Supported tensor rank: up to 1. Must be one of the following types: U32/S32. Each value must be in range [0, input.shape[@p axis])
* @param[out] output Destination tensor. Data type supported: Same as @p input
diff --git a/arm_compute/runtime/CL/functions/CLGenerateProposalsLayer.h b/arm_compute/runtime/CL/functions/CLGenerateProposalsLayer.h
index bea470712c..aec5cdf1a8 100644
--- a/arm_compute/runtime/CL/functions/CLGenerateProposalsLayer.h
+++ b/arm_compute/runtime/CL/functions/CLGenerateProposalsLayer.h
@@ -77,6 +77,16 @@ public:
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |src2 |dst |
+ * |:--------------|:------------------|:--------|:--------------|
+ * |F16 |F16 |F16 |F16 |
+ * |F32 |F32 |F32 |F32 |
+ * |QASYMM8 |QSYMM8 |QSYMM16 |QASYMM8 |
+ *
* @param[in] scores Scores from convolution layer of size (W, H, A), where H and W are the height and width of the feature map, and A is the number of anchors.
* Data types supported: QASYMM8/F16/F32
* @param[in] deltas Bounding box deltas from convolution layer of size (W, H, 4*A). Data types supported: Same as @p scores
diff --git a/arm_compute/runtime/CL/functions/CLInstanceNormalizationLayer.h b/arm_compute/runtime/CL/functions/CLInstanceNormalizationLayer.h
index a6e5b1622b..985a6a75f7 100644
--- a/arm_compute/runtime/CL/functions/CLInstanceNormalizationLayer.h
+++ b/arm_compute/runtime/CL/functions/CLInstanceNormalizationLayer.h
@@ -65,6 +65,16 @@ public:
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------|:---------|
+ * |F16 |F16 |
+ * |F32 |F32 |
+ *
* @param[in, out] input Source tensor. In case of @p output tensor = nullptr this tensor will store the result of the normalization.
* Data types supported: F16/F32. Data layout supported: NHWC, NCHW
* @param[out] output Destination tensor. Data types and data layouts supported: same as @p input.
diff --git a/arm_compute/runtime/CL/functions/CLL2NormalizeLayer.h b/arm_compute/runtime/CL/functions/CLL2NormalizeLayer.h
index 401d249eb4..4dc5c778d2 100644
--- a/arm_compute/runtime/CL/functions/CLL2NormalizeLayer.h
+++ b/arm_compute/runtime/CL/functions/CLL2NormalizeLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -65,6 +65,16 @@ public:
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------|:---------|
+ * |F16 |F16 |
+ * |F32 |F32 |
+ *
* @param[in] input Source tensor. Data types supported: F16/F32. Data layouts supported: NCHW/NHWC.
* @param[out] output Destination tensor. Data types and data layouts supported: Same as @p input.
* @param[in] axis Axis along which to reduce. Negative values wrap around. Maximum supported actual reduction axis : 2
diff --git a/arm_compute/runtime/CL/functions/CLLSTMLayer.h b/arm_compute/runtime/CL/functions/CLLSTMLayer.h
index 38a24d030b..d26b4c5595 100644
--- a/arm_compute/runtime/CL/functions/CLLSTMLayer.h
+++ b/arm_compute/runtime/CL/functions/CLLSTMLayer.h
@@ -75,6 +75,15 @@ public:
~CLLSTMLayer();
/** Initialize function's tensors.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src0 - src13 | dst0 - dst3 |
+ * |:------------|:------------|
+ * |F16 |F16 |
+ * |F32 |F32 |
+ *
* @param[in] input Source tensor. Input is a 2D tensor with dimensions [input_size, batch_size]. Data types supported: F16/F32.
* @param[in] input_to_forget_weights 2D weights tensor with dimensions [input_size, num_units]. Data type supported: Same as @p input.
* @param[in] input_to_cell_weights 2D weights tensor with dimensions [input_size, num_units]. Data type supported: Same as @p input.
diff --git a/arm_compute/runtime/CL/functions/CLLSTMLayerQuantized.h b/arm_compute/runtime/CL/functions/CLLSTMLayerQuantized.h
index 0829052384..2ef7427a5a 100644
--- a/arm_compute/runtime/CL/functions/CLLSTMLayerQuantized.h
+++ b/arm_compute/runtime/CL/functions/CLLSTMLayerQuantized.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -73,6 +73,14 @@ public:
CLLSTMLayerQuantized &operator=(CLLSTMLayerQuantized &&) = default;
/** Initialize function's tensors.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src0 - src8 |src9 - src12 |src13 |src14 |dst0 |dst1 |
+ * |:-----------|:------------|:-------|:------|:------|:------|
+ * |QASYMM8 |S32 |QSYMM16 |QASYMM8|QSYMM16|QASYMM8|
+ *
* @param[in] input Source tensor. Input is a 2D tensor with dimensions [input_size, batch_size]. Data types supported: QASYMM8.
* @param[in] input_to_input_weights 2D weights tensor with dimensions [input_size, output_size]. Data type supported: Same as @p input.
* @param[in] input_to_forget_weights 2D weights tensor with dimensions [input_size, output_size]. Data type supported: Same as @p input.
diff --git a/arm_compute/runtime/CL/functions/CLMaxUnpoolingLayer.h b/arm_compute/runtime/CL/functions/CLMaxUnpoolingLayer.h
index 24d620d372..f7ff1234f6 100644
--- a/arm_compute/runtime/CL/functions/CLMaxUnpoolingLayer.h
+++ b/arm_compute/runtime/CL/functions/CLMaxUnpoolingLayer.h
@@ -56,6 +56,18 @@ public:
~CLMaxUnpoolingLayer();
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |QASYMM8 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |
+ * |F16 |F16 |
+ * |F32 |F32 |
+ *
* @note Output shape must be equal to the shape of the original input to pool.
*
* @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
diff --git a/arm_compute/runtime/CL/functions/CLMeanStdDevNormalizationLayer.h b/arm_compute/runtime/CL/functions/CLMeanStdDevNormalizationLayer.h
index cfe59eac09..68a7df24e6 100644
--- a/arm_compute/runtime/CL/functions/CLMeanStdDevNormalizationLayer.h
+++ b/arm_compute/runtime/CL/functions/CLMeanStdDevNormalizationLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -39,6 +39,16 @@ class CLMeanStdDevNormalizationLayer : public ICLSimpleFunction
public:
/** Initialise the function's input and outputs.
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------|:---------|
+ * |F32 |F32 |
+ * |F16 |F16 |
+ *
* @note If the output tensor is a nullptr, the normalization will be performed in-place.
*
* @param[in, out] input Input tensor with 2 dimensions. Data types supported: F16/F32.
diff --git a/arm_compute/runtime/CL/functions/CLNormalizationLayer.h b/arm_compute/runtime/CL/functions/CLNormalizationLayer.h
index 706cb6f152..15406f7728 100644
--- a/arm_compute/runtime/CL/functions/CLNormalizationLayer.h
+++ b/arm_compute/runtime/CL/functions/CLNormalizationLayer.h
@@ -61,6 +61,16 @@ public:
~CLNormalizationLayer();
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------|:---------|
+ * |F32 |F32 |
+ * |F16 |F16 |
+ *
* @param[in, out] input Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM],
* and an optional 4th dimension for batch of inputs. Data types supported: F16/F32 (Written to by the border handler).
* Data layouts supported: NCHW/NHWC.
diff --git a/arm_compute/runtime/CL/functions/CLPadLayer.h b/arm_compute/runtime/CL/functions/CLPadLayer.h
index dae95f63e6..7f950bcfb3 100644
--- a/arm_compute/runtime/CL/functions/CLPadLayer.h
+++ b/arm_compute/runtime/CL/functions/CLPadLayer.h
@@ -59,6 +59,15 @@ public:
/** Initialize the function
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------|:---------|
+ * |All |All |
+ *
* @param[in] input Source tensor. Data types supported: All.
* @param[out] output Output tensor. Data type supported: same as @p input
* @param[in] padding The padding for each spatial dimension of the input tensor. The pair padding[i]
diff --git a/arm_compute/runtime/CL/functions/CLPriorBoxLayer.h b/arm_compute/runtime/CL/functions/CLPriorBoxLayer.h
index 9129bfd064..9b36c9e433 100644
--- a/arm_compute/runtime/CL/functions/CLPriorBoxLayer.h
+++ b/arm_compute/runtime/CL/functions/CLPriorBoxLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -43,6 +43,15 @@ public:
CLPriorBoxLayer();
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |dst |
+ * |:--------|:--------|:--------|
+ * |F32 |F32 |F32 |
+ *
* @param[in] input1 First source tensor. Data types supported: F32. Data layouts supported: NCHW/NHWC.
* @param[in] input2 Second source tensor. Data types and layouts supported: same as @p input1
* @param[out] output Destination tensor. Output dimensions are [W * H * num_priors * 4, 2]. Data types and layouts supported: same as @p input1
diff --git a/arm_compute/runtime/CL/functions/CLQLSTMLayer.h b/arm_compute/runtime/CL/functions/CLQLSTMLayer.h
index 954f224424..bd00d56468 100644
--- a/arm_compute/runtime/CL/functions/CLQLSTMLayer.h
+++ b/arm_compute/runtime/CL/functions/CLQLSTMLayer.h
@@ -74,6 +74,14 @@ public:
~CLQLSTMLayer();
/** Initialize function's tensors.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src0 |src1 - src6 |src7 -src9 |src10 |src11 |dst0 |dst1 - dst2 |
+ * |:-------------|:------------|:------------|:------|:-------------|:------|:-----------------|
+ * |QASYMM8_SIGNED|QASYMM8 |S32 |QSYMM16|QASYMM8_SIGNED|QSYMM16|QASYMM8_SIGNED |
+ *
* @param[in] input Source tensor. Input is a 2D tensor with dimensions [input_size, batch_size]. Data types supported: QASYMM8_SIGNED.
* @param[in] input_to_forget_weights 2D weights tensor with dimensions [input_size, num_units]. Data type supported: QSYMM8.
* @param[in] input_to_cell_weights 2D weights tensor with dimensions [input_size, num_units]. Data type supported: QSYMM8.
diff --git a/arm_compute/runtime/CL/functions/CLQuantizationLayer.h b/arm_compute/runtime/CL/functions/CLQuantizationLayer.h
index 6a44a226d4..a61735cb97 100644
--- a/arm_compute/runtime/CL/functions/CLQuantizationLayer.h
+++ b/arm_compute/runtime/CL/functions/CLQuantizationLayer.h
@@ -63,20 +63,12 @@ public:
* - All
*
* Valid data type configurations:
- * |src |dst |
- * |:------------------|:--------------|
- * |QASYMM8 |QASYMM8 |
- * |QASYMM8 |QASYMM8_SIGNED |
- * |QASYMM8 |QASYMM16 |
- * |QASYMM8_SIGNED |QASYMM8 |
- * |QASYMM8_SIGNED |QASYMM8_SIGNED |
- * |QASYMM8_SIGNED |QASYMM16 |
- * |F16 |QASYMM8 |
- * |F16 |QASYMM8_SIGNED |
- * |F16 |QASYMM16 |
- * |F32 |QASYMM8 |
- * |F32 |QASYMM8_SIGNED |
- * |F32 |QASYMM16 |
+ * |src |dst |
+ * |:------------------|:----------------------------------|
+ * |QASYMM8 |QASYMM8, QASYMM8_SIGNED, QASYMM16 |
+ * |QASYMM8_SIGNED |QASYMM8, QASYMM8_SIGNED, QASYMM16 |
+ * |F16 |QASYMM8, QASYMM8_SIGNED, QASYMM16 |
+ * |F32 |QASYMM8, QASYMM8_SIGNED, QASYMM16 |
*
* @param[in] input Source tensor. The dimensions over the third will be interpreted as batches. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
* @param[out] output Destination tensor with the same dimensions of input. Data types supported: QASYMM8/QASYMM8_SIGNED/QASYMM16.
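(Illustration, not part of this patch.) A minimal sketch of the F32 -> QASYMM8 row of the table above; the shape and the scale/offset chosen for the quantized output are illustrative assumptions:

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"
    #include "arm_compute/runtime/CL/functions/CLQuantizationLayer.h"

    using namespace arm_compute;

    int main()
    {
        CLScheduler::get().default_init();

        CLTensor src, dst;
        src.allocator()->init(TensorInfo(TensorShape(32U, 3U), 1, DataType::F32));
        dst.allocator()->init(TensorInfo(TensorShape(32U, 3U), 1, DataType::QASYMM8, QuantizationInfo(1.f / 255.f, 0)));

        CLQuantizationLayer quant;
        quant.configure(&src, &dst); // F32 -> QASYMM8 using dst's quantization info

        src.allocator()->allocate();
        dst.allocator()->allocate();
        // ... fill src ...
        quant.run();
        CLScheduler::get().sync();
        return 0;
    }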
diff --git a/arm_compute/runtime/CL/functions/CLRNNLayer.h b/arm_compute/runtime/CL/functions/CLRNNLayer.h
index 50575daaa3..2b3b35e37d 100644
--- a/arm_compute/runtime/CL/functions/CLRNNLayer.h
+++ b/arm_compute/runtime/CL/functions/CLRNNLayer.h
@@ -51,6 +51,16 @@ public:
~CLRNNLayer();
/** Initialize the function
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |src2 |src3 |dst0 |dst1 |
+ * |:------|:------|:------|:------|:------|:------|
+ * |F16 |F16 |F16 |F16 |F16 |F16 |
+ * |F32 |F32 |F32 |F32 |F32 |F32 |
+ *
* @param[in] input Input is a 2-D tensor of shape [input_size, batch_size]. Data types supported: F16/F32
* @param[in] weights Weights tensor of shape [input_size, num_units] that multiplies the input. Data types supported: Same as @p input
* @param[in] recurrent_weights Weights tensor of shape [num_units, num_units] that multiplies the current 'state'. Data types supported: Same as @p input
diff --git a/arm_compute/runtime/CL/functions/CLROIAlignLayer.h b/arm_compute/runtime/CL/functions/CLROIAlignLayer.h
index b4cd5560ef..1eaea1b297 100644
--- a/arm_compute/runtime/CL/functions/CLROIAlignLayer.h
+++ b/arm_compute/runtime/CL/functions/CLROIAlignLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -45,6 +45,17 @@ class CLROIAlignLayer : public ICLSimpleFunction
public:
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |dst |
+ * |:--------------|:--------------|:--------------|
+ * |F16 |F16 |F16 |
+ * |F32 |F32 |F32 |
+ * |QASYMM8 |QASYMM16 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM16 |QASYMM8_SIGNED |
+ *
* @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
* @param[in] rois ROIs tensor, it is a 2D tensor of size [5, N] (where N is the number of ROIs) containing top left and bottom right corner
* as coordinate of an image and batch_id of ROI [ batch_id, x1, y1, x2, y2 ].
diff --git a/arm_compute/runtime/CL/functions/CLROIPoolingLayer.h b/arm_compute/runtime/CL/functions/CLROIPoolingLayer.h
index a4c5c76f2e..151586a1f6 100644
--- a/arm_compute/runtime/CL/functions/CLROIPoolingLayer.h
+++ b/arm_compute/runtime/CL/functions/CLROIPoolingLayer.h
@@ -45,6 +45,16 @@ class CLROIPoolingLayer : public ICLSimpleFunction
public:
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |dst |
+ * |:--------------|:--------------|:--------------|
+ * |F16 |U16 |F16 |
+ * |F32 |U16 |F32 |
+ * |QASYMM8 |U16 |QASYMM8 |
+ *
* @param[in] input Source tensor. Data types supported: F16/F32/QASYMM8
* @param[in] rois ROIs tensor, it is a 2D tensor of size [5, N] (where N is the number of ROIs) containing top left and bottom right corner
* as coordinate of an image and batch_id of ROI [ batch_id, x1, y1, x2, y2 ]. Data types supported: U16
diff --git a/arm_compute/runtime/CL/functions/CLRange.h b/arm_compute/runtime/CL/functions/CLRange.h
index e11e740861..fbce05162c 100644
--- a/arm_compute/runtime/CL/functions/CLRange.h
+++ b/arm_compute/runtime/CL/functions/CLRange.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -43,6 +43,22 @@ class CLRange : public ICLSimpleFunction
public:
/** Initialize the kernel's start, end, step and output tensor.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |dst |
+ * |:---------|
+ * |U8 |
+ * |S8 |
+ * |QASYMM8 |
+ * |U16 |
+ * |S16 |
+ * |U32 |
+ * |S32 |
+ * |F16 |
+ * |F32 |
+ *
* @param[out] output Output tensor. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32.
* @param[in] start The starting value of the sequence.
* @param[in] end The ending (not including) value of the sequence.
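(Illustration, not part of this patch.) A minimal sketch of CLRange producing the F32 sequence 0, 1, ..., 9; the output length (10 elements for start 0, end 10, step 1) and the data type are assumptions of the example:

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"
    #include "arm_compute/runtime/CL/functions/CLRange.h"

    using namespace arm_compute;

    int main()
    {
        CLScheduler::get().default_init();

        CLTensor dst;
        dst.allocator()->init(TensorInfo(TensorShape(10U), 1, DataType::F32)); // one element per step in [0, 10)

        CLRange range;
        range.configure(&dst, 0.f, 10.f, 1.f); // start, end (exclusive), step

        dst.allocator()->allocate();
        range.run();
        CLScheduler::get().sync();
        return 0;
    }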
diff --git a/arm_compute/runtime/CL/functions/CLReduceMean.h b/arm_compute/runtime/CL/functions/CLReduceMean.h
index c37ee8c5ab..1ce088b2ce 100644
--- a/arm_compute/runtime/CL/functions/CLReduceMean.h
+++ b/arm_compute/runtime/CL/functions/CLReduceMean.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -45,6 +45,17 @@ public:
CLReduceMean(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
/** Configure kernel
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |QASYMM8 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |
+ * |F16 |F16 |
+ * |F32 |F32 |
+ *
* @note Supported tensor rank: up to 4
*
* @param[in] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32
diff --git a/arm_compute/runtime/CL/functions/CLReductionOperation.h b/arm_compute/runtime/CL/functions/CLReductionOperation.h
index 58164fdcb3..2245735b62 100644
--- a/arm_compute/runtime/CL/functions/CLReductionOperation.h
+++ b/arm_compute/runtime/CL/functions/CLReductionOperation.h
@@ -62,6 +62,18 @@ public:
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |QASYMM8 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |
+ * |F16 |F16 |
+ * |F32 |F32 |
+ * |S32 |S32 |
+ *
* @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32/S32.
* @param[out] output Destination tensor. Data types and data layouts supported: Same as @p input.
* @param[in] axis Axis along which to reduce. Supported reduction axis : 0, 1, 2, 3
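(Illustration, not part of this patch.) A minimal sketch of the F32 row of the table above, summing over axis 0; the shape is arbitrary and keep_dims is assumed, so the reduced axis stays in the output with size 1:

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"
    #include "arm_compute/runtime/CL/functions/CLReductionOperation.h"

    using namespace arm_compute;

    int main()
    {
        CLScheduler::get().default_init();

        CLTensor src, dst;
        src.allocator()->init(TensorInfo(TensorShape(8U, 4U), 1, DataType::F32));
        dst.allocator()->init(TensorInfo(TensorShape(1U, 4U), 1, DataType::F32)); // axis 0 reduced to 1

        CLReductionOperation reduce;
        reduce.configure(&src, &dst, 0, ReductionOperation::SUM, true); // axis 0, keep_dims = true

        src.allocator()->allocate();
        dst.allocator()->allocate();
        // ... fill src ...
        reduce.run();
        CLScheduler::get().sync();
        return 0;
    }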
diff --git a/arm_compute/runtime/CL/functions/CLRemap.h b/arm_compute/runtime/CL/functions/CLRemap.h
index 87d5f9fec7..39ee13b8d0 100644
--- a/arm_compute/runtime/CL/functions/CLRemap.h
+++ b/arm_compute/runtime/CL/functions/CLRemap.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -47,6 +47,14 @@ class CLRemap : public ICLSimpleFunction
public:
/** Initialise the function's sources, destination, interpolation policy and border mode.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |src2 |dst |
+ * |:------|:------|:------|:------|
+ * |U8 |F32 |F32 |U8 |
+ *
* @param[in,out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
* @param[in] map_x Map for X coords. Data types supported: F32.
* @param[in] map_y Map for Y coords. Data types supported: F32.
diff --git a/arm_compute/runtime/CL/functions/CLReorgLayer.h b/arm_compute/runtime/CL/functions/CLReorgLayer.h
index 0840fd13fd..976b8f6213 100644
--- a/arm_compute/runtime/CL/functions/CLReorgLayer.h
+++ b/arm_compute/runtime/CL/functions/CLReorgLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -38,6 +38,15 @@ class CLReorgLayer : public ICLSimpleFunction
public:
/** Initialise the function's source and destination.
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |All |All |
+ *
* @param[in] input Source tensor. Data types supported: All.
* @param[out] output Destination tensor with tensor shape:
* [width_input / stride, height_input / stride, channels_input * stride * stride, batch_size]. This means the output has
diff --git a/arm_compute/runtime/CL/functions/CLReverse.h b/arm_compute/runtime/CL/functions/CLReverse.h
index 81fa04b1f5..94c63ca92d 100644
--- a/arm_compute/runtime/CL/functions/CLReverse.h
+++ b/arm_compute/runtime/CL/functions/CLReverse.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -39,6 +39,14 @@ class CLReverse : public ICLSimpleFunction
public:
/** Initialize the function
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |dst |
+ * |:--------------|:--------------|:--------------|
+ * |All |U32 |All |
+ *
* @param[in] input Input tensor. Data types supported: All.
* @param[out] output Output tensor. Data type supported: Same as @p input
* @param[in] axis Axis tensor. Contains the indices of the dimensions to reverse. Data type supported: U32
diff --git a/arm_compute/runtime/CL/functions/CLSelect.h b/arm_compute/runtime/CL/functions/CLSelect.h
index 7fd52312fb..8b1e6b2019 100644
--- a/arm_compute/runtime/CL/functions/CLSelect.h
+++ b/arm_compute/runtime/CL/functions/CLSelect.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -40,6 +40,14 @@ class CLSelect : public ICLSimpleFunction
public:
/** Initialise the kernel's inputs and output.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |src2 |dst |
+ * |:--------------|:--------------|:------|:--------------|
+ * |U8 |All |All |All |
+ *
* @param[in] c Condition input tensor. Data types supported: U8.
* @param[in] x First input tensor. Data types supported: All.
* @param[in] y Second input tensor. Data types supported: Same as @p x
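
An illustrative CLSelect configuration matching the U8/All/All/All row above (sketch only, not part of the patch; shapes and names are assumptions). Elements are taken from x where the condition is non-zero and from y otherwise:

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"
    #include "arm_compute/runtime/CL/functions/CLSelect.h"

    using namespace arm_compute;

    int main()
    {
        CLScheduler::get().default_init();

        CLTensor c, x, y, out;
        c.allocator()->init(TensorInfo(TensorShape(16U), 1, DataType::U8));  // condition mask
        x.allocator()->init(TensorInfo(TensorShape(16U), 1, DataType::F32)); // selected where c != 0
        y.allocator()->init(TensorInfo(TensorShape(16U), 1, DataType::F32)); // selected where c == 0
        out.allocator()->init(TensorInfo(TensorShape(16U), 1, DataType::F32));

        CLSelect select;
        select.configure(&c, &x, &y, &out);

        c.allocator()->allocate();
        x.allocator()->allocate();
        y.allocator()->allocate();
        out.allocator()->allocate();

        select.run();
        CLScheduler::get().sync();
        return 0;
    }
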
diff --git a/arm_compute/runtime/CL/functions/CLSpaceToBatchLayer.h b/arm_compute/runtime/CL/functions/CLSpaceToBatchLayer.h
index dc02fa1363..304a74137e 100644
--- a/arm_compute/runtime/CL/functions/CLSpaceToBatchLayer.h
+++ b/arm_compute/runtime/CL/functions/CLSpaceToBatchLayer.h
@@ -60,6 +60,15 @@ public:
~CLSpaceToBatchLayer();
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |src2 |dst |
+ * |:---------|:---------|:---------|:---------|
+ * |All |S32 |S32 |All |
+ *
* @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
* @param[in] block_shape 1-D tensor with shape [M]. Supported M: 2. Data types supported: S32
* @param[in] paddings 2-D tensor with shape [2, M] (First dimension is the fastest-changing dimension). Supported M: 2. Data types supported: S32
diff --git a/arm_compute/runtime/CL/functions/CLSpaceToDepthLayer.h b/arm_compute/runtime/CL/functions/CLSpaceToDepthLayer.h
index 9e476fe7bd..8a47e95f9d 100644
--- a/arm_compute/runtime/CL/functions/CLSpaceToDepthLayer.h
+++ b/arm_compute/runtime/CL/functions/CLSpaceToDepthLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -54,6 +54,15 @@ public:
~CLSpaceToDepthLayer();
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |All |All |
+ *
* @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
* @param[out] output Tensor output. Data types supported: same as @p input
* @param[in] block_shape Block shape value.
diff --git a/arm_compute/runtime/CL/functions/CLSplit.h b/arm_compute/runtime/CL/functions/CLSplit.h
index 2931203765..86c7bdde7d 100644
--- a/arm_compute/runtime/CL/functions/CLSplit.h
+++ b/arm_compute/runtime/CL/functions/CLSplit.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -40,6 +40,18 @@ namespace arm_compute
class CLSplit : public CPPSplit<CLSlice, ICLTensor>
{
public:
+ /** CLSplit
+ *
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |All |All |
+ *
+ */
+
// Inherited methods overridden:
void run() override;
};
diff --git a/arm_compute/runtime/CL/functions/CLStackLayer.h b/arm_compute/runtime/CL/functions/CLStackLayer.h
index 3861fd299a..54c903a706 100644
--- a/arm_compute/runtime/CL/functions/CLStackLayer.h
+++ b/arm_compute/runtime/CL/functions/CLStackLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -59,6 +59,14 @@ public:
~CLStackLayer();
/** Initialise the kernel's inputs vector and output.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |All |All |
+ *
* @note Supported input tensor rank: up to 4
*
* @param[in] input The vectors containing all the tensors with the same shape to stack. Data types supported: All.
diff --git a/arm_compute/runtime/CL/functions/CLTile.h b/arm_compute/runtime/CL/functions/CLTile.h
index 69743693ff..c266adbbd4 100644
--- a/arm_compute/runtime/CL/functions/CLTile.h
+++ b/arm_compute/runtime/CL/functions/CLTile.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -39,6 +39,14 @@ class CLTile : public ICLSimpleFunction
public:
/** Set the source, destination of the kernel
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |All |All |
+ *
* @param[in] input Source tensor. Data type supported: All.
* @param[in] multiples Contains the number of times the input tensor should be replicated on the given dimension.
* @param[out] output Destination tensor. Same as @p input
diff --git a/arm_compute/runtime/CL/functions/CLUnstack.h b/arm_compute/runtime/CL/functions/CLUnstack.h
index 5d4d5710ab..32ad439b70 100644
--- a/arm_compute/runtime/CL/functions/CLUnstack.h
+++ b/arm_compute/runtime/CL/functions/CLUnstack.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -48,6 +48,14 @@ public:
CLUnstack();
/** Set the input, output and unstacking axis.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |All |All |
+ *
* @param[in] input A tensor to be unstacked. Data type supported: All.
* @param[in,out] output_vector A vector of tensors. Data types supported: same as @p input.
* Note: The number of elements of the vector will be used as the number of slices to be taken from the axis.
diff --git a/arm_compute/runtime/CL/functions/CLWinogradConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLWinogradConvolutionLayer.h
index 9ced69c1bb..7b42932f82 100644
--- a/arm_compute/runtime/CL/functions/CLWinogradConvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLWinogradConvolutionLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -62,6 +62,16 @@ public:
~CLWinogradConvolutionLayer();
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |src2 |dst |
+ * |:--------------|:--------------|:------|:--------------|
+ * |F16 |F16 |F16 |F16 |
+ * |F32 |F32 |F32 |F32 |
+ *
* @note: This function only works with 3x3,3x1,1x3,5x5,5x1,1x5,7x1 and 1x7 kernels along with unit strides for both NCHW and NHWC data layout
* @note Some Winograd configurations (i.e. F(4x4, 5x5)) are supported only with enable_fast_math = true
*
diff --git a/arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h b/arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h
index cbf1d5b444..4392de7b28 100644
--- a/arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h
@@ -64,6 +64,18 @@ public:
~NEArgMinMaxLayer();
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:----------|
+ * |QASYMM8 |U32, S32 |
+ * |QASYMM8_SIGNED |U32, S32 |
+ * |S32 |U32, S32 |
+ * |F16 |U32, S32 |
+ * |F32 |U32, S32 |
+ *
* @param[in] input Input source tensor. Data types supported: QASYMM8_SIGNED/QASYMM8/S32/F16/F32.
* @param[in] axis Axis to find max/min index.
* @param[out] output Output source tensor. Data types supported: U32/S32.
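
A minimal NEArgMinMaxLayer sketch for the F32 -> U32 row above (illustrative, not part of the patch; shapes and names are assumptions). For each position along the non-reduced dimension it returns the index of the maximum along axis 0:

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        Tensor src, dst;
        src.allocator()->init(TensorInfo(TensorShape(10U, 4U), 1, DataType::F32));
        // The reduced axis is removed, leaving one U32 index per remaining element
        dst.allocator()->init(TensorInfo(TensorShape(4U), 1, DataType::U32));

        NEArgMinMaxLayer argmax;
        argmax.configure(&src, 0 /* axis */, &dst, ReductionOperation::ARG_IDX_MAX);

        src.allocator()->allocate();
        dst.allocator()->allocate();

        argmax.run();
        return 0;
    }
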
diff --git a/arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h b/arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h
index 6d56a267a7..ec00fbdbf2 100644
--- a/arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -58,6 +58,16 @@ public:
~NEBatchNormalizationLayer();
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |F32 |F32 |
+ * |F16 |F16 |
+ *
* @note If the output tensor is a nullptr or is equal to the input, the batch normalization function will be performed in-place
*
* @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result.
diff --git a/arm_compute/runtime/NEON/functions/NEBatchToSpaceLayer.h b/arm_compute/runtime/NEON/functions/NEBatchToSpaceLayer.h
index c2fd26d34c..810bf81a22 100644
--- a/arm_compute/runtime/NEON/functions/NEBatchToSpaceLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEBatchToSpaceLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -52,6 +52,15 @@ public:
~NEBatchToSpaceLayer() = default;
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |dst |
+ * |:---------|:---------|:----------|
+ * |All |S32 |All |
+ *
* @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
* @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32
* @param[out] output Tensor output. Data types supported: same as @p input
diff --git a/arm_compute/runtime/NEON/functions/NEBitwiseAnd.h b/arm_compute/runtime/NEON/functions/NEBitwiseAnd.h
index 3203d2b9a7..1f95f193d3 100644
--- a/arm_compute/runtime/NEON/functions/NEBitwiseAnd.h
+++ b/arm_compute/runtime/NEON/functions/NEBitwiseAnd.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -48,6 +48,14 @@ public:
~NEBitwiseAnd() = default;
/** Initialise the kernel's inputs and output
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |U8 |U8 |
+ *
* @param[in] input1 First tensor input. Data type supported: U8.
* @param[in] input2 Second tensor input. Data type supported: U8.
* @param[out] output Output tensor. Data type supported: U8.
diff --git a/arm_compute/runtime/NEON/functions/NEBitwiseNot.h b/arm_compute/runtime/NEON/functions/NEBitwiseNot.h
index 9fa0d38caf..c66bebf7cc 100644
--- a/arm_compute/runtime/NEON/functions/NEBitwiseNot.h
+++ b/arm_compute/runtime/NEON/functions/NEBitwiseNot.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -36,6 +36,14 @@ class NEBitwiseNot : public INESimpleFunctionNoBorder
public:
/** Initialise the kernel's input and output
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |U8 |U8 |
+ *
* @param[in] input Input tensor. Data type supported: U8.
* @param[out] output Output tensor. Data type supported: U8.
*/
diff --git a/arm_compute/runtime/NEON/functions/NEBitwiseOr.h b/arm_compute/runtime/NEON/functions/NEBitwiseOr.h
index fba6b784de..183df212e4 100644
--- a/arm_compute/runtime/NEON/functions/NEBitwiseOr.h
+++ b/arm_compute/runtime/NEON/functions/NEBitwiseOr.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -36,6 +36,14 @@ class NEBitwiseOr : public INESimpleFunctionNoBorder
public:
/** Initialise the kernel's inputs and output
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |U8 |U8 |
+ *
* @param[in] input1 First tensor input. Data type supported: U8.
* @param[in] input2 Second tensor input. Data type supported: U8.
* @param[out] output Output tensor. Data type supported: U8.
diff --git a/arm_compute/runtime/NEON/functions/NEBitwiseXor.h b/arm_compute/runtime/NEON/functions/NEBitwiseXor.h
index c6cb584284..126aaa6ddd 100644
--- a/arm_compute/runtime/NEON/functions/NEBitwiseXor.h
+++ b/arm_compute/runtime/NEON/functions/NEBitwiseXor.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -36,6 +36,14 @@ class NEBitwiseXor : public INESimpleFunctionNoBorder
public:
/** Initialise the kernel's inputs and output
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |U8 |U8 |
+ *
* @param[in] input1 First tensor input. Data type supported: U8.
* @param[in] input2 Second tensor input. Data type supported: U8.
* @param[out] output Output tensor. Data type supported: U8.
diff --git a/arm_compute/runtime/NEON/functions/NEBoundingBoxTransform.h b/arm_compute/runtime/NEON/functions/NEBoundingBoxTransform.h
index c377520a12..2a196a2de5 100644
--- a/arm_compute/runtime/NEON/functions/NEBoundingBoxTransform.h
+++ b/arm_compute/runtime/NEON/functions/NEBoundingBoxTransform.h
@@ -38,6 +38,17 @@ class NEBoundingBoxTransform : public INESimpleFunctionNoBorder
public:
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |dst |
+ * |:--------------|:--------------|:--------------|
+ * |QASYMM16 |QASYMM8 |QASYMM16 |
+ * |F16 |F16 |F16 |
+ * |F32 |F32 |F32 |
+ *
* @param[in] boxes Source tensor. Bounding box proposals in pixel coordinates. Size(M, 4), format [x1, y1, x2, y2]. Data types supported: QASYMM16/F16/F32.
* @param[out] pred_boxes Destination tensor. Pixel coordinates of the transformed bounding boxes. Size (M, 4*K), format [x1, y1, x2, y2]. Data types supported: Same as @p input
* @param[in] deltas Bounding box translations and scales. Size (M, 4*K), format [dx, dy, dw, dh], K is the number of classes.
diff --git a/arm_compute/runtime/NEON/functions/NECast.h b/arm_compute/runtime/NEON/functions/NECast.h
index e536317660..eb7de1fadb 100644
--- a/arm_compute/runtime/NEON/functions/NECast.h
+++ b/arm_compute/runtime/NEON/functions/NECast.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -40,18 +40,22 @@ class NECast : public INESimpleFunctionNoBorder
public:
/** Initialize the function's source, destination
*
- * Input data type must be different than output data type.
+ * Valid data layouts:
+ * - All
*
- * Valid conversions Input -> Output :
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:-----------------------------------------------|
+ * |QASYMM8_SIGNED | S16, S32, F32, F16 |
+ * |QASYMM8 | U16, S16, S32, F32, F16 |
+ * |U8 | U16, S16, S32, F32, F16 |
+ * |U16 | U8, U32 |
+ * |S16 | QASYMM8_SIGNED, U8, S32 |
+ * |F16 | QASYMM8_SIGNED, QASYMM8, F32, S32, U8 |
+ * |S32 | QASYMM8_SIGNED, QASYMM8, F16, F32, U8 |
+ * |F32 | QASYMM8_SIGNED, QASYMM8, BFLOAT16, F16, S32, U8|
*
- * - QASYMM8_SIGNED -> S16, S32, F32, F16
- * - QASYMM8 -> U16, S16, S32, F32, F16
- * - U8 -> U16, S16, S32, F32, F16
- * - U16 -> U8, U32
- * - S16 -> QASYMM8_SIGNED, U8, S32
- * - F16 -> QASYMM8_SIGNED, QASYMM8, F32, S32, U8
- * - S32 -> QASYMM8_SIGNED, QASYMM8, F16, F32, U8
- * - F32 -> QASYMM8_SIGNED, QASYMM8, BFLOAT16, F16, S32, U8
+ * Input data type must be different than output data type.
*
* @param[in] input The input tensor to convert. Data types supported: QASYMM8_SIGNED/QASYMM8/U8/U16/S16/F16/S32/F32.
* @param[out] output The output tensor. Data types supported: QASYMM8_SIGNED/QASYMM8/U8/S8/U16/S16/U32/S32/BFLOAT16/F16/F32.
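
A minimal NECast sketch for one of the conversions listed in the table above, U8 -> F32 (illustrative, not part of the patch; shapes and names are assumptions):

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/NEON/functions/NECast.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        Tensor src, dst;
        src.allocator()->init(TensorInfo(TensorShape(32U), 1, DataType::U8));
        dst.allocator()->init(TensorInfo(TensorShape(32U), 1, DataType::F32)); // must differ from the input type

        NECast cast;
        cast.configure(&src, &dst, ConvertPolicy::SATURATE);

        src.allocator()->allocate();
        dst.allocator()->allocate();

        cast.run();
        return 0;
    }
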
diff --git a/arm_compute/runtime/NEON/functions/NEChannelShuffleLayer.h b/arm_compute/runtime/NEON/functions/NEChannelShuffleLayer.h
index aa11396c20..8888efec4f 100644
--- a/arm_compute/runtime/NEON/functions/NEChannelShuffleLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEChannelShuffleLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -44,6 +44,14 @@ class NEChannelShuffleLayer : public INESimpleFunctionNoBorder
public:
/** Initialize the function
*
+ * Valid data layouts:
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |All |All |
+ *
* @param[in] input Input tensor. Data types supported: All
* @param[out] output Output tensor. Data type supported: Same as @p input
* @param[in] num_groups Number of groups. Must be greater than 1 and the number of channels of the tensors must be a multiple of the number of groups.
diff --git a/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h
index b1e85523c5..f19aa8008b 100644
--- a/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h
@@ -86,12 +86,27 @@ public:
~NEConvolutionLayer() = default;
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |src2 |dst |
+ * |:--------------|:------------------|:------|:--------------|
+ * |F16 |F16 |F16 |F16 |
+ * |F32 |F32 |F32 |F32 |
+ * |QASYMM8 |QASYMM8 |S32 |QASYMM8 |
+ * |QASYMM8 |QSYMM8_PER_CHANNEL |S32 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |QASYMM8_SIGNED |
+ * |QASYMM8_SIGNED |QSYMM8_PER_CHANNEL |S32 |QASYMM8_SIGNED |
+ *
* @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
* while every optional dimension from 4 and above represent a batch of inputs.
* Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: Same as @p input, also could be QSYMM8_PER_CHANNEL if input is QASYMM8/QASYMM8_SIGNED.
+ * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
+ * Data type supported: Same as @p input, also could be QSYMM8_PER_CHANNEL if input is QASYMM8/QASYMM8_SIGNED.
* @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
- * Data type supported: Should match @p input data type, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type.
+ * Data type supported: Same as @p input, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type.
* @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
* Data types supported: Same as @p input.
* @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
@@ -110,9 +125,10 @@ public:
* @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
* while every optional dimension from 4 and above represent a batch of inputs.
* Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported:Same as @p input, also could be QSYMM8_PER_CHANNEL if input is QASYMM8/QASYMM8_SIGNED.
+ * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
+ * Data type supported: Same as @p input, also could be QSYMM8_PER_CHANNEL if input is QASYMM8/QASYMM8_SIGNED.
* @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
- * Data type supported: Should match @p input data type, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type.
+ * Data type supported: Same as @p input, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type.
* @param[in] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
* Data types supported: Same as @p input.
* @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
@@ -134,7 +150,8 @@ public:
* @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
* while every optional dimension from 4 and above represent a batch of inputs.
* Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported:Same as @p input, also could be QSYMM8_PER_CHANNEL if input is QASYMM8/QASYMM8_SIGNED.
+ * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
+ * Data type supported: Same as @p input, also could be QSYMM8_PER_CHANNEL if input is QASYMM8/QASYMM8_SIGNED.
* @param[in] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
* Data types supported: Same as @p input.
* @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
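
An illustrative F32/NCHW NEConvolutionLayer configuration matching the table above (sketch only, not part of the patch; shapes, padding and names are assumptions):

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/NEON/functions/NEConvolutionLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        Tensor src, weights, biases, dst;
        src.allocator()->init(TensorInfo(TensorShape(32U, 32U, 3U, 1U), 1, DataType::F32));   // [W, H, IFM, N]
        weights.allocator()->init(TensorInfo(TensorShape(3U, 3U, 3U, 8U), 1, DataType::F32)); // [kx, ky, IFM, OFM]
        biases.allocator()->init(TensorInfo(TensorShape(8U), 1, DataType::F32));              // [OFM]
        dst.allocator()->init(TensorInfo(TensorShape(32U, 32U, 8U, 1U), 1, DataType::F32));   // 1-pixel padding keeps 32x32

        NEConvolutionLayer conv;
        conv.configure(&src, &weights, &biases, &dst, PadStrideInfo(1, 1, 1, 1));

        src.allocator()->allocate();
        weights.allocator()->allocate();
        biases.allocator()->allocate();
        dst.allocator()->allocate();

        conv.run();
        return 0;
    }
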
diff --git a/arm_compute/runtime/NEON/functions/NECropResize.h b/arm_compute/runtime/NEON/functions/NECropResize.h
index 7dcf925650..143bbbc6f1 100644
--- a/arm_compute/runtime/NEON/functions/NECropResize.h
+++ b/arm_compute/runtime/NEON/functions/NECropResize.h
@@ -54,6 +54,14 @@ public:
/** Configure kernel
*
+ * Valid data layouts:
+ * - NHWC
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |src2 |dst |
+ * |:--------|:--------|:------|:--------|
+ * |All |F32 |F32 |F32 |
+ *
* @note Supported tensor rank: up to 4
* @note Box indices may be outside of the bounds, in which case @p extrapolation_value is used.
* @note Start and end indices of boxes are inclusive.
diff --git a/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h
index c16cf26095..34ab0707c2 100644
--- a/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h
@@ -90,6 +90,20 @@ public:
/** Set the input, weights, biases and output tensors.
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |src2 |dst |
+ * |:--------------|:------------------|:------|:--------------|
+ * |F16 |F16 |F16 |F16 |
+ * |F32 |F32 |F32 |F32 |
+ * |QASYMM8 |QASYMM8 |S32 |QASYMM8 |
+ * |QASYMM8 |QSYMM8_PER_CHANNEL |S32 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |QASYMM8_SIGNED |
+ * |QASYMM8_SIGNED |QSYMM8_PER_CHANNEL |S32 |QASYMM8_SIGNED |
+ *
* @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. Data types supported: F32/F16/QASYMM8/QASYMM8_SIGNED.
* @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. Data type supported: Same as @p input, also could be QSYMM8_PER_CHANNEL if input is QASYMM8/QASYMM8_SIGNED.
* @param[in] bias Optional, ignored if NULL. The biases have one dimension. Data type supported: Data types supported: S32 for QASYMM8/QASYMM8_SIGNED input, F32 for F32 input, F16 for F16 input.
diff --git a/arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h b/arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h
index c9817a63c1..17cf539717 100644
--- a/arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -48,15 +48,21 @@ public:
~NEDepthConvertLayer() = default;
/** Initialize the function's source, destination
*
- * Valid conversions Input -> Output :
+ * Valid data layouts:
+ * - All
*
- * - QASYMM8 -> F16, F32
- * - U8 -> U16, S16, S32
- * - U16 -> U8, U32
- * - S16 -> U8, S32
- * - BFLOAT16 -> F32
- * - F16 -> QASYMM8, F32
- * - F32 -> QASYMM8, F16, BFLOAT16
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------------------|
+ * |QASYMM8 | F16, F32 |
+ * |U8 | U16, S16, S32 |
+ * |U16 | U8, U32 |
+ * |S16 | U8, S32 |
+ * |BFLOAT16 | F32 |
+ * |F16 | QASYMM8, F32 |
+ * |F32 | QASYMM8, F16, BFLOAT16 |
+ *
+ * Input data type must be different than output data type.
*
* @param[in] input The input tensor to convert. Data types supported: QASYMM8/U8/U16/S16/BFLOAT16/F16/F32.
* @param[out] output The output tensor. Data types supported: QASYMM8/U8/U16/S16/U32/S32/BFLOAT16/F16/F32.
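
A minimal NEDepthConvertLayer sketch for the U8 -> S32 row above (illustrative, not part of the patch; shapes and names are assumptions):

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        Tensor src, dst;
        src.allocator()->init(TensorInfo(TensorShape(64U), 1, DataType::U8));
        dst.allocator()->init(TensorInfo(TensorShape(64U), 1, DataType::S32)); // output type must differ from input

        NEDepthConvertLayer convert;
        convert.configure(&src, &dst, ConvertPolicy::SATURATE, 0 /* shift */);

        src.allocator()->allocate();
        dst.allocator()->allocate();

        convert.run();
        return 0;
    }
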
diff --git a/arm_compute/runtime/NEON/functions/NEDepthToSpaceLayer.h b/arm_compute/runtime/NEON/functions/NEDepthToSpaceLayer.h
index 51f7ff7770..b9bdcd1f11 100644
--- a/arm_compute/runtime/NEON/functions/NEDepthToSpaceLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEDepthToSpaceLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -53,6 +53,15 @@ public:
~NEDepthToSpaceLayer() = default;
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |All |All |
+ *
* @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All
* @param[out] output Tensor output. Data types supported: same as @p input
* @param[in] block_shape Block shape value.
diff --git a/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h
index c74b2a93ee..2f541758f4 100644
--- a/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h
@@ -55,6 +55,20 @@ public:
~NEDepthwiseConvolutionLayer();
/** Initialize the function's source, destination, weights and convolution information.
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |src2 |dst |
+ * |:--------------|:------------------|:------|:--------------|
+ * |F16 |F16 |F16 |F16 |
+ * |F32 |F32 |F32 |F32 |
+ * |QASYMM8 |QASYMM8 |S32 |QASYMM8 |
+ * |QASYMM8 |QSYMM8_PER_CHANNEL |S32 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |QASYMM8_SIGNED |
+ * |QASYMM8_SIGNED |QSYMM8_PER_CHANNEL |S32 |QASYMM8_SIGNED |
+ *
* @param[in, out] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32
* @param[out] output Destination tensor. Data type supported: same as @p input.
* @param[in] weights Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM].
diff --git a/arm_compute/runtime/NEON/functions/NEDequantizationLayer.h b/arm_compute/runtime/NEON/functions/NEDequantizationLayer.h
index dfec835f45..91ed056cf3 100644
--- a/arm_compute/runtime/NEON/functions/NEDequantizationLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEDequantizationLayer.h
@@ -57,18 +57,13 @@ public:
* - All
*
* Valid data type configurations:
- * |src |dst |
- * |:------------------|:--------------|
- * |QASYMM8 |F16 |
- * |QASYMM8 |F32 |
- * |QASYMM8_SIGNED |F16 |
- * |QASYMM8_SIGNED |F32 |
- * |QSYMM8_PER_CHANNEL |F16 |
- * |QSYMM8_PER_CHANNEL |F32 |
- * |QSYMM8 |F16 |
- * |QSYMM8 |F32 |
- * |QSYMM16 |F16 |
- * |QSYMM16 |F32 |
+ * |src |dst |
+ * |:------------------|:-----------|
+ * |QASYMM8 |F16, F32 |
+ * |QASYMM8_SIGNED |F16, F32 |
+ * |QSYMM8_PER_CHANNEL |F16, F32 |
+ * |QSYMM8 |F16, F32 |
+ * |QSYMM16 |F16, F32 |
*
* @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16.
* @param[out] output Destination tensor with the same dimensions of input. Data type supported: F16/F32.
diff --git a/arm_compute/runtime/NEON/functions/NEFillBorder.h b/arm_compute/runtime/NEON/functions/NEFillBorder.h
index 8a8a0c7dc2..ab77c28839 100644
--- a/arm_compute/runtime/NEON/functions/NEFillBorder.h
+++ b/arm_compute/runtime/NEON/functions/NEFillBorder.h
@@ -42,6 +42,14 @@ public:
NEFillBorder();
/** Initialize the function's source, destination and border_mode.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |All |All |
+ *
* @note This function fills the borders within the XY-planes.
*
* @param[in, out] input Source tensor. Data type supported: All
diff --git a/arm_compute/runtime/NEON/functions/NEFlattenLayer.h b/arm_compute/runtime/NEON/functions/NEFlattenLayer.h
index 1104aac77f..e688e918d9 100644
--- a/arm_compute/runtime/NEON/functions/NEFlattenLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEFlattenLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -39,6 +39,14 @@ class NEFlattenLayer : public IFunction
public:
/** Initialise the kernel's input and output.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |All |All |
+ *
* @param[in] input First input tensor to flatten with at least 3 dimensions. The dimensions over the third will be interpreted as batches. Data types supported: All
* @param[out] output Output tensor with shape [w*h*d, input_batches] where:
* w = width input tensor, h = height input tensor and d = depth input tensor. Data type supported: same as @p input
diff --git a/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h b/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h
index bc45e58b4b..9727e108a5 100644
--- a/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h
@@ -36,7 +36,6 @@
namespace arm_compute
{
-
namespace weights_transformations
{
/** Basic function to manage the reshape weights generated from @ref NETranspose */
@@ -102,6 +101,18 @@ public:
~NEFullyConnectedLayer();
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |src2 |dst |
+ * |:--------------|:------------------|:------|:--------------|
+ * |F16 |F16 |F16 |F16 |
+ * |F32 |F32 |F32 |F32 |
+ * |QASYMM8 |QASYMM8 |S32 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |QASYMM8_SIGNED |
+ *
* @param[in] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
* @param[in] weights Weights tensor. The weights must be 2 dimensional.
* If this function is called after a Convolution Layer, the (transposed) weights will have as many rows as the product of the first 3 input's dimensions.
diff --git a/arm_compute/runtime/NEON/functions/NEFuseBatchNormalization.h b/arm_compute/runtime/NEON/functions/NEFuseBatchNormalization.h
index 5dc804e240..3dd7f49044 100644
--- a/arm_compute/runtime/NEON/functions/NEFuseBatchNormalization.h
+++ b/arm_compute/runtime/NEON/functions/NEFuseBatchNormalization.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -52,6 +52,16 @@ public:
~NEFuseBatchNormalization();
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |F32 |F32 |
+ * |F16 |F16 |
+ *
* @param[in] input_weights Input weights tensor for convolution or depthwise convolution layer. Data type supported: F16/F32. Data layout supported: NCHW, NHWC
* @param[in] bn_mean Batch normalization layer mean tensor. Same as @p input_weights
* @param[in] bn_var Batch normalization layer variance tensor. Same as @p input_weights
diff --git a/arm_compute/runtime/NEON/functions/NEGEMM.h b/arm_compute/runtime/NEON/functions/NEGEMM.h
index a6c3436656..d4a9f68beb 100644
--- a/arm_compute/runtime/NEON/functions/NEGEMM.h
+++ b/arm_compute/runtime/NEON/functions/NEGEMM.h
@@ -76,6 +76,16 @@ public:
~NEGEMM();
/** Initialise the kernel's inputs, output
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |src2 |dst |
+ * |:------------|:-----------|:---------|:--------------|
+ * |F32 |F32 |F32 |F32 |
+ * |F16 |F16 |F16 |F16 |
+ * |BFLOAT16 |BFLOAT16 |BFLOAT16 |BFLOAT16 |
+ *
* @note GEMM: General Matrix Multiply - [alpha * A * B + beta * C].
* @note GEMM: The tensors a, b, c, d must have the same data type. You should not mix data types when calling this function.
*
diff --git a/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h
index 9897bf1d4d..e89eae1d31 100644
--- a/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h
@@ -177,6 +177,21 @@ public:
~NEGEMMConvolutionLayer();
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |src2 |dst |
+ * |:--------------|:------------------|:--------|:--------------|
+ * |F16 |F16 |F16 |F16 |
+ * |F32 |F32 |F32 |F32 |
+ * |BFLOAT16 |BFLOAT16 |BFLOAT16 |BFLOAT16 |
+ * |QASYMM8 |QASYMM8 |S32 |QASYMM8 |
+ * |QASYMM8 |QSYMM8_PER_CHANNEL |S32 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |QASYMM8_SIGNED |
+ * |QASYMM8_SIGNED |QSYMM8_PER_CHANNEL |S32 |QASYMM8_SIGNED |
+ *
* @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
* while every optional dimension from 4 and above represent a batch of inputs.
* Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32.
diff --git a/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h b/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h
index b2b77bd406..780723e752 100644
--- a/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h
+++ b/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h
@@ -76,6 +76,26 @@ public:
~NEGEMMLowpMatrixMultiplyCore();
/** Initialise the kernel's inputs, output
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |src2 |dst |
+ * |:--------------|:------------------|:--------|:--------------|
+ * |QASYMM8 |QASYMM8 |S32 |QASYMM8 |
+ * |QASYMM8 |QSYMM8_PER_CHANNEL |S32 |QASYMM8 |
+ * |QASYMM8 |QSYMM8 |S32 |QASYMM8 |
+ * |QASYMM8 |QASYMM8 |S32 |S32 |
+ * |QASYMM8 |QSYMM8_PER_CHANNEL |S32 |S32 |
+ * |QASYMM8 |QSYMM8 |S32 |S32 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |QASYMM8_SIGNED |
+ * |QASYMM8_SIGNED |QSYMM8_PER_CHANNEL |S32 |QASYMM8_SIGNED |
+ * |QASYMM8_SIGNED |QSYMM8 |S32 |QASYMM8_SIGNED |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |S32 |
+ * |QASYMM8_SIGNED |QSYMM8_PER_CHANNEL |S32 |S32 |
+ * |QASYMM8_SIGNED |QSYMM8 |S32 |S32 |
+ *
* @note GEMM_LOWP: low precision GEMM kernel
* This kernel performs the following computations:
*
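
An illustrative quantized NEGEMMLowpMatrixMultiplyCore call for the QASYMM8 x QASYMM8 -> S32 row above, i.e. raw int32 accumulators with no output stage (sketch only, not part of the patch; shapes, quantization parameters and names are assumptions):

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        Tensor a, b, dst;
        a.allocator()->init(TensorInfo(TensorShape(4U, 2U), 1, DataType::QASYMM8, QuantizationInfo(1.f / 255.f, 10)));
        b.allocator()->init(TensorInfo(TensorShape(3U, 4U), 1, DataType::QASYMM8, QuantizationInfo(1.f / 255.f, 5)));
        dst.allocator()->init(TensorInfo(TensorShape(3U, 2U), 1, DataType::S32)); // raw accumulators

        NEGEMMLowpMatrixMultiplyCore gemmlowp;
        gemmlowp.configure(&a, &b, nullptr /* no bias */, &dst); // default GEMMInfo: no output stage

        a.allocator()->allocate();
        b.allocator()->allocate();
        dst.allocator()->allocate();

        gemmlowp.run();
        return 0;
    }
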
diff --git a/arm_compute/runtime/NEON/functions/NEGather.h b/arm_compute/runtime/NEON/functions/NEGather.h
index a5e0461227..393a38ee4d 100644
--- a/arm_compute/runtime/NEON/functions/NEGather.h
+++ b/arm_compute/runtime/NEON/functions/NEGather.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -40,6 +40,14 @@ class NEGather : public INESimpleFunctionNoBorder
public:
/** Initialise the kernel's inputs and outputs
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |All |All |
+ *
* @param[in] input Source tensor. Supported tensor rank: up to 4. Data type supported: All
* @param[in] indices Indices tensor. Supported tensor rank: up to 1. Must be one of the following type: U32/S32. Each value Must be in range [0, input.shape[@p axis])
* @param[out] output Destination tensor. Data type supported: Same as @p input
diff --git a/arm_compute/runtime/NEON/functions/NEGenerateProposalsLayer.h b/arm_compute/runtime/NEON/functions/NEGenerateProposalsLayer.h
index 22c6ba2ed6..3b683382ec 100644
--- a/arm_compute/runtime/NEON/functions/NEGenerateProposalsLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEGenerateProposalsLayer.h
@@ -72,6 +72,16 @@ public:
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |src2 |dst |
+ * |:--------------|:------------------|:--------|:--------------|
+ * |F16 |F16 |F16 |F16 |
+ * |F32 |F32 |F32 |F32 |
+ * |QASYMM8 |QSYMM8 |QSYMM16 |QASYMM8 |
+ *
* @param[in] scores Scores from convolution layer of size (W, H, A), where H and W are the height and width of the feature map, and A is the number of anchors.
* Data types supported: QASYMM8/F16/F32
* @param[in] deltas Bounding box deltas from convolution layer of size (W, H, 4*A). Data types supported: Same as @p scores
diff --git a/arm_compute/runtime/NEON/functions/NEInstanceNormalizationLayer.h b/arm_compute/runtime/NEON/functions/NEInstanceNormalizationLayer.h
index 57165c94b4..bb0697072b 100644
--- a/arm_compute/runtime/NEON/functions/NEInstanceNormalizationLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEInstanceNormalizationLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -60,6 +60,16 @@ public:
~NEInstanceNormalizationLayer();
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------|:---------|
+ * |F16 |F16 |
+ * |F32 |F32 |
+ *
* @param[in, out] input Source tensor. In case of @p output tensor = nullptr this tensor will store the result of the normalization.
* Data types supported: F16/F32. Data layout supported: NHWC, NCHW
* @param[out] output Destination tensor. Data types and data layouts supported: same as @p input.
diff --git a/arm_compute/runtime/NEON/functions/NEL2NormalizeLayer.h b/arm_compute/runtime/NEON/functions/NEL2NormalizeLayer.h
index 173b9d2141..7f1a5e785e 100644
--- a/arm_compute/runtime/NEON/functions/NEL2NormalizeLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEL2NormalizeLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -60,6 +60,16 @@ public:
~NEL2NormalizeLayer();
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------|:---------|
+ * |F16 |F16 |
+ * |F32 |F32 |
+ *
* @param[in, out] input Source tensor. Data types supported: F16/F32. (Written to only for border_size != 0)
* @param[out] output Destination tensor. Data types and data layouts supported: same as @p input.
* @param[in] axis Axis along which to reduce. Negative values wrap around. Maximum supported actual reduction axis : 2
diff --git a/arm_compute/runtime/NEON/functions/NELSTMLayer.h b/arm_compute/runtime/NEON/functions/NELSTMLayer.h
index ef8defb827..075fb4530a 100644
--- a/arm_compute/runtime/NEON/functions/NELSTMLayer.h
+++ b/arm_compute/runtime/NEON/functions/NELSTMLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -60,6 +60,15 @@ public:
~NELSTMLayer();
/** Initialize function's tensors.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src0 - src13 | dst0 - dst3 |
+ * |:------------|:------------|
+ * |F16 |F16 |
+ * |F32 |F32 |
+ *
* @param[in] input Source tensor. Input is a 2D tensor with dimensions [input_size, batch_size]. Data types supported: F16/F32.
* @param[in] input_to_forget_weights 2D weights tensor with dimensions [input_size, num_units]. Data type supported: Same as @p input.
* @param[in] input_to_cell_weights 2D weights tensor with dimensions [input_size, num_units]. Data type supported: Same as @p input.
diff --git a/arm_compute/runtime/NEON/functions/NELSTMLayerQuantized.h b/arm_compute/runtime/NEON/functions/NELSTMLayerQuantized.h
index 53a024ae04..2f0c753691 100644
--- a/arm_compute/runtime/NEON/functions/NELSTMLayerQuantized.h
+++ b/arm_compute/runtime/NEON/functions/NELSTMLayerQuantized.h
@@ -77,6 +77,14 @@ public:
~NELSTMLayerQuantized();
/** Initialize function's tensors.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src0 - src8 |src9 - src12 |src13 |src14 |dst0 |dst1 |
+ * |:-----------|:------------|:-------|:------|:------|:------|
+ * |QASYMM8 |S32 |QSYMM16 |QASYMM8|QSYMM16|QASYMM8|
+ *
* @param[in] input Source tensor. Input is a 2D tensor with dimensions [input_size, batch_size]. Data types supported: QASYMM8.
* @param[in] input_to_input_weights 2D weights tensor with dimensions [input_size, output_size]. Data type supported: Same as @p input.
* @param[in] input_to_forget_weights 2D weights tensor with dimensions [input_size, output_size]. Data type supported: Same as @p input.
diff --git a/arm_compute/runtime/NEON/functions/NEMaxUnpoolingLayer.h b/arm_compute/runtime/NEON/functions/NEMaxUnpoolingLayer.h
index fae26b3c93..41ea040457 100644
--- a/arm_compute/runtime/NEON/functions/NEMaxUnpoolingLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEMaxUnpoolingLayer.h
@@ -57,6 +57,18 @@ public:
~NEMaxUnpoolingLayer();
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |QASYMM8 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |
+ * |F16 |F16 |
+ * |F32 |F32 |
+ *
* @note Only supported pool size 2
*
* @param[in, out] input Source tensor. (Written to only when padding != 0) Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
diff --git a/arm_compute/runtime/NEON/functions/NEMeanStdDevNormalizationLayer.h b/arm_compute/runtime/NEON/functions/NEMeanStdDevNormalizationLayer.h
index 31e376191c..41aa81946b 100644
--- a/arm_compute/runtime/NEON/functions/NEMeanStdDevNormalizationLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEMeanStdDevNormalizationLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -50,6 +50,16 @@ public:
~NEMeanStdDevNormalizationLayer();
/** Initialise the function's input and outputs.
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------|:---------|
+ * |F32 |F32 |
+ * |F16 |F16 |
+ *
* @note If the output tensor is a nullptr, the normalization will be performed in-place.
*
* @param[in, out] input Input tensor with 2 dimensions. Data types supported: F16/F32.
diff --git a/arm_compute/runtime/NEON/functions/NENormalizationLayer.h b/arm_compute/runtime/NEON/functions/NENormalizationLayer.h
index 8c4ad1516e..fbe000445c 100644
--- a/arm_compute/runtime/NEON/functions/NENormalizationLayer.h
+++ b/arm_compute/runtime/NEON/functions/NENormalizationLayer.h
@@ -63,6 +63,16 @@ public:
~NENormalizationLayer();
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------|:---------|
+ * |F32 |F32 |
+ * |F16 |F16 |
+ *
* @param[in] input Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM],
* and an optional 4th dimension for batch of inputs. Data type supported: F16/F32. Data layouts supported: NCHW/NHWC.
* @param[out] output Destination with the same dimensions, data type, data layout and number of channels of @p input
diff --git a/arm_compute/runtime/NEON/functions/NEPadLayer.h b/arm_compute/runtime/NEON/functions/NEPadLayer.h
index 76ff0643a0..4aa6725496 100644
--- a/arm_compute/runtime/NEON/functions/NEPadLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEPadLayer.h
@@ -65,6 +65,15 @@ public:
~NEPadLayer();
/** Initialize the function
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------|:---------|
+ * |All |All |
+ *
* @param[in] input Source tensor. Data types supported: All.
* @param[out] output Output tensor. Data type supported: same as @p input
* @param[in] padding The padding for each spatial dimension of the input tensor. The pair padding[i]
diff --git a/arm_compute/runtime/NEON/functions/NEPriorBoxLayer.h b/arm_compute/runtime/NEON/functions/NEPriorBoxLayer.h
index 3cc79fa28e..38e0c9f3ad 100644
--- a/arm_compute/runtime/NEON/functions/NEPriorBoxLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEPriorBoxLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -38,6 +38,15 @@ class NEPriorBoxLayer : public INESimpleFunctionNoBorder
public:
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |dst |
+ * |:--------|:--------|:--------|
+ * |F32 |F32 |F32 |
+ *
* @param[in] input1 First source tensor. Data types supported: F32. Data layouts supported: NCHW/NHWC.
* @param[in] input2 Second source tensor. Data types and layouts supported: same as @p input1
* @param[out] output Destination tensor. Output dimensions are [W * H * num_priors * 4, 2]. Data type supported: same as @p input
diff --git a/arm_compute/runtime/NEON/functions/NEQLSTMLayer.h b/arm_compute/runtime/NEON/functions/NEQLSTMLayer.h
index e706179415..7c2e9bc5a1 100644
--- a/arm_compute/runtime/NEON/functions/NEQLSTMLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEQLSTMLayer.h
@@ -76,6 +76,14 @@ public:
~NEQLSTMLayer();
/** Initialize function's tensors.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src0 |src1 - src6 |src7 - src9 |src10 |src11 |dst0 |dst1 - dst2 |
+ * |:-------------|:------------|:------------|:------|:-------------|:------|:-----------------|
+ * |QASYMM8_SIGNED|QASYMM8 |S32 |QSYMM16|QASYMM8_SIGNED|QSYMM16|QASYMM8_SIGNED |
+ *
* @param[in] input Source tensor. Input is a 2D tensor with dimensions [input_size, batch_size]. Data types supported: QASYMM8_SIGNED.
* @param[in] input_to_forget_weights 2D weights tensor with dimensions [input_size, num_units]. Data type supported: QSYMM8.
* @param[in] input_to_cell_weights 2D weights tensor with dimensions [input_size, num_units]. Data type supported: QSYMM8.
diff --git a/arm_compute/runtime/NEON/functions/NEQuantizationLayer.h b/arm_compute/runtime/NEON/functions/NEQuantizationLayer.h
index a7fadfc7cd..eeca2bb1db 100644
--- a/arm_compute/runtime/NEON/functions/NEQuantizationLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEQuantizationLayer.h
@@ -56,20 +56,12 @@ public:
* - All
*
* Valid data type configurations:
- * |src |dst |
- * |:------------------|:--------------|
- * |QASYMM8 |QASYMM8 |
- * |QASYMM8 |QASYMM8_SIGNED |
- * |QASYMM8 |QASYMM16 |
- * |QASYMM8_SIGNED |QASYMM8 |
- * |QASYMM8_SIGNED |QASYMM8_SIGNED |
- * |QASYMM8_SIGNED |QASYMM16 |
- * |F16 |QASYMM8 |
- * |F16 |QASYMM8_SIGNED |
- * |F16 |QASYMM16 |
- * |F32 |QASYMM8 |
- * |F32 |QASYMM8_SIGNED |
- * |F32 |QASYMM16 |
+ * |src |dst |
+ * |:------------------|:--------------------------------------|
+ * |QASYMM8 |QASYMM8, QASYMM8_SIGNED, QASYMM16 |
+ * |QASYMM8_SIGNED |QASYMM8, QASYMM8_SIGNED, QASYMM16 |
+ * |F16 |QASYMM8, QASYMM8_SIGNED, QASYMM16 |
+ * |F32 |QASYMM8, QASYMM8_SIGNED, QASYMM16 |
*
* @param[in] input Source tensor. The dimensions over the third will be interpreted as batches. Data types supported: QASYMM8/QASYMM8_SIGNED/F32/F16.
* @param[out] output Destination tensor with the same dimensions of input. Data types supported: QASYMM8/QASYMM8_SIGNED/QASYMM16
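
A minimal NEQuantizationLayer sketch for the F32 -> QASYMM8 row above (illustrative, not part of the patch; the scale/offset and names are assumptions):

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/NEON/functions/NEQuantizationLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        Tensor src, dst;
        src.allocator()->init(TensorInfo(TensorShape(64U), 1, DataType::F32));
        // The destination quantization info defines the scale/offset used to quantize
        dst.allocator()->init(TensorInfo(TensorShape(64U), 1, DataType::QASYMM8, QuantizationInfo(1.f / 128.f, 128)));

        NEQuantizationLayer quantize;
        quantize.configure(&src, &dst);

        src.allocator()->allocate();
        dst.allocator()->allocate();

        quantize.run();
        return 0;
    }
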
diff --git a/arm_compute/runtime/NEON/functions/NERNNLayer.h b/arm_compute/runtime/NEON/functions/NERNNLayer.h
index 66f7f2ea3f..667d3144ac 100644
--- a/arm_compute/runtime/NEON/functions/NERNNLayer.h
+++ b/arm_compute/runtime/NEON/functions/NERNNLayer.h
@@ -54,6 +54,16 @@ public:
~NERNNLayer();
/** Initialize the function
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |src2 |src3 |dst0 |dst1 |
+ * |:------|:------|:------|:------|:------|:------|
+ * |F16 |F16 |F16 |F16 |F16 |F16 |
+ * |F32 |F32 |F32 |F32 |F32 |F32 |
+ *
* @param[in] input Input is a 2-D tensor of shape [input_size, batch_size]. Data types supported: F16/F32
* @param[in] weights Weights tensor of shape [input_size, num_units] that multiplies the input. Data types supported: Same as @p input
* @param[in] recurrent_weights Weights tensor of shape [num_units, num_units] that multiplies the current 'state'. Data types supported: Same as @p input
diff --git a/arm_compute/runtime/NEON/functions/NEROIAlignLayer.h b/arm_compute/runtime/NEON/functions/NEROIAlignLayer.h
index c72cd494d2..ea1af4daea 100644
--- a/arm_compute/runtime/NEON/functions/NEROIAlignLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEROIAlignLayer.h
@@ -38,10 +38,21 @@ class NEROIAlignLayer : public INESimpleFunctionNoBorder
public:
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |dst |
+ * |:--------------|:--------------|:--------------|
+ * |F16 |F16 |F16 |
+ * |F32 |F32 |F32 |
+ * |QASYMM8 |QASYMM16 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM16 |QASYMM8_SIGNED |
+ *
* @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
* @param[in] rois ROIs tensor, it is a 2D tensor of size [5, N] (where N is the number of ROIs) containing top left and bottom right corner
* as coordinate of an image and batch_id of ROI [ batch_id, x1, y1, x2, y2 ].
- * Data types supported: QASYMM16 with scale of 0.125 and 0 offset if @p input is QASYMM8, otherwise same as @p input
+ * Data types supported: QASYMM16 with scale of 0.125 and 0 offset if @p input is QASYMM8/QASYMM8_SIGNED, otherwise same as @p input
* @param[out] output Destination tensor. Data types supported: Same as @p input.
* @param[in] pool_info Contains pooling operation information described in @ref ROIPoolingLayerInfo.
*
@@ -54,7 +65,7 @@ public:
/** Static function to check if given info will lead to a valid configuration of @ref NEROIAlignLayerKernel
*
* @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[in] rois ROIs tensor info. Data types supported: QASYMM16 with scale of 0.125 and 0 offset if @p input is QASYMM8,
+ * @param[in] rois ROIs tensor info. Data types supported: QASYMM16 with scale of 0.125 and 0 offset if @p input is QASYMM8/QASYMM8_SIGNED,
* otherwise same as @p input
* @param[in] output Destination tensor info. Data types supported: Same as @p input.
* @param[in] pool_info Contains pooling operation information described in @ref ROIPoolingLayerInfo.
diff --git a/arm_compute/runtime/NEON/functions/NEROIPoolingLayer.h b/arm_compute/runtime/NEON/functions/NEROIPoolingLayer.h
index 214dd43402..2992b3eb95 100644
--- a/arm_compute/runtime/NEON/functions/NEROIPoolingLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEROIPoolingLayer.h
@@ -53,6 +53,15 @@ public:
~NEROIPoolingLayer();
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |dst |
+ * |:--------------|:--------------|:--------------|
+ * |F32 |U16 |F32 |
+ * |QASYMM8 |U16 |QASYMM8 |
+ *
* @param[in] input Source tensor. Data types supported: QASYMM8/F32
* @param[in] rois ROIs tensor, it is a 2D tensor of size [5, N] (where N is the number of ROIs) containing top left and bottom right corner
* as coordinate of an image and batch_id of ROI [ batch_id, x1, y1, x2, y2 ]. Data types supported: U16
diff --git a/arm_compute/runtime/NEON/functions/NERange.h b/arm_compute/runtime/NEON/functions/NERange.h
index 28976001d7..cb14c8fdde 100644
--- a/arm_compute/runtime/NEON/functions/NERange.h
+++ b/arm_compute/runtime/NEON/functions/NERange.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -57,6 +57,21 @@ public:
~NERange();
/** Initialize the kernel's start, end, step and output tensor.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |dst |
+ * |:---------|
+ * |U8 |
+ * |S8 |
+ * |U16 |
+ * |S16 |
+ * |U32 |
+ * |S32 |
+ * |F16 |
+ * |F32 |
+ *
* @param[out] output Output tensor. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32.
* @param[in] start The starting value of the sequence.
* @param[in] end The ending (not including) value of the sequence.
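
A minimal NERange sketch producing the F32 sequence 0, 2, 4, ..., 18 (illustrative, not part of the patch; the values and names are assumptions). The output length must match the number of generated elements, ceil((end - start) / step):

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/runtime/NEON/functions/NERange.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        Tensor dst;
        dst.allocator()->init(TensorInfo(TensorShape(10U), 1, DataType::F32)); // (20 - 0) / 2 = 10 elements

        NERange range;
        range.configure(&dst, 0.f /* start */, 20.f /* end, exclusive */, 2.f /* step */);

        dst.allocator()->allocate();
        range.run();
        return 0;
    }
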
diff --git a/arm_compute/runtime/NEON/functions/NEReduceMean.h b/arm_compute/runtime/NEON/functions/NEReduceMean.h
index 89cd09812b..7512115a3f 100644
--- a/arm_compute/runtime/NEON/functions/NEReduceMean.h
+++ b/arm_compute/runtime/NEON/functions/NEReduceMean.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -54,6 +54,17 @@ public:
~NEReduceMean();
/** Configure kernel
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |QASYMM8 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |
+ * |F16 |F16 |
+ * |F32 |F32 |
+ *
* @note Supported tensor rank: up to 4
*
* @param[in] input Source tensor. Data type supported: QASYMM8_SIGNED/QASYMM8/F16/F32
diff --git a/arm_compute/runtime/NEON/functions/NEReductionOperation.h b/arm_compute/runtime/NEON/functions/NEReductionOperation.h
index b96b70926c..533c10adcf 100644
--- a/arm_compute/runtime/NEON/functions/NEReductionOperation.h
+++ b/arm_compute/runtime/NEON/functions/NEReductionOperation.h
@@ -58,7 +58,19 @@ public:
~NEReductionOperation();
/** Set the input and output tensors.
*
- * @param[in, out] input Source tensor. Data type supported: QASYMM8_SIGNED/QASYMM8/F16/F32/S32. Data layouts supported: NCHW. (Written to only for border_size != 0)
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |QASYMM8 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |
+ * |F16 |F16 |
+ * |F32 |F32 |
+ * |S32 |S32 |
+ *
+ * @param[in, out] input Source tensor. Data type supported: QASYMM8_SIGNED/QASYMM8/F16/F32/S32. (Written to only for border_size != 0)
* @param[out] output Destination tensor. Data types and data layouts supported: same as @p input.
* @param[in] axis Dimension along which to reduce. Supported reduction axis : 0
* @param[in] op Reduction operation to perform.
@@ -68,7 +80,7 @@ public:
/** Static function to check if given info will lead to a valid configuration of @ref NEReductionOperation.
*
- * @param[in] input Source tensor info. Data type supported: QASYMM8_SIGNED/QASYMM8/F16/F32/S32. Data layouts supported: NCHW.
+ * @param[in] input Source tensor info. Data type supported: QASYMM8_SIGNED/QASYMM8/F16/F32/S32.
* @param[in] output Destination tensor info. Data types and data layouts supported: same as @p input.
* @param[in] axis Dimension along which to reduce. Supported reduction axis : 0
* @param[in] op Reduction operation to perform.
diff --git a/arm_compute/runtime/NEON/functions/NERemap.h b/arm_compute/runtime/NEON/functions/NERemap.h
index 835ebfab7e..1693078f66 100644
--- a/arm_compute/runtime/NEON/functions/NERemap.h
+++ b/arm_compute/runtime/NEON/functions/NERemap.h
@@ -44,6 +44,14 @@ class NERemap : public INESimpleFunction
public:
/** Initialise the function's sources, destination, interpolation policy and border mode.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |src2 |dst |
+ * |:------|:------|:------|:------|
+ * |U8 |F32 |F32 |U8 |
+ *
* @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED)
* @param[in] map_x Map for X coordinates. Data type supported: F32.
* @param[in] map_y Map for Y coordinates. Data type supported: F32.
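A minimal sketch of wiring up the sources and destination listed above (the image size, interpolation policy and border mode are illustrative assumptions; filling the image and the coordinate maps is omitted):

    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/NEON/NEFunctions.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        // Remap a 64x64 U8 image through per-pixel F32 coordinate maps
        Tensor src, map_x, map_y, dst;
        src.allocator()->init(TensorInfo(TensorShape(64U, 64U), 1, DataType::U8));
        map_x.allocator()->init(TensorInfo(TensorShape(64U, 64U), 1, DataType::F32));
        map_y.allocator()->init(TensorInfo(TensorShape(64U, 64U), 1, DataType::F32));
        dst.allocator()->init(TensorInfo(TensorShape(64U, 64U), 1, DataType::U8));

        NERemap remap;
        remap.configure(&src, &map_x, &map_y, &dst, InterpolationPolicy::NEAREST_NEIGHBOR, BorderMode::CONSTANT, 0);

        for(Tensor *t : { &src, &map_x, &map_y, &dst })
        {
            t->allocator()->allocate();
        }
        remap.run();
        return 0;
    }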
diff --git a/arm_compute/runtime/NEON/functions/NEReorgLayer.h b/arm_compute/runtime/NEON/functions/NEReorgLayer.h
index f76d1d252c..0a7d824d10 100644
--- a/arm_compute/runtime/NEON/functions/NEReorgLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEReorgLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -39,6 +39,15 @@ class NEReorgLayer : public INESimpleFunctionNoBorder
public:
/** Initialise the kernel's inputs and outputs
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |All |All |
+ *
* @param[in] input First tensor input. Data type supported: All
* @param[out] output Output tensor. Data type supported: Same as @p input
* @param[in] stride Stride to be used during data re-organization
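A minimal sketch of the configuration documented above, using a stride of 2 so the spatial dimensions shrink by 2 and the channels grow by 4 (NCHW shapes and the main() scaffolding are illustrative assumptions):

    #include "arm_compute/runtime/NEON/NEFunctions.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        // Reorg with stride 2: 8x8x4 (WxHxC) becomes 4x4x16
        Tensor src, dst;
        src.allocator()->init(TensorInfo(TensorShape(8U, 8U, 4U), 1, DataType::F32));
        dst.allocator()->init(TensorInfo(TensorShape(4U, 4U, 16U), 1, DataType::F32));

        NEReorgLayer reorg;
        reorg.configure(&src, &dst, /*stride=*/2);

        src.allocator()->allocate();
        dst.allocator()->allocate();
        reorg.run();
        return 0;
    }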
diff --git a/arm_compute/runtime/NEON/functions/NEReverse.h b/arm_compute/runtime/NEON/functions/NEReverse.h
index 2048dafcb5..c02fff54a5 100644
--- a/arm_compute/runtime/NEON/functions/NEReverse.h
+++ b/arm_compute/runtime/NEON/functions/NEReverse.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -39,6 +39,14 @@ class NEReverse : public INESimpleFunctionNoBorder
public:
/** Initialize the function
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |dst |
+ * |:--------------|:--------------|:--------------|
+ * |All |U32 |All |
+ *
* @param[in] input Input tensor. Data types supported: All
* @param[out] output Output tensor. Data type supported: Same as @p input
* @param[in] axis Axis tensor. Contains the indices of the dimensions to reverse. Data type supported: U32
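A minimal sketch of the call documented above (shapes and the main() scaffolding are illustrative assumptions; after allocation the U32 axis tensor would be filled with the dimension indices to reverse, which is omitted here):

    #include "arm_compute/runtime/NEON/NEFunctions.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        // Reverse a 4x4 tensor along the dimensions listed in the U32 axis tensor (e.g. {0})
        Tensor src, dst, axis;
        src.allocator()->init(TensorInfo(TensorShape(4U, 4U), 1, DataType::F32));
        dst.allocator()->init(TensorInfo(TensorShape(4U, 4U), 1, DataType::F32));
        axis.allocator()->init(TensorInfo(TensorShape(1U), 1, DataType::U32));

        NEReverse reverse;
        reverse.configure(&src, &dst, &axis);

        src.allocator()->allocate();
        dst.allocator()->allocate();
        axis.allocator()->allocate();
        reverse.run();
        return 0;
    }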
diff --git a/arm_compute/runtime/NEON/functions/NESelect.h b/arm_compute/runtime/NEON/functions/NESelect.h
index c66fbfa7d4..c8e5a204dd 100644
--- a/arm_compute/runtime/NEON/functions/NESelect.h
+++ b/arm_compute/runtime/NEON/functions/NESelect.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -39,6 +39,14 @@ class NESelect : public INESimpleFunctionNoBorder
public:
/** Initialise the kernel's inputs and output.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |src2 |dst |
+ * |:--------------|:--------------|:------|:--------------|
+ * |U8 |All |All |All |
+ *
* @param[in] c Condition input tensor. Data types supported: U8.
* @param[in] x First input tensor. Data types supported: All.
* @param[in] y Second input tensor. Data types supported: Same as @p x
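A minimal sketch of the configuration documented above, computing dst[i] = c[i] ? x[i] : y[i] element-wise (shapes and the main() scaffolding are illustrative assumptions; filling the buffers is omitted):

    #include "arm_compute/runtime/NEON/NEFunctions.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        // U8 condition tensor selects between two F32 tensors of the same shape
        Tensor c, x, y, dst;
        c.allocator()->init(TensorInfo(TensorShape(16U), 1, DataType::U8));
        x.allocator()->init(TensorInfo(TensorShape(16U), 1, DataType::F32));
        y.allocator()->init(TensorInfo(TensorShape(16U), 1, DataType::F32));
        dst.allocator()->init(TensorInfo(TensorShape(16U), 1, DataType::F32));

        NESelect select;
        select.configure(&c, &x, &y, &dst);

        for(Tensor *t : { &c, &x, &y, &dst })
        {
            t->allocator()->allocate();
        }
        select.run();
        return 0;
    }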
diff --git a/arm_compute/runtime/NEON/functions/NESpaceToBatchLayer.h b/arm_compute/runtime/NEON/functions/NESpaceToBatchLayer.h
index 27c1ddf8e3..ad8c1467d0 100644
--- a/arm_compute/runtime/NEON/functions/NESpaceToBatchLayer.h
+++ b/arm_compute/runtime/NEON/functions/NESpaceToBatchLayer.h
@@ -58,6 +58,15 @@ public:
~NESpaceToBatchLayer();
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |src2 |dst |
+ * |:---------|:---------|:---------|:---------|
+ * |All |S32 |S32 |All |
+ *
* @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
* @param[in] block_shape 1-D tensor with shape [M]. Supported M: 2. Data types supported: S32
* @param[in] paddings 2-D tensor with shape [2, M] (First dimension is the fastest-changing dimension). Supported M: 2. Data types supported: S32
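A minimal sketch of the tensor-driven variant documented above (shapes and the main() scaffolding are illustrative assumptions; the S32 block_shape and paddings tensors would be filled after allocation, which is omitted here):

    #include "arm_compute/runtime/NEON/NEFunctions.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        // 4x4x1x1 (WxHxCxN) input, 2x2 block and zero padding -> 2x2x1x4 output
        Tensor src, block_shape, paddings, dst;
        src.allocator()->init(TensorInfo(TensorShape(4U, 4U, 1U, 1U), 1, DataType::F32));
        block_shape.allocator()->init(TensorInfo(TensorShape(2U), 1, DataType::S32));  // [M] = [2]
        paddings.allocator()->init(TensorInfo(TensorShape(2U, 2U), 1, DataType::S32)); // [2, M]
        dst.allocator()->init(TensorInfo(TensorShape(2U, 2U, 1U, 4U), 1, DataType::F32));

        NESpaceToBatchLayer space_to_batch;
        space_to_batch.configure(&src, &block_shape, &paddings, &dst);

        for(Tensor *t : { &src, &block_shape, &paddings, &dst })
        {
            t->allocator()->allocate();
        }
        space_to_batch.run();
        return 0;
    }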
diff --git a/arm_compute/runtime/NEON/functions/NESpaceToDepthLayer.h b/arm_compute/runtime/NEON/functions/NESpaceToDepthLayer.h
index 73c228d8ee..1820cb8f6b 100644
--- a/arm_compute/runtime/NEON/functions/NESpaceToDepthLayer.h
+++ b/arm_compute/runtime/NEON/functions/NESpaceToDepthLayer.h
@@ -53,6 +53,15 @@ public:
~NESpaceToDepthLayer();
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |All |All |
+ *
* @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
* @param[out] output Tensor output. Data types supported: same as @p input
* @param[in] block_shape Block shape value
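A minimal sketch of the call documented above with a block shape of 2, so the spatial dimensions shrink by 2 and the channels grow by 4 (shapes and the main() scaffolding are illustrative assumptions):

    #include "arm_compute/runtime/NEON/NEFunctions.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        // Block shape 2: 4x4x3x1 (WxHxCxN) becomes 2x2x12x1
        Tensor src, dst;
        src.allocator()->init(TensorInfo(TensorShape(4U, 4U, 3U, 1U), 1, DataType::F32));
        dst.allocator()->init(TensorInfo(TensorShape(2U, 2U, 12U, 1U), 1, DataType::F32));

        NESpaceToDepthLayer space_to_depth;
        space_to_depth.configure(&src, &dst, /*block_shape=*/2);

        src.allocator()->allocate();
        dst.allocator()->allocate();
        space_to_depth.run();
        return 0;
    }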
diff --git a/arm_compute/runtime/NEON/functions/NESplit.h b/arm_compute/runtime/NEON/functions/NESplit.h
index ede5ecf65a..206f299c06 100644
--- a/arm_compute/runtime/NEON/functions/NESplit.h
+++ b/arm_compute/runtime/NEON/functions/NESplit.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -40,6 +40,18 @@ namespace arm_compute
class NESplit : public CPPSplit<NESlice>
{
public:
+ /** NESplit
+ *
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |All |All |
+ *
+ */
+
// Inherited methods overridden:
void run() override;
};
diff --git a/arm_compute/runtime/NEON/functions/NEStackLayer.h b/arm_compute/runtime/NEON/functions/NEStackLayer.h
index f6fa4f2eb3..ae4e468f21 100644
--- a/arm_compute/runtime/NEON/functions/NEStackLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEStackLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -58,6 +58,14 @@ public:
~NEStackLayer();
/** Initialise the kernel's inputs vector and output.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |All |All |
+ *
* @note Supported input tensor rank: up to 4
*
* @param[in] input The vectors containing all the tensors with the same shape to stack. Data types supported: All
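A minimal sketch of the call documented above, stacking two rank-2 tensors along a new trailing axis (shapes, the axis and the main() scaffolding are illustrative assumptions):

    #include <vector>

    #include "arm_compute/runtime/NEON/NEFunctions.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        // Stack two 4x4 tensors along new axis 2, producing a 4x4x2 output
        Tensor in0, in1, dst;
        in0.allocator()->init(TensorInfo(TensorShape(4U, 4U), 1, DataType::F32));
        in1.allocator()->init(TensorInfo(TensorShape(4U, 4U), 1, DataType::F32));
        dst.allocator()->init(TensorInfo(TensorShape(4U, 4U, 2U), 1, DataType::F32));

        std::vector<ITensor *> inputs = { &in0, &in1 };

        NEStackLayer stack;
        stack.configure(inputs, /*axis=*/2, &dst);

        in0.allocator()->allocate();
        in1.allocator()->allocate();
        dst.allocator()->allocate();
        stack.run();
        return 0;
    }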
diff --git a/arm_compute/runtime/NEON/functions/NETile.h b/arm_compute/runtime/NEON/functions/NETile.h
index d5ce76c9cf..915e5aa1da 100644
--- a/arm_compute/runtime/NEON/functions/NETile.h
+++ b/arm_compute/runtime/NEON/functions/NETile.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -39,6 +39,14 @@ class NETile : public INESimpleFunctionNoBorder
public:
/** Set the source, destination of the kernel
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |All |All |
+ *
* @param[in] input Source tensor. Data type supported: All.
* @param[out] output Destination tensor. Same as @p input
* @param[in] multiples Contains the number of times the input tensor should be replicated on the given dimension.
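A minimal sketch of the call documented above, replicating the input twice along its first dimension (shapes, the multiples and the main() scaffolding are illustrative assumptions):

    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/NEON/NEFunctions.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        // Tile a 2x3 tensor twice along dimension 0 -> 4x3
        Tensor src, dst;
        src.allocator()->init(TensorInfo(TensorShape(2U, 3U), 1, DataType::F32));
        dst.allocator()->init(TensorInfo(TensorShape(4U, 3U), 1, DataType::F32));

        Multiples multiples = { 2, 1 }; // one replication factor per dimension

        NETile tile;
        tile.configure(&src, &dst, multiples);

        src.allocator()->allocate();
        dst.allocator()->allocate();
        tile.run();
        return 0;
    }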
diff --git a/arm_compute/runtime/NEON/functions/NEUnstack.h b/arm_compute/runtime/NEON/functions/NEUnstack.h
index c8e85115f7..079fee5b9e 100644
--- a/arm_compute/runtime/NEON/functions/NEUnstack.h
+++ b/arm_compute/runtime/NEON/functions/NEUnstack.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -57,6 +57,14 @@ public:
~NEUnstack() = default;
/** Set the input, output and unstacking axis.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |All |All |
+ *
* @param[in] input A tensor to be unstacked. Data type supported: All.
* @param[in,out] output_vector A vector of tensors. Data types supported: same as @p input.
* Note: The number of elements of the vector will be used as the number of slices to be taken from the axis.
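A minimal sketch of the call documented above, splitting a rank-3 tensor into two rank-2 slices along its last axis (shapes, the axis and the main() scaffolding are illustrative assumptions):

    #include <vector>

    #include "arm_compute/runtime/NEON/NEFunctions.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        // Unstack a 4x4x2 tensor along axis 2 into two 4x4 slices
        Tensor src, out0, out1;
        src.allocator()->init(TensorInfo(TensorShape(4U, 4U, 2U), 1, DataType::F32));
        out0.allocator()->init(TensorInfo(TensorShape(4U, 4U), 1, DataType::F32));
        out1.allocator()->init(TensorInfo(TensorShape(4U, 4U), 1, DataType::F32));

        std::vector<ITensor *> outputs = { &out0, &out1 }; // two elements -> two slices

        NEUnstack unstack;
        unstack.configure(&src, outputs, /*axis=*/2);

        src.allocator()->allocate();
        out0.allocator()->allocate();
        out1.allocator()->allocate();
        unstack.run();
        return 0;
    }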
diff --git a/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h
index 3367b10a96..77f9093ed4 100644
--- a/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h
@@ -66,6 +66,16 @@ public:
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |src2 |dst |
+ * |:--------------|:--------------|:------|:--------------|
+ * |F16 |F16 |F16 |F16 |
+ * |F32 |F32 |F32 |F32 |
+ *
* @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
* while every optional dimension from 4 and above represent a batch of inputs.
* Data types supported: F16/F32.
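A minimal sketch of the configuration documented above for a 3x3 F32 convolution with stride 1 and padding 1 (shapes, the pad/stride values and the main() scaffolding are illustrative assumptions; filling weights and biases is omitted):

    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/NEON/NEFunctions.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        // NCHW: 16x16x8 input, 3x3 kernels mapping 8 -> 16 channels, same-size output
        Tensor src, weights, biases, dst;
        src.allocator()->init(TensorInfo(TensorShape(16U, 16U, 8U), 1, DataType::F32));
        weights.allocator()->init(TensorInfo(TensorShape(3U, 3U, 8U, 16U), 1, DataType::F32));
        biases.allocator()->init(TensorInfo(TensorShape(16U), 1, DataType::F32));
        dst.allocator()->init(TensorInfo(TensorShape(16U, 16U, 16U), 1, DataType::F32));

        NEWinogradConvolutionLayer winograd;
        winograd.configure(&src, &weights, &biases, &dst, PadStrideInfo(1, 1, 1, 1)); // stride x/y, pad x/y

        for(Tensor *t : { &src, &weights, &biases, &dst })
        {
            t->allocator()->allocate();
        }
        winograd.run();
        return 0;
    }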
diff --git a/arm_compute/runtime/OperatorList.h b/arm_compute/runtime/OperatorList.h
index 8c43c68b90..a659a79423 100644
--- a/arm_compute/runtime/OperatorList.h
+++ b/arm_compute/runtime/OperatorList.h
@@ -40,7 +40,7 @@
*
*/
-/** ArgMinMaxLayer (not ported)
+/** ArgMinMaxLayer
*
* Description:
* Function to calculate the index of the minimum or maximum values in a tensor based on an axis.
@@ -71,27 +71,27 @@
*
*/
-/** BatchNormalizationLayer (not ported)
+/** BatchNormalizationLayer
*
* Description:
- * @f[ out_i = \gamma * (\frac{in_i - \mu_{B}}{\sqrt{\sigma^2_{B} + \epsilon}}) + \beta \equiv BN_{\gamma,\beta}(in_i) @f]
+ * Function to perform batch normalization.
*
* Equivalent Android NNAPI Op:
- * None
+ * n/a
*
*/
-/** BatchToSpaceLayer (not ported)
+/** BatchToSpaceLayer
*
* Description:
- * Rearranges (permutes) data from batch into blocks of spatial data, followed by cropping. It is the reverse transformation of SpaceToBatch (from TF website)
+ * Batch to space transformation.
*
* Equivalent Android NNAPI Op:
* ANEURALNETWORKS_BATCH_TO_SPACE_ND
*
*/
-/** BitwiseAnd (not ported)
+/** BitwiseAnd
*
* Description:
* Function to perform bitwise AND between 2 tensors.
@@ -101,7 +101,7 @@
*
*/
-/** BitwiseNot (not ported)
+/** BitwiseNot
*
* Description:
* Function to perform bitwise NOT.
@@ -111,7 +111,7 @@
*
*/
-/** BitwiseOr (not ported)
+/** BitwiseOr
*
* Description:
* Function to perform bitwise OR between 2 tensors.
@@ -121,27 +121,27 @@
*
*/
-/** BitwiseXor (not ported)
+/** BitwiseXor
*
* Description:
* Function to perform bitwise XOR between 2 tensors.
*
* Equivalent Android NNAPI Op:
- * None
+ * n/a
*
*/
-/** BoundingBoxTransform (not ported)
+/** BoundingBoxTransform
*
* Description:
- * Function to .
+ * Transform proposal bounding boxes to target bounding box using bounding box deltas.
*
* Equivalent Android NNAPI Op:
- * ?
+ * n/a
*
*/
-/** Cast (not ported)
+/** Cast
*
* Description:
* Function to cast a tensor.
@@ -151,20 +151,20 @@
*
*/
-/** ChannelShuffelLayer (not ported)
+/** ChannelShuffleLayer
*
* Description:
- * Function to cast a tensor.
+ * Function to shuffle the channels of the input tensor.
*
* Equivalent Android NNAPI Op:
* ANEURALNETWORKS_CHANNEL_SHUFFLE
*
*/
-/** Comparison (not ported) (only CL)
+/** Comparison (only CL)
*
* Description:
- * Function to cast a tensor.
+ * Function to compare 2 tensors.
*
* Equivalent Android NNAPI Op:
* ANEURALNETWORKS_EQUAL
@@ -192,11 +192,11 @@
* Function to transpose the weights for the fully connected layer.
*
* Equivalent Android NNAPI Op:
- * None
+ * n/a
*
*/
-/** ConvolutionLayer (not ported)
+/** ConvolutionLayer
*
* Description:
* Function to compute a convolution layer.
@@ -212,74 +212,74 @@
* Function to copy a tensor.
*
* Equivalent Android NNAPI Op:
- * None
+ * n/a
*
*/
/** Crop (only CL)
*
* Description:
- * Function to .
+ * Performs a copy of input tensor to the output tensor.
*
* Equivalent Android NNAPI Op:
- * ?
+ * n/a
*
*/
-/** CropResize (not ported)
+/** CropResize
*
* Description:
- * Function to .
+ * Function to perform cropping and resizing.
*
* Equivalent Android NNAPI Op:
- * ?
+ * n/a
*
*/
-/** DeconvolutionLayer (not ported)
+/** DeconvolutionLayer
*
* Description:
- * Function to .
+ * Function to compute a deconvolution or transpose convolution.
*
* Equivalent Android NNAPI Op:
* ANEURALNETWORKS_TRANSPOSE_CONV_2D
*
*/
-/** DeconvolutionLayerUpsample (only CL) (not ported)
+/** DeconvolutionLayerUpsample (only CL)
*
* Description:
- * Function to .
+ * Function to execute deconvolution upsample on OpenCL.
*
* Equivalent Android NNAPI Op:
* ANEURALNETWORKS_TRANSPOSE_CONV_2D
*
*/
-/** DepthConverterLayer (not ported)
+/** DepthConvertLayer
*
* Description:
- * Function to .
+ * Performs a down-scaling depth conversion.
*
* Equivalent Android NNAPI Op:
- * None
+ * n/a
*
*/
-/** DepthToSpaceLayer (not ported)
+/** DepthToSpaceLayer
*
* Description:
- * Function to .
+ * Depth to Space transformation.
*
* Equivalent Android NNAPI Op:
* ANEURALNETWORKS_DEPTH_TO_SPACE
*
*/
-/** DepthwiseConvolutionLayer (not ported)
+/** DepthwiseConvolutionLayer
*
* Description:
- * Function to perform depthwise separable convolution
+ * Function to perform depthwise separable convolution.
*
* Equivalent Android NNAPI Op:
* ANEURALNETWORKS_DEPTHWISE_CONV_2D
@@ -289,17 +289,17 @@
/** DequantizationLayer
*
* Description:
- * Function to dequantize the values in a tensor
+ * Function to dequantize the values in a tensor.
*
* Equivalent Android NNAPI Op:
* ANEURALNETWORKS_DEQUANTIZE
*
*/
-/** DetectionPostProcessLayer (not ported) (no CL)
+/** DetectionPostProcessLayer (no CL)
*
* Description:
- * Function to generate the detection output based on center size encoded boxes, class prediction and anchors by doing non maximum suppression (NMS)
+ * Function to generate the detection output based on center size encoded boxes, class prediction and anchors by doing non maximum suppression (NMS).
*
* Equivalent Android NNAPI Op:
* ANEURALNETWORKS_DETECTION_POSTPROCESSING
@@ -309,7 +309,7 @@
/** DirectConvolutionLayer
*
* Description:
- * Function to
+ * Function to compute direct convolution.
*
* Equivalent Android NNAPI Op:
* ANEURALNETWORKS_CONV_2D
@@ -319,7 +319,7 @@
/** DirectDeconvolutionLayer (only CL)
*
* Description:
- * Function to
+ * Function to run the deconvolution layer.
*
* Equivalent Android NNAPI Op:
* ANEURALNETWORKS_TRANSPOSE_CONV_2D
@@ -387,27 +387,27 @@
/** FFT1D
*
* Description:
- * Fast Fourier Transform 1D
+ * Fast Fourier Transform 1D.
*
* Equivalent Android NNAPI Op:
- * None
+ * n/a
*
*/
/** FFT2D
*
* Description:
- * Fast Fourier Transform 2D
+ * Fast Fourier Transform 2D.
*
* Equivalent Android NNAPI Op:
- * None
+ * n/a
*
*/
/** FFTConvolutionLayer
*
* Description:
- * Fast Fourier Transform Convolution
+ * Fast Fourier Transform Convolution.
*
* Equivalent Android NNAPI Op:
* ANEURALNETWORKS_CONV_2D
@@ -417,24 +417,24 @@
/** Fill
*
* Description:
- * Set the values of a tensor with a given value
+ * Set the values of a tensor with a given value.
*
* Equivalent Android NNAPI Op:
* ANEURALNETWORKS_FILL
*
*/
-/** FillBorder (not ported)
+/** FillBorder
*
* Description:
- *
+ * Function to fill the borders of a tensor.
*
* Equivalent Android NNAPI Op:
- * ?
+ * n/a
*
*/
-/** FlattenLayer (not ported)
+/** FlattenLayer
*
* Description:
* Reshape a tensor to be 1D
@@ -447,104 +447,104 @@
/** Floor
*
* Description:
- * Round the value to the lowest number
+ * Round the value to the lowest number.
*
* Equivalent Android NNAPI Op:
* ANEURALNETWORKS_FLOOR
*
*/
-/** FullyConnectedLayer (not ported)
+/** FullyConnectedLayer
*
* Description:
- * Function to perform a fully connected / dense layer
+ * Function to perform a fully connected / dense layer.
*
* Equivalent Android NNAPI Op:
* ANEURALNETWORKS_FULLY_CONNECTED
*
*/
-/** FuseBatchNormalization (not ported)
+/** FuseBatchNormalization
*
* Description:
- * Function to .
+ * Function to fuse the batch normalization node to a preceding convolution node.
*
* Equivalent Android NNAPI Op:
- * None
+ * n/a
*
*/
-/** Gather (not ported)
+/** Gather
*
* Description:
- * Function to .
+ * Performs the Gather operation along the chosen axis.
*
* Equivalent Android NNAPI Op:
* ANEURALNETWORKS_GATHER
*
*/
-/** GEMM (not ported)
+/** GEMM
*
* Description:
* General Matrix Multiplication.
*
* Equivalent Android NNAPI Op:
- * None
+ * n/a
*
*/
-/** GEMMConv2D (not ported) (no CL)
+/** GEMMConv2D (no CL)
*
* Description:
* General Matrix Multiplication.
*
* Equivalent Android NNAPI Op:
- * None
+ * ANEURALNETWORKS_CONV_2D
*
*/
-/** GEMMConvolutionLayer (not ported)
+/** GEMMConvolutionLayer
*
* Description:
* General Matrix Multiplication.
*
* Equivalent Android NNAPI Op:
- * None
+ * ANEURALNETWORKS_CONV_2D
*
*/
-/** GEMMDeconvolutionLayer (not ported) (only CL)
+/** GEMMDeconvolutionLayer (only CL)
*
* Description:
* General Matrix Multiplication.
*
* Equivalent Android NNAPI Op:
- * None
+ * ANEURALNETWORKS_TRANSPOSE_CONV_2D
*
*/
-/** GEMMLowpMatrixMultiplyCore (not ported)
+/** GEMMLowpMatrixMultiplyCore
*
* Description:
* General Matrix Multiplication.
*
* Equivalent Android NNAPI Op:
- * None
+ * n/a
*
*/
-/** GEMMLowpOutputStage (not ported)
+/** GEMMLowpOutputStage
*
* Description:
* General Matrix Multiplication.
*
* Equivalent Android NNAPI Op:
- * None
+ * n/a
*
*/
-/** GenerateProposalsLayer (not ported)
+/** GenerateProposalsLayer
*
* Description:
* Function to generate proposals for a RPN (Region Proposal Network).
@@ -554,7 +554,7 @@
*
*/
-/** InstanceNormalizationLayer (not ported)
+/** InstanceNormalizationLayer
*
* Description:
* Function to perform an instance normalization on a given axis.
@@ -564,7 +564,7 @@
*
*/
-/** L2NormalizationLayer (not ported)
+/** L2NormalizeLayer
*
* Description:
* Function to perform an L2 normalization on a given axis.
@@ -583,102 +583,92 @@
* - Logical NOT
*
* Equivalent Android NNAPI Op:
- * None?
+ * n/a
*
*/
/** LogicalAnd (only CL)
*
* Description:
- * Function to perform Logical AND
+ * Function to perform Logical AND.
*
* Equivalent Android NNAPI Op:
- * None?
+ * n/a
*
*/
/** LogicalOr (only CL)
*
* Description:
- * Function to perform Logical OR
+ * Function to perform Logical OR.
*
* Equivalent Android NNAPI Op:
- * None?
+ * n/a
*
*/
/** LogicalNot (only CL)
*
* Description:
- * Function to perform Logical NOT
+ * Function to perform Logical NOT.
*
* Equivalent Android NNAPI Op:
- * None?
+ * n/a
*
*/
-/** LSTMLayer (not ported)
+/** LSTMLayer
*
* Description:
- * Function to perform LSTM
+ * Function to perform a single time step in a Long Short-Term Memory (LSTM) layer.
*
* Equivalent Android NNAPI Op:
* ANEURALNETWORKS_LSTM
*
*/
-/** LSTMLayerQuantized (not ported)
+/** LSTMLayerQuantized
*
* Description:
- * Function to perform LSTM
+ * Function to perform quantized LSTM (Long Short-Term Memory).
*
* Equivalent Android NNAPI Op:
* ANEURALNETWORKS_QUANTIZED_LSTM
- * ANEURALNETWORKS_QUANTIZED_16BIT_LSTM ?
+ * ANEURALNETWORKS_QUANTIZED_16BIT_LSTM
*
*/
-/** MaxUnpoolingLayer (not ported)
+/** MaxUnpoolingLayer
*
* Description:
- * Function to perform MaxUnpooling
+ * Function to perform MaxUnpooling.
*
* Equivalent Android NNAPI Op:
- * ?
+ * n/a
*
*/
-/** MeanStdDevNormalizationLayer (not ported)
+/** MeanStdDevNormalizationLayer
*
* Description:
* Function to execute mean and standard deviation normalization.
*
* Equivalent Android NNAPI Op:
- * None ?
+ * n/a
*
*/
-/** MeanStdDevNormalizationLayer (not ported)
- *
- * Description:
- * Function to execute mean and standard deviation normalization.
- *
- * Equivalent Android NNAPI Op:
- * None ?
- *
- */
-
-/** NormalizationLayer (not ported)
+/** NormalizationLayer
*
* Description:
* Function to compute normalization layer.
*
* Equivalent Android NNAPI Op:
- * None ?
+ * ANEURALNETWORKS_LOCAL_RESPONSE_NORMALIZATION
*
*/
-/** PadLayer (not ported)
+/** PadLayer
*
* Description:
* Function to pad a tensor.
@@ -731,24 +721,24 @@
*
*/
-/** PriorBoxLayer (not ported)
+/** PriorBoxLayer
*
* Description:
- * Function to compute the activation layer with the PRELU activation function.
+ * Function to compute prior boxes.
*
* Equivalent Android NNAPI Op:
- * ?
+ * n/a
*
*/
-/** QLSTMLayer (not ported)
+/** QLSTMLayer
*
* Description:
- * Function to perform LSTM
+ * Function to perform quantized LSTM (Long Short-Term Memory).
*
* Equivalent Android NNAPI Op:
* ANEURALNETWORKS_QUANTIZED_LSTM
- * ANEURALNETWORKS_QUANTIZED_16BIT_LSTM ?
+ * ANEURALNETWORKS_QUANTIZED_16BIT_LSTM
*
*/
@@ -762,17 +752,17 @@
*
*/
-/** Range (not ported)
+/** Range
*
* Description:
- * Function to .
+ * Function to generate a sequence of numbers starting from START and extending by increments of 'STEP' up to but not including 'END'.
*
* Equivalent Android NNAPI Op:
- * none?
+ * n/a
*
*/
-/** RecudeMean (not ported)
+/** ReduceMean
*
* Description:
* Function to perform a reduce mean operation.
@@ -782,22 +772,7 @@
*
*/
-/** RecudeOperation (not ported)
- *
- * Description:
- * Function to performe reduce mean operation.
- *
- * Equivalent Android NNAPI Op:
- * ANEURALNETWORKS_REDUCE_ALL
- * ANEURALNETWORKS_REDUCE_ANY
- * ANEURALNETWORKS_REDUCE_MAX
- * ANEURALNETWORKS_REDUCE_MIN
- * ANEURALNETWORKS_REDUCE_PROD
- * ANEURALNETWORKS_REDUCE_SUM
- *
- */
-
-/** RecudeOperation (not ported)
+/** ReductionOperation
*
* Description:
* Function to perform a reduction with the following operations
@@ -820,20 +795,20 @@
*
*/
-/** ReorgLayer (not ported)
+/** ReorgLayer
*
* Description:
- * Function to performe reorg
+ * Performs a reorganization layer of the input tensor to the output tensor.
*
* Equivalent Android NNAPI Op:
- * None?
+ * n/a
*
*/
/** ReshapeLayer
*
* Description:
- * Fucntion to reshape a tensor
+ * Function to reshape a tensor.
*
* Equivalent Android NNAPI Op:
* ANEURALNETWORKS_RESHAPE
@@ -841,40 +816,40 @@
*
*/
-/** ReverseLayer (not ported)
+/** Reverse
*
* Description:
- * Fucntion to .
+ * Function to reverse tensor according to axis.
*
* Equivalent Android NNAPI Op:
- * None?
+ * n/a
*
*/
-/** RNNLayer (not ported)
+/** RNNLayer
*
* Description:
- * Fucntion to perform RNN .
+ * Function to perform a recurrent neural network layer.
*
* Equivalent Android NNAPI Op:
* ANEURALNETWORKS_RNN
*
*/
-/** ROIAligmentLayer (not ported)
+/** ROIAlignLayer
*
* Description:
- * Fucntion to perform RNN .
+ * Function to perform ROI alignment.
*
* Equivalent Android NNAPI Op:
* ANEURALNETWORKS_ROI_ALIGN
*
*/
-/** ROIPoolingLayer (not ported)
+/** ROIPoolingLayer
*
* Description:
- * Fucntion to perform RNN .
+ * Function to perform ROI pooling.
*
* Equivalent Android NNAPI Op:
* ANEURALNETWORKS_ROI_POOLING
@@ -884,8 +859,8 @@
/** Scale
*
* Description:
- * Fucntion to perform resize a tensor using to interpolate:
- * - Bilenear
+ * Function to resize a tensor using one of the following interpolation methods:
+ * - Bilinear
* - Nearest neighbor
*
* Equivalent Android NNAPI Op:
@@ -894,10 +869,10 @@
*
*/
-/** Select (not ported)
+/** Select
*
* Description:
- * Fucntion to select values from 2 tensors depending on an input tensor of booleans.
+ * Function to select values from 2 tensors depending on an input tensor of booleans.
*
* Equivalent Android NNAPI Op:
* ANEURALNETWORKS_SELECT
@@ -925,7 +900,7 @@
*
*/
-/** SpaceToBatchLayer (not ported)
+/** SpaceToBatchLayer
*
* Description:
* Function to divide a tensor spatially.
@@ -935,7 +910,7 @@
*
*/
-/** SpaceToDepthLayer (not ported)
+/** SpaceToDepthLayer
*
* Description:
* Function to rearrange blocks of spatial data into depth.
@@ -945,7 +920,7 @@
*
*/
-/** Split (not ported)
+/** Split
*
* Description:
* Function to split a tensor along a given axis.
@@ -955,13 +930,13 @@
*
*/
-/** StackLayer (not ported)
+/** StackLayer
*
* Description:
* Function to stack tensors along an axis.
*
* Equivalent Android NNAPI Op:
- * none
+ * n/a
*
*/
@@ -975,7 +950,7 @@
*
*/
-/** Tile (not ported)
+/** Tile
*
* Description:
* Function to construct a tensor by tiling a given tensor.
@@ -988,40 +963,40 @@
/** Transpose
*
* Description:
- * Function to transpose an 2D tensor.
+ * Function to transpose a 2D tensor.
*
* Equivalent Android NNAPI Op:
* ANEURALNETWORKS_TRANSPOSE
*
*/
-/** Unstack (not ported)
+/** Unstack
*
* Description:
* Function to unpack a rank-R tensor into rank-(R-1) tensors.
*
* Equivalent Android NNAPI Op:
- * none
+ * n/a
*
*/
-/** WinogradConvolutionLayer (not ported)
+/** WinogradConvolutionLayer
*
* Description:
- * Function to.
+ * Function to do Winograd Convolution.
*
* Equivalent Android NNAPI Op:
- * None
+ * ANEURALNETWORKS_CONV_2D
*
*/
-/** WinogradInputTransform (not ported) (only CL)
+/** WinogradInputTransform (only CL)
*
* Description:
* Function to perform the Winograd input transform.
*
* Equivalent Android NNAPI Op:
- * None
+ * n/a
*
*/
diff --git a/docs/09_operators_list.dox b/docs/09_operators_list.dox
index 82a127bbd3..244f292f82 100644
--- a/docs/09_operators_list.dox
+++ b/docs/09_operators_list.dox
@@ -108,6 +108,320 @@ where N = batches, C = channels, H = height, W = width
<tr><td>F32<td>F32
</table>
<tr>
+ <td rowspan="2">ArgMinMaxLayer
+ <td rowspan="2" style="width:200px;"> Function to calculate the index of the minimum or maximum values in a tensor based on an axis.
+ <td rowspan="2">
+ <ul>
+ <li>ANEURALNETWORKS_ARGMAX
+ <li>ANEURALNETWORKS_ARGMIN
+ </ul>
+ <td>NEArgMinMaxLayer
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>QASYMM8<td>U32, S32
+ <tr><td>QASYMM8_SIGNED<td>U32, S32
+ <tr><td>S32<td>U32, S32
+ <tr><td>F16<td>U32, S32
+ <tr><td>F32<td>U32, S32
+ </table>
+<tr>
+ <td>CLArgMinMaxLayer
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>QASYMM8<td>U32, S32
+ <tr><td>QASYMM8_SIGNED<td>U32, S32
+ <tr><td>S32<td>U32, S32
+ <tr><td>F16<td>U32, S32
+ <tr><td>F32<td>U32, S32
+ </table>
+<tr>
+ <td rowspan="2">BatchNormalizationLayer
+ <td rowspan="2" style="width:200px;"> Function to perform batch normalization.
+ <td rowspan="2">
+ <ul>
+ <li>n/a
+ </ul>
+ <td>NEBatchNormalizationLayer
+ <td>
+ <ul>
+ <li>NHWC
+ <li>NCHW
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>F32<td>F32
+ <tr><td>F16<td>F16
+ </table>
+<tr>
+ <td>CLBatchNormalizationLayer
+ <td>
+ <ul>
+ <li>NHWC
+ <li>NCHW
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>F32<td>F32
+ <tr><td>F16<td>F16
+ </table>
+<tr>
+ <td rowspan="2">BatchToSpaceLayer
+ <td rowspan="2" style="width:200px;"> Batch to space transformation.
+ <td rowspan="2">
+ <ul>
+ <li>ANEURALNETWORKS_BATCH_TO_SPACE_ND
+ </ul>
+ <td>NEBatchToSpaceLayer
+ <td>
+ <ul>
+ <li>NHWC
+ <li>NCHW
+ </ul>
+ <td>
+ <table>
+ <tr><th>src0<th>src1<th>dst
+ <tr><td>All<td>S32<td>All
+ </table>
+<tr>
+ <td>CLBatchToSpaceLayer
+ <td>
+ <ul>
+ <li>NHWC
+ <li>NCHW
+ </ul>
+ <td>
+ <table>
+ <tr><th>src0<th>src1<th>dst
+ <tr><td>All<td>S32<td>All
+ </table>
+<tr>
+ <td rowspan="2">BitwiseAnd
+ <td rowspan="2" style="width:200px;"> Function to performe bitwise AND between 2 tensors.
+ <td rowspan="2">
+ <ul>
+ <li>ANEURALNETWORKS_LOGICAL_AND
+ </ul>
+ <td>NEBitwiseAnd
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>U8<td>U8
+ </table>
+<tr>
+ <td>CLBitwiseAnd
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>U8<td>U8
+ </table>
+<tr>
+ <td rowspan="2">BitwiseNot
+ <td rowspan="2" style="width:200px;"> Function to performe bitwise NOT.
+ <td rowspan="2">
+ <ul>
+ <li>ANEURALNETWORKS_LOGICAL_NOT
+ </ul>
+ <td>NEBitwiseNot
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>U8<td>U8
+ </table>
+<tr>
+ <td>CLBitwiseNot
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>U8<td>U8
+ </table>
+<tr>
+ <td rowspan="2">BitwiseOr
+ <td rowspan="2" style="width:200px;"> Function to performe bitwise OR between 2 tensors.
+ <td rowspan="2">
+ <ul>
+ <li>ANEURALNETWORKS_LOGICAL_OR
+ </ul>
+ <td>NEBitwiseOr
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>U8<td>U8
+ </table>
+<tr>
+ <td>CLBitwiseOr
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>U8<td>U8
+ </table>
+<tr>
+ <td rowspan="2">BitwiseXor
+ <td rowspan="2" style="width:200px;"> Function to performe bitwise XOR between 2 tensors.
+ <td rowspan="2">
+ <ul>
+ <li>n/a
+ </ul>
+ <td>NEBitwiseXor
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>U8<td>U8
+ </table>
+<tr>
+ <td>CLBitwiseXor
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>U8<td>U8
+ </table>
+<tr>
+ <td rowspan="2">BoundingBoxTransform
+ <td rowspan="2" style="width:200px;"> Transform proposal bounding boxes to target bounding box using bounding box deltas.
+ <td rowspan="2">
+ <ul>
+ <li>n/a
+ </ul>
+ <td>NEBoundingBoxTransform
+ <td>
+ <ul>
+ <li>NHWC
+ <li>NCHW
+ </ul>
+ <td>
+ <table>
+ <tr><th>src0<th>src1<th>dst
+ <tr><td>QASYMM16<td>QASYMM8<td>QASYMM16
+ <tr><td>F16<td>F16<td>F16
+ <tr><td>F32<td>F32<td>F32
+ </table>
+<tr>
+ <td>CLBoundingBoxTransform
+ <td>
+ <ul>
+ <li>NHWC
+ <li>NCHW
+ </ul>
+ <td>
+ <table>
+ <tr><th>src0<th>src1<th>dst
+ <tr><td>QASYMM16<td>QASYMM8<td>QASYMM16
+ <tr><td>F16<td>F16<td>F16
+ <tr><td>F32<td>F32<td>F32
+ </table>
+<tr>
+ <td rowspan="2">Cast
+ <td rowspan="2" style="width:200px;"> Function to cast a tensor.
+ <td rowspan="2">
+ <ul>
+ <li>ANEURALNETWORKS_CAST
+ </ul>
+ <td>NECast
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>QASYMM8_SIGNED<td>S16, S32, F32, F16
+ <tr><td>QASYMM8<td>U16, S16, S32, F32, F16
+ <tr><td>U8<td>U16, S16, S32, F32, F16
+ <tr><td>U16<td>U8, U32
+ <tr><td>S16<td>QASYMM8_SIGNED, U8, S32
+ <tr><td>F16<td>QASYMM8_SIGNED, QASYMM8, F32, S32, U8
+ <tr><td>S32<td>QASYMM8_SIGNED, QASYMM8, F16, F32, U8
+ <tr><td>F32<td>QASYMM8_SIGNED, QASYMM8, BFLOAT16, F16, S32, U8
+ </table>
+<tr>
+ <td>CLCast
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>U8<td>S8, U16, S16, U32, S32, F16, F32
+ <tr><td>U16<td>U8, S8, S16, U32, S32, F16, F32
+ <tr><td>S16<td>U8, S8, U16, U32, S32, F16, F32
+ <tr><td>U32<td>U8, S8, U16, S16, S32, F16, F32
+ <tr><td>S32<td>U8, S8, U16, S16, U32, F16, F32
+ <tr><td>F16<td>U8, S8, U16, S16, U32, F32
+ <tr><td>F32<td>U8, S8, U16, S16, U32, F16
+ </table>
+<tr>
+ <td rowspan="2">ChannelShuffleLayer
+ <td rowspan="2" style="width:200px;"> Function to shuffle the channels of the input tensor.
+ <td rowspan="2">
+ <ul>
+ <li>ANEURALNETWORKS_CHANNEL_SHUFFLE
+ </ul>
+ <td>NEChannelShuffleLayer
+ <td>
+ <ul>
+ <li>NCHW
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>All<td>All
+ </table>
+<tr>
+ <td>CLChannelShuffleLayer
+ <td>
+ <ul>
+ <li>NCHW
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>All<td>All
+ </table>
+<tr>
<td rowspan="2">ConcatenateLayer
<td rowspan="2" style="width:200px;"> Function to concatenate tensors along a given axis.
<td rowspan="2">
@@ -146,7 +460,7 @@ where N = batches, C = channels, H = height, W = width
<td rowspan="2" style="width:200px;"> Function to tranpose the wieghts for the fully connected layer.
<td rowspan="2">
<ul>
- <li>None
+ <li>n/a
</ul>
<td>NEConvertFullyConnectedWeights
<td>
@@ -172,11 +486,51 @@ where N = batches, C = channels, H = height, W = width
<tr><td>All<td>All
</table>
<tr>
+ <td rowspan="2">ConvolutionLayer
+ <td rowspan="2" style="width:200px;"> Function to compute a convolution layer.
+ <td rowspan="2">
+ <ul>
+ <li>ANEURALNETWORKS_CONV_2D
+ </ul>
+ <td>NEConvolutionLayer
+ <td>
+ <ul>
+ <li>NHWC
+ <li>NCHW
+ </ul>
+ <td>
+ <table>
+ <tr><th>src0<th>src1<th>src2<th>dst
+ <tr><td>F16<td>F16<td>F16<td>F16
+ <tr><td>F32<td>F32<td>F32<td>F32
+ <tr><td>QASYMM8<td>QASYMM8<td>S32<td>QASYMM8
+ <tr><td>QASYMM8<td>QSYMM8_PER_CHANNEL<td>S32<td>QASYMM8
+ <tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED<td>S32<td>QASYMM8_SIGNED
+ <tr><td>QASYMM8_SIGNED<td>QSYMM8_PER_CHANNEL<td>S32<td>QASYMM8_SIGNED
+ </table>
+<tr>
+ <td>CLConvolutionLayer
+ <td>
+ <ul>
+ <li>NHWC
+ <li>NCHW
+ </ul>
+ <td>
+ <table>
+ <tr><th>src0<th>src1<th>src2<th>dst
+ <tr><td>F16<td>F16<td>F16<td>F16
+ <tr><td>F32<td>F32<td>F32<td>F32
+ <tr><td>QASYMM8<td>QASYMM8<td>S32<td>QASYMM8
+ <tr><td>QASYMM8<td>QSYMM8_PER_CHANNEL<td>S32<td>QASYMM8
+ <tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED<td>S32<td>QASYMM8_SIGNED
+ <tr><td>QASYMM8_SIGNED<td>QSYMM8_PER_CHANNEL<td>S32<td>QASYMM8_SIGNED
+ </table>
+<tr>
<td rowspan="2">Copy
<td rowspan="2" style="width:200px;"> Function to copy a tensor.
<td rowspan="2">
<ul>
- <li>None
+ <li>n/a
</ul>
<td>NECopy
<td>
@@ -200,8 +554,186 @@ where N = batches, C = channels, H = height, W = width
<tr><td>All<td>All
</table>
<tr>
+ <td rowspan="2">CropResize
+ <td rowspan="2" style="width:200px;"> Function to perform cropping and resizing.
+ <td rowspan="2">
+ <ul>
+ <li>n/a
+ </ul>
+ <td>NECropResize
+ <td>
+ <ul>
+ <li>NHWC
+ </ul>
+ <td>
+ <table>
+ <tr><th>src0<th>src1<th>src2<th>dst
+ <tr><td>All<td>F32<td>F32<td>F32
+ </table>
+<tr>
+ <td>CLCropResize
+ <td>
+ <ul>
+ <li>NHWC
+ </ul>
+ <td>
+ <table>
+ <tr><th>src0<th>src1<th>src2<th>dst
+ <tr><td>All<td>F32<td>F32<td>F32
+ </table>
+<tr>
+ <td rowspan="2">DeconvolutionLayer
+ <td rowspan="2" style="width:200px;"> Function to compute a deconvolution or tranpose convolution.
+ <td rowspan="2">
+ <ul>
+ <li>ANEURALNETWORKS_TRANSPOSE_CONV_2D
+ </ul>
+ <td>NEDeconvolutionLayer
+ <td>
+ <ul>
+ <li>NHWC
+ <li>NCHW
+ </ul>
+ <td>
+ <table>
+ <tr><th>src0<th>src1<th>src2<th>dst
+ <tr><td>F16<td>F16<td>F16<td>F16
+ <tr><td>F32<td>F32<td>F32<td>F32
+ <tr><td>QASYMM8<td>QASYMM8<td>S32<td>QASYMM8
+ <tr><td>QASYMM8<td>QSYMM8_PER_CHANNEL<td>S32<td>QASYMM8
+ <tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED<td>S32<td>QASYMM8_SIGNED
+ <tr><td>QASYMM8_SIGNED<td>QSYMM8_PER_CHANNEL<td>S32<td>QASYMM8_SIGNED
+ </table>
+<tr>
+ <td>CLDeconvolutionLayer
+ <td>
+ <ul>
+ <li>NHWC
+ <li>NCHW
+ </ul>
+ <td>
+ <table>
+ <tr><th>src0<th>src1<th>src2<th>dst
+ <tr><td>F16<td>F16<td>F16<td>F16
+ <tr><td>F32<td>F32<td>F32<td>F32
+ <tr><td>QASYMM8<td>QASYMM8<td>S32<td>QASYMM8
+ <tr><td>QASYMM8<td>QSYMM8_PER_CHANNEL<td>S32<td>QASYMM8
+ <tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED<td>S32<td>QASYMM8_SIGNED
+ <tr><td>QASYMM8_SIGNED<td>QSYMM8_PER_CHANNEL<td>S32<td>QASYMM8_SIGNED
+ </table>
+<tr>
+ <td rowspan="2">DepthConvertLayer
+ <td rowspan="2" style="width:200px;"> Performs a down-scaling depth conversion.
+ <td rowspan="2">
+ <ul>
+ <li>n/a
+ </ul>
+ <td>NEDepthConvertLayer
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>QASYMM8<td>F16, F32
+ <tr><td>U8<td>U16, S16, S32
+ <tr><td>U16<td>U8, U32
+ <tr><td>S16<td>U8, S32
+ <tr><td>BFLOAT16<td>F32
+ <tr><td>F16<td>QASYMM8, F32
+ <tr><td>F32<td>QASYMM8, F16, BFLOAT16
+ </table>
+<tr>
+ <td>CLDepthConvertLayer
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>U8<td>S8, U16, S16, U32, S32, F16, F32
+ <tr><td>U16<td>U8, S8, S16, U32, S32, F16, F32
+ <tr><td>S16<td>U8, S8, U16, U32, S32, F16, F32
+ <tr><td>U32<td>U8, S8, U16, S16, S32, F16, F32
+ <tr><td>S32<td>U8, S8, U16, S16, U32, F16, F32
+ <tr><td>F16<td>U8, S8, U16, S16, U32, F32
+ <tr><td>F32<td>U8, S8, U16, S16, U32, F16
+ </table>
+<tr>
+ <td rowspan="2">DepthToSpaceLayer
+ <td rowspan="2" style="width:200px;"> Depth to Space transformation.
+ <td rowspan="2">
+ <ul>
+ <li>ANEURALNETWORKS_DEPTH_TO_SPACE
+ </ul>
+ <td>NEDepthToSpaceLayer
+ <td>
+ <ul>
+ <li>NHWC
+ <li>NCHW
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>All<td>All
+ </table>
+<tr>
+ <td>CLDepthToSpaceLayer
+ <td>
+ <ul>
+ <li>NHWC
+ <li>NCHW
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>All<td>All
+ </table>
+<tr>
+ <td rowspan="2">DepthwiseConvolutionLayer
+ <td rowspan="2" style="width:200px;"> Function to perform depthwise separable convolution.
+ <td rowspan="2">
+ <ul>
+ <li>ANEURALNETWORKS_DEPTHWISE_CONV_2D
+ </ul>
+ <td>NEDepthwiseConvolutionLayer
+ <td>
+ <ul>
+ <li>NHWC
+ <li>NCHW
+ </ul>
+ <td>
+ <table>
+ <tr><th>src0<th>src1<th>src2<th>dst
+ <tr><td>F16<td>F16<td>F16<td>F16
+ <tr><td>F32<td>F32<td>F32<td>F32
+ <tr><td>QASYMM8<td>QASYMM8<td>S32<td>QASYMM8
+ <tr><td>QASYMM8<td>QSYMM8_PER_CHANNEL<td>S32<td>QASYMM8
+ <tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED<td>S32<td>QASYMM8_SIGNED
+ <tr><td>QASYMM8_SIGNED<td>QSYMM8_PER_CHANNEL<td>S32<td>QASYMM8_SIGNED
+ </table>
+<tr>
+ <td>CLDepthwiseConvolutionLayer
+ <td>
+ <ul>
+ <li>NHWC
+ <li>NCHW
+ </ul>
+ <td>
+ <table>
+ <tr><th>src0<th>src1<th>src2<th>dst
+ <tr><td>F16<td>F16<td>F16<td>F16
+ <tr><td>F32<td>F32<td>F32<td>F32
+ <tr><td>QASYMM8<td>QASYMM8<td>S32<td>QASYMM8
+ <tr><td>QASYMM8<td>QSYMM8_PER_CHANNEL<td>S32<td>QASYMM8
+ <tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED<td>S32<td>QASYMM8_SIGNED
+ <tr><td>QASYMM8_SIGNED<td>QSYMM8_PER_CHANNEL<td>S32<td>QASYMM8_SIGNED
+ </table>
+<tr>
<td rowspan="2">DequantizationLayer
- <td rowspan="2" style="width:200px;"> Function to dequantize the values in a tensor
+ <td rowspan="2" style="width:200px;"> Function to dequantize the values in a tensor.
<td rowspan="2">
<ul>
<li>ANEURALNETWORKS_DEQUANTIZE
@@ -214,16 +746,11 @@ where N = batches, C = channels, H = height, W = width
<td>
<table>
<tr><th>src<th>dst
- <tr><td>QASYMM8<td>F16
- <tr><td>QASYMM8<td>F32
- <tr><td>QASYMM8_SIGNED<td>F16
- <tr><td>QASYMM8_SIGNED<td>F32
- <tr><td>QSYMM8_PER_CHANNEL<td>F16
- <tr><td>QSYMM8_PER_CHANNEL<td>F32
- <tr><td>QSYMM8<td>F16
- <tr><td>QSYMM8<td>F32
- <tr><td>QSYMM16<td>F16
- <tr><td>QSYMM16<td>F32
+ <tr><td>QASYMM8<td>F16, F32
+ <tr><td>QASYMM8_SIGNED<td>F16, F32
+ <tr><td>QSYMM8_PER_CHANNEL<td>F16, F32
+ <tr><td>QSYMM8<td>F16, F32
+ <tr><td>QSYMM16<td>F16, F32
</table>
<tr>
<td>CLDequantizationLayer
@@ -234,20 +761,15 @@ where N = batches, C = channels, H = height, W = width
<td>
<table>
<tr><th>src<th>dst
- <tr><td>QASYMM8<td>F16
- <tr><td>QASYMM8<td>F32
- <tr><td>QASYMM8_SIGNED<td>F16
- <tr><td>QASYMM8_SIGNED<td>F32
- <tr><td>QSYMM8_PER_CHANNEL<td>F16
- <tr><td>QSYMM8_PER_CHANNEL<td>F32
- <tr><td>QSYMM8<td>F16
- <tr><td>QSYMM8<td>F32
- <tr><td>QSYMM16<td>F16
- <tr><td>QSYMM16<td>F32
+ <tr><td>QASYMM8<td>F16, F32
+ <tr><td>QASYMM8_SIGNED<td>F16, F32
+ <tr><td>QSYMM8_PER_CHANNEL<td>F16, F32
+ <tr><td>QSYMM8<td>F16, F32
+ <tr><td>QSYMM16<td>F16, F32
</table>
<tr>
<td rowspan="2">DirectConvolutionLayer
- <td rowspan="2" style="width:200px;"> Function to
+ <td rowspan="2" style="width:200px;"> Function to compute direct convolution.
<td rowspan="2">
<ul>
<li>ANEURALNETWORKS_CONV_2D
@@ -281,10 +803,10 @@ where N = batches, C = channels, H = height, W = width
</table>
<tr>
<td rowspan="2">FFT1D
- <td rowspan="2" style="width:200px;"> Fast Fourier Transform 1D
+ <td rowspan="2" style="width:200px;"> Fast Fourier Transform 1D.
<td rowspan="2">
<ul>
- <li>None
+ <li>n/a
</ul>
<td>NEFFT1D
<td>
@@ -310,10 +832,10 @@ where N = batches, C = channels, H = height, W = width
</table>
<tr>
<td rowspan="2">FFT2D
- <td rowspan="2" style="width:200px;"> Fast Fourier Transform 2D
+ <td rowspan="2" style="width:200px;"> Fast Fourier Transform 2D.
<td rowspan="2">
<ul>
- <li>None
+ <li>n/a
</ul>
<td>NEFFT2D
<td>
@@ -339,7 +861,7 @@ where N = batches, C = channels, H = height, W = width
</table>
<tr>
<td rowspan="2">FFTConvolutionLayer
- <td rowspan="2" style="width:200px;"> Fast Fourier Transform Convolution
+ <td rowspan="2" style="width:200px;"> Fast Fourier Transform Convolution.
<td rowspan="2">
<ul>
<li>ANEURALNETWORKS_CONV_2D
@@ -368,7 +890,7 @@ where N = batches, C = channels, H = height, W = width
</table>
<tr>
<td rowspan="2">Fill
- <td rowspan="2" style="width:200px;"> Set the values of a tensor with a given value
+ <td rowspan="2" style="width:200px;"> Set the values of a tensor with a given value.
<td rowspan="2">
<ul>
<li>ANEURALNETWORKS_FILL
@@ -395,8 +917,64 @@ where N = batches, C = channels, H = height, W = width
<tr><td>All<td>All
</table>
<tr>
+ <td rowspan="2">FillBorder
+ <td rowspan="2" style="width:200px;"> Function to .
+ <td rowspan="2">
+ <ul>
+ <li>n/a
+ </ul>
+ <td>NEFillBorder
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>All<td>All
+ </table>
+<tr>
+ <td>CLFillBorder
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>All<td>All
+ </table>
+<tr>
+ <td rowspan="2">FlattenLayer
+ <td rowspan="2" style="width:200px;"> Reshape a tensor to be 1D
+ <td rowspan="2">
+ <ul>
+ <li>ANEURALNETWORKS_RESHAPE
+ </ul>
+ <td>NEFlattenLayer
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>All<td>All
+ </table>
+<tr>
+ <td>CLFlattenLayer
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>All<td>All
+ </table>
+<tr>
<td rowspan="2">Floor
- <td rowspan="2" style="width:200px;"> Round the value to the lowest number
+ <td rowspan="2" style="width:200px;"> Round the value to the lowest number.
<td rowspan="2">
<ul>
<li>ANEURALNETWORKS_FLOOR
@@ -425,6 +1003,512 @@ where N = batches, C = channels, H = height, W = width
<tr><td>F16<td>F16
</table>
<tr>
+ <td rowspan="2">FullyConnectedLayer
+ <td rowspan="2" style="width:200px;"> Function to perform a fully connected / dense layer.
+ <td rowspan="2">
+ <ul>
+ <li>ANEURALNETWORKS_FULLY_CONNECTED
+ </ul>
+ <td>NEFullyConnectedLayerReshapeWeightsManaged
+ <td>
+ <ul>
+ <li>NHWC
+ <li>NCHW
+ </ul>
+ <td>
+ <table>
+ <tr><th>src0<th>src1<th>src2<th>dst
+ <tr><td>F16<td>F16<td>F16<td>F16
+ <tr><td>F32<td>F32<td>F32<td>F32
+ <tr><td>QASYMM8<td>QASYMM8<td>S32<td>QASYMM8
+ <tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED<td>S32<td>QASYMM8_SIGNED
+ </table>
+<tr>
+ <td>CLFullyConnectedLayerReshapeWeightsManaged
+ <td>
+ <ul>
+ <li>NHWC
+ <li>NCHW
+ </ul>
+ <td>
+ <table>
+ <tr><th>src0<th>src1<th>src2<th>dst
+ <tr><td>F16<td>F16<td>F16<td>F16
+ <tr><td>F32<td>F32<td>F32<td>F32
+ <tr><td>QASYMM8<td>QASYMM8<td>S32<td>QASYMM8
+ <tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED<td>S32<td>QASYMM8_SIGNED
+ </table>
+<tr>
+ <td rowspan="2">FuseBatchNormalization
+ <td rowspan="2" style="width:200px;"> Function to fuse the batch normalization node to a preceding convolution node.
+ <td rowspan="2">
+ <ul>
+ <li>n/a
+ </ul>
+ <td>NEFuseBatchNormalization
+ <td>
+ <ul>
+ <li>NHWC
+ <li>NCHW
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>F32<td>F32
+ <tr><td>F16<td>F16
+ </table>
+<tr>
+ <td>CLFuseBatchNormalization
+ <td>
+ <ul>
+ <li>NHWC
+ <li>NCHW
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>F32<td>F32
+ <tr><td>F16<td>F16
+ </table>
+<tr>
+ <td rowspan="2">Gather
+ <td rowspan="2" style="width:200px;"> Performs the Gather operation along the chosen axis.
+ <td rowspan="2">
+ <ul>
+ <li>ANEURALNETWORKS_GATHER
+ </ul>
+ <td>NEGather
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>All<td>All
+ </table>
+<tr>
+ <td>CLGather
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>All<td>All
+ </table>
+<tr>
+ <td rowspan="2">GEMM
+ <td rowspan="2" style="width:200px;"> General Matrix Multiplication.
+ <td rowspan="2">
+ <ul>
+ <li>n/a
+ </ul>
+ <td>NEGEMM
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src0<th>src1<th>src2<th>dst
+ <tr><td>F32<td>F32<td>F32<td>F32
+ <tr><td>F16<td>F16<td>F16<td>F16
+ <tr><td>BFLOAT16<td>BFLOAT16<td>BFLOAT16<td>BFLOAT16
+ </table>
+<tr>
+ <td>CLGEMMReshapeRHSMatrixKernelManaged
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src0<th>src1<th>src2<th>dst
+ <tr><td>F32<td>F32<td>F32<td>F32
+ <tr><td>F16<td>F16<td>F16<td>F16
+ </table>
+<tr>
+ <td rowspan="2">GEMMConvolutionLayer
+ <td rowspan="2" style="width:200px;"> General Matrix Multiplication.
+ <td rowspan="2">
+ <ul>
+ <li>ANEURALNETWORKS_CONV_2D
+ </ul>
+ <td>NEConvolutionLayerReshapeWeights
+ <td>
+ <ul>
+ <li>NHWC
+ <li>NCHW
+ </ul>
+ <td>
+ <table>
+ <tr><th>src0<th>src1<th>src2<th>dst
+ <tr><td>F16<td>F16<td>F16<td>F16
+ <tr><td>F32<td>F32<td>F32<td>F32
+ <tr><td>BFLOAT16<td>BFLOAT16<td>BFLOAT16<td>BFLOAT16
+ <tr><td>QASYMM8<td>QASYMM8<td>S32<td>QASYMM8
+ <tr><td>QASYMM8<td>QSYMM8_PER_CHANNEL<td>S32<td>QASYMM8
+ <tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED<td>S32<td>QASYMM8_SIGNED
+ <tr><td>QASYMM8_SIGNED<td>QSYMM8_PER_CHANNEL<td>S32<td>QASYMM8_SIGNED
+ </table>
+<tr>
+ <td>CLConvolutionLayerReshapeWeights
+ <td>
+ <ul>
+ <li>NHWC
+ <li>NCHW
+ </ul>
+ <td>
+ <table>
+ <tr><th>src0<th>src1<th>src2<th>dst
+ <tr><td>F16<td>F16<td>F16<td>F16
+ <tr><td>F32<td>F32<td>F32<td>F32
+ <tr><td>QASYMM8<td>QASYMM8<td>S32<td>QASYMM8
+ <tr><td>QASYMM8<td>QSYMM8_PER_CHANNEL<td>S32<td>QASYMM8
+ <tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED<td>S32<td>QASYMM8_SIGNED
+ <tr><td>QASYMM8_SIGNED<td>QSYMM8_PER_CHANNEL<td>S32<td>QASYMM8_SIGNED
+ </table>
+<tr>
+ <td rowspan="2">GEMMLowpMatrixMultiplyCore
+ <td rowspan="2" style="width:200px;"> General Matrix Multiplication.
+ <td rowspan="2">
+ <ul>
+ <li>n/a
+ </ul>
+ <td>NEGEMMLowpMatrixMultiplyCore
+ <td>
+ <ul>
+ <li>NHWC
+ <li>NCHW
+ </ul>
+ <td>
+ <table>
+ <tr><th>src0<th>src1<th>src2<th>dst
+ <tr><td>QASYMM8<td>QASYMM8<td>S32<td>QASYMM8
+ <tr><td>QASYMM8<td>QSYMM8_PER_CHANNEL<td>S32<td>QASYMM8
+ <tr><td>QASYMM8<td>QSYMM8<td>S32<td>QASYMM8
+ <tr><td>QASYMM8<td>QASYMM8<td>S32<td>S32
+ <tr><td>QASYMM8<td>QSYMM8_PER_CHANNEL<td>S32<td>S32
+ <tr><td>QASYMM8<td>QSYMM8<td>S32<td>S32
+ <tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED<td>S32<td>QASYMM8_SIGNED
+ <tr><td>QASYMM8_SIGNED<td>QSYMM8_PER_CHANNEL<td>S32<td>QASYMM8_SIGNED
+ <tr><td>QASYMM8_SIGNED<td>QSYMM8<td>S32<td>QASYMM8_SIGNED
+ <tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED<td>S32<td>S32
+ <tr><td>QASYMM8_SIGNED<td>QSYMM8_PER_CHANNEL<td>S32<td>S32
+ <tr><td>QASYMM8_SIGNED<td>QSYMM8<td>S32<td>S32
+ </table>
+<tr>
+ <td>CLGEMMLowpMatrixMultiplyCore
+ <td>
+ <ul>
+ <li>NHWC
+ <li>NCHW
+ </ul>
+ <td>
+ <table>
+ <tr><th>src0<th>src1<th>src2<th>dst
+ <tr><td>QASYMM8<td>QASYMM8<td>S32<td>QASYMM8
+ <tr><td>QASYMM8<td>QSYMM8_PER_CHANNEL<td>S32<td>QASYMM8
+ <tr><td>QASYMM8<td>QSYMM8<td>S32<td>QASYMM8
+ <tr><td>QASYMM8<td>QASYMM8<td>S32<td>S32
+ <tr><td>QASYMM8<td>QSYMM8_PER_CHANNEL<td>S32<td>S32
+ <tr><td>QASYMM8<td>QSYMM8<td>S32<td>S32
+ <tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED<td>S32<td>QASYMM8_SIGNED
+ <tr><td>QASYMM8_SIGNED<td>QSYMM8_PER_CHANNEL<td>S32<td>QASYMM8_SIGNED
+ <tr><td>QASYMM8_SIGNED<td>QSYMM8<td>S32<td>QASYMM8_SIGNED
+ <tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED<td>S32<td>S32
+ <tr><td>QASYMM8_SIGNED<td>QSYMM8_PER_CHANNEL<td>S32<td>S32
+ <tr><td>QASYMM8_SIGNED<td>QSYMM8<td>S32<td>S32
+ </table>
+<tr>
+ <td rowspan="2">GenerateProposalsLayer
+ <td rowspan="2" style="width:200px;"> Function to generate proposals for a RPN (Region Proposal Network).
+ <td rowspan="2">
+ <ul>
+ <li>ANEURALNETWORKS_GENERATE_PROPOSALS
+ </ul>
+ <td>NEGenerateProposalsLayer
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src0<th>src1<th>src2<th>dst
+ <tr><td>F16<td>F16<td>F16<td>F16
+ <tr><td>F32<td>F32<td>F32<td>F32
+ <tr><td>QASYMM8<td>QSYMM8<td>QSYMM16<td>QASYMM8
+ </table>
+<tr>
+ <td>CLGenerateProposalsLayer
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src0<th>src1<th>src2<th>dst
+ <tr><td>F16<td>F16<td>F16<td>F16
+ <tr><td>F32<td>F32<td>F32<td>F32
+ <tr><td>QASYMM8<td>QSYMM8<td>QSYMM16<td>QASYMM8
+ </table>
+<tr>
+ <td rowspan="2">InstanceNormalizationLayer
+ <td rowspan="2" style="width:200px;"> Function to perform a Instance normalization on a given axis.
+ <td rowspan="2">
+ <ul>
+ <li>ANEURALNETWORKS_INSTANCE_NORMALIZATION
+ </ul>
+ <td>NEInstanceNormalizationLayer
+ <td>
+ <ul>
+ <li>NHWC
+ <li>NCHW
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>F16<td>F16
+ <tr><td>F32<td>F32
+ </table>
+<tr>
+ <td>CLInstanceNormalizationLayer
+ <td>
+ <ul>
+ <li>NHWC
+ <li>NCHW
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>F16<td>F16
+ <tr><td>F32<td>F32
+ </table>
+<tr>
+ <td rowspan="2">L2NormalizeLayer
+ <td rowspan="2" style="width:200px;"> Function to perform a L2 normalization on a given axis.
+ <td rowspan="2">
+ <ul>
+ <li>ANEURALNETWORKS_L2_NORMALIZATION
+ </ul>
+ <td>NEL2NormalizeLayer
+ <td>
+ <ul>
+ <li>NHWC
+ <li>NCHW
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>F16<td>F16
+ <tr><td>F32<td>F32
+ </table>
+<tr>
+ <td>CLL2NormalizeLayer
+ <td>
+ <ul>
+ <li>NHWC
+ <li>NCHW
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>F16<td>F16
+ <tr><td>F32<td>F32
+ </table>
+<tr>
+ <td rowspan="2">LSTMLayer
+ <td rowspan="2" style="width:200px;"> Function to perform a single time step in a Long Short-Term Memory (LSTM) layer.
+ <td rowspan="2">
+ <ul>
+ <li>ANEURALNETWORKS_LSTM
+ </ul>
+ <td>NELSTMLayer
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src0 - src13<th>dst0 - dst3
+ <tr><td>F16<td>F16
+ <tr><td>F32<td>F32
+ </table>
+<tr>
+ <td>CLLSTMLayer
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src0 - src13<th>dst0 - dst3
+ <tr><td>F16<td>F16
+ <tr><td>F32<td>F32
+ </table>
+<tr>
+ <td rowspan="2">LSTMLayerQuantized
+ <td rowspan="2" style="width:200px;"> Function to perform quantized LSTM (Long Short-Term Memory)
+ <td rowspan="2">
+ <ul>
+ <li>ANEURALNETWORKS_QUANTIZED_LSTM
+ <li>ANEURALNETWORKS_QUANTIZED_16BIT_LSTM
+ </ul>
+ <td>NELSTMLayerQuantized
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src0 - src8<th>src9 - src12<th>src13<th>src14<th>dst0<th>dst1
+ <tr><td>QASYMM8<td>S32<td>QSYMM16<td>QASYMM8<td>QSYMM16<td>QASYMM8
+ </table>
+<tr>
+ <td>CLLSTMLayerQuantized
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src0 - src8<th>src9 - src12<th>src13<th>src14<th>dst0<th>dst1
+ <tr><td>QASYMM8<td>S32<td>QSYMM16<td>QASYMM8<td>QSYMM16<td>QASYMM8
+ </table>
+<tr>
+ <td rowspan="2">MaxUnpoolingLayer
+ <td rowspan="2" style="width:200px;"> Function to perform MaxUnpooling.
+ <td rowspan="2">
+ <ul>
+ <li>n/a
+ </ul>
+ <td>NEMaxUnpoolingLayer
+ <td>
+ <ul>
+ <li>NHWC
+ <li>NCHW
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>QASYMM8<td>QASYMM8
+ <tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED
+ <tr><td>F16<td>F16
+ <tr><td>F32<td>F32
+ </table>
+<tr>
+ <td>CLMaxUnpoolingLayer
+ <td>
+ <ul>
+ <li>NHWC
+ <li>NCHW
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>QASYMM8<td>QASYMM8
+ <tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED
+ <tr><td>F16<td>F16
+ <tr><td>F32<td>F32
+ </table>
+<tr>
+ <td rowspan="2">MeanStdDevNormalizationLayer
+ <td rowspan="2" style="width:200px;"> Function to execute mean and standard deviation normalization.
+ <td rowspan="2">
+ <ul>
+ <li>n/a
+ </ul>
+ <td>NEMeanStdDevNormalizationLayer
+ <td>
+ <ul>
+ <li>NHWC
+ <li>NCHW
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>F32<td>F32
+ <tr><td>F16<td>F16
+ </table>
+<tr>
+ <td>CLMeanStdDevNormalizationLayer
+ <td>
+ <ul>
+ <li>NHWC
+ <li>NCHW
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>F32<td>F32
+ <tr><td>F16<td>F16
+ </table>
+<tr>
+ <td rowspan="2">NormalizationLayer
+ <td rowspan="2" style="width:200px;"> Function to compute normalization layer.
+ <td rowspan="2">
+ <ul>
+ <li>ANEURALNETWORKS_LOCAL_RESPONSE_NORMALIZATION
+ </ul>
+ <td>NENormalizationLayer
+ <td>
+ <ul>
+ <li>NHWC
+ <li>NCHW
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>F32<td>F32
+ <tr><td>F16<td>F16
+ </table>
+<tr>
+ <td>CLNormalizationLayer
+ <td>
+ <ul>
+ <li>NHWC
+ <li>NCHW
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>F32<td>F32
+ <tr><td>F16<td>F16
+ </table>
+<tr>
+ <td rowspan="2">PadLayer
+ <td rowspan="2" style="width:200px;"> Function to pad a tensor.
+ <td rowspan="2">
+ <ul>
+ <li>ANEURALNETWORKS_PAD
+ <li>ANEURALNETWORKS_PAD_V2
+ </ul>
+ <td>NEPadLayer
+ <td>
+ <ul>
+ <li>NHWC
+ <li>NCHW
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>All<td>All
+ </table>
+<tr>
+ <td>CLPadLayer
+ <td>
+ <ul>
+ <li>NHWC
+ <li>NCHW
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>All<td>All
+ </table>
+<tr>
<td rowspan="2">Permute
<td rowspan="2" style="width:200px;"> Function to transpose an ND tensor.
<td rowspan="2">
@@ -575,6 +1659,65 @@ where N = batches, C = channels, H = height, W = width
<tr><td>F32<td>F32
</table>
<tr>
+ <td rowspan="2">PriorBoxLayer
+ <td rowspan="2" style="width:200px;"> Function to .
+ <td rowspan="2">
+ <ul>
+ <li>n/a
+ </ul>
+ <td>NEPriorBoxLayer
+ <td>
+ <ul>
+ <li>NHWC
+ <li>NCHW
+ </ul>
+ <td>
+ <table>
+ <tr><th>src0<th>src1<th>dst
+ <tr><td>F32<td>F32<td>F32
+ </table>
+<tr>
+ <td>CLPriorBoxLayer
+ <td>
+ <ul>
+ <li>NHWC
+ <li>NCHW
+ </ul>
+ <td>
+ <table>
+ <tr><th>src0<th>src1<th>dst
+ <tr><td>F32<td>F32<td>F32
+ </table>
+<tr>
+ <td rowspan="2">QLSTMLayer
+ <td rowspan="2" style="width:200px;"> Function to perform quantized LSTM (Long Short-Term Memory).
+ <td rowspan="2">
+ <ul>
+ <li>ANEURALNETWORKS_QUANTIZED_LSTM
+ <li>ANEURALNETWORKS_QUANTIZED_16BIT_LSTM
+ </ul>
+ <td>NEQLSTMLayer
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src0<th>src1 - src6<th>src7 - src9<th>src10<th>src11<th>dst0<th>dst1 - dst2
+ <tr><td>QASYMM8_SIGNED<td>QASYMM8<td>S32<td>QSYMM16<td>QASYMM8_SIGNED<td>QSYMM16<td>QASYMM8_SIGNED
+ </table>
+<tr>
+ <td>CLQLSTMLayer
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src0<th>src1 - src6<th>src7 - src9<th>src10<th>src11<th>dst0<th>dst1 - dst2
+ <tr><td>QASYMM8_SIGNED<td>QASYMM8<td>S32<td>QSYMM16<td>QASYMM8_SIGNED<td>QSYMM16<td>QASYMM8_SIGNED
+ </table>
+<tr>
<td rowspan="2">QuantizationLayer
<td rowspan="2" style="width:200px;"> Function to perform quantization layer
<td rowspan="2">
@@ -589,21 +1732,130 @@ where N = batches, C = channels, H = height, W = width
<td>
<table>
<tr><th>src<th>dst
+ <tr><td>QASYMM8<td>QASYMM8, QASYMM8_SIGNED, QASYMM16
+ <tr><td>QASYMM8_SIGNED<td>QASYMM8, QASYMM8_SIGNED, QASYMM16
+ <tr><td>F16<td>QASYMM8, QASYMM8_SIGNED, QASYMM16
+ <tr><td>F32<td>QASYMM8, QASYMM8_SIGNED, QASYMM16
+ </table>
+<tr>
+ <td>CLQuantizationLayer
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>QASYMM8<td>QASYMM8, QASYMM8_SIGNED, QASYMM16
+ <tr><td>QASYMM8_SIGNED<td>QASYMM8, QASYMM8_SIGNED, QASYMM16
+ <tr><td>F16<td>QASYMM8, QASYMM8_SIGNED, QASYMM16
+ <tr><td>F32<td>QASYMM8, QASYMM8_SIGNED, QASYMM16
+ </table>
+<tr>
+ <td rowspan="2">Range
+ <td rowspan="2" style="width:200px;"> Function to generates a sequence of numbers starting from START and extends by increments of 'STEP' up to but not including 'END'.
+ <td rowspan="2">
+ <ul>
+ <li>n/a
+ </ul>
+ <td>NERange
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>dst
+ <tr><td>U8
+ <tr><td>S8
+ <tr><td>U16
+ <tr><td>S16
+ <tr><td>U32
+ <tr><td>S32
+ <tr><td>F16
+ <tr><td>F32
+ </table>
+<tr>
+ <td>CLRange
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>dst
+ <tr><td>U8
+ <tr><td>S8
+ <tr><td>QASYMM8
+ <tr><td>U16
+ <tr><td>S16
+ <tr><td>U32
+ <tr><td>S32
+ <tr><td>F16
+ <tr><td>F32
+ </table>
+<tr>
+ <td rowspan="2">ReduceMean
+ <td rowspan="2" style="width:200px;"> Function to performe reduce mean operation.
+ <td rowspan="2">
+ <ul>
+ <li>ANEURALNETWORKS_MEAN
+ </ul>
+ <td>NEReduceMean
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
<tr><td>QASYMM8<td>QASYMM8
- <tr><td>QASYMM8<td>QASYMM8_SIGNED
- <tr><td>QASYMM8<td>QASYMM16
- <tr><td>QASYMM8_SIGNED<td>QASYMM8
<tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED
- <tr><td>QASYMM8_SIGNED<td>QASYMM16
- <tr><td>F16<td>QASYMM8
- <tr><td>F16<td>QASYMM8_SIGNED
- <tr><td>F16<td>QASYMM16
- <tr><td>F32<td>QASYMM8
- <tr><td>F32<td>QASYMM8_SIGNED
- <tr><td>F32<td>QASYMM16
+ <tr><td>F16<td>F16
+ <tr><td>F32<td>F32
</table>
<tr>
- <td>CLQuantizationLayer
+ <td>CLReduceMean
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>QASYMM8<td>QASYMM8
+ <tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED
+ <tr><td>F16<td>F16
+ <tr><td>F32<td>F32
+ </table>
+<tr>
+ <td rowspan="2">ReductionOperation
+ <td rowspan="2" style="width:200px;"> Function to performe reduce with the following operations - ARG_IDX_MAX: Index of the max value - ARG_IDX_MIN: Index of the min value - MEAN_SUM: Mean of sum - PROD: Product - SUM_SQUARE: Sum of squares - SUM: Sum - MIN: Min - MAX: Max
+ <td rowspan="2">
+ <ul>
+ <li>ANEURALNETWORKS_REDUCE_ALL
+ <li>ANEURALNETWORKS_REDUCE_ANY
+ <li>ANEURALNETWORKS_REDUCE_MAX
+ <li>ANEURALNETWORKS_REDUCE_MIN
+ <li>ANEURALNETWORKS_REDUCE_PROD
+ <li>ANEURALNETWORKS_REDUCE_SUM
+ </ul>
+ <td>NEReductionOperation
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>QASYMM8<td>QASYMM8
+ <tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED
+ <tr><td>F16<td>F16
+ <tr><td>F32<td>F32
+ <tr><td>S32<td>S32
+ </table>
+<tr>
+ <td>CLReductionOperation
<td>
<ul>
<li>All
@@ -612,21 +1864,44 @@ where N = batches, C = channels, H = height, W = width
<table>
<tr><th>src<th>dst
<tr><td>QASYMM8<td>QASYMM8
- <tr><td>QASYMM8<td>QASYMM8_SIGNED
- <tr><td>QASYMM8<td>QASYMM16
- <tr><td>QASYMM8_SIGNED<td>QASYMM8
<tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED
- <tr><td>QASYMM8_SIGNED<td>QASYMM16
- <tr><td>F16<td>QASYMM8
- <tr><td>F16<td>QASYMM8_SIGNED
- <tr><td>F16<td>QASYMM16
- <tr><td>F32<td>QASYMM8
- <tr><td>F32<td>QASYMM8_SIGNED
- <tr><td>F32<td>QASYMM16
+ <tr><td>F16<td>F16
+ <tr><td>F32<td>F32
+ <tr><td>S32<td>S32
+ </table>
+<tr>
+ <td rowspan="2">ReorgLayer
+ <td rowspan="2" style="width:200px;"> Performs a reorganization layer of input tensor to the output tensor.
+ <td rowspan="2">
+ <ul>
+ <li>n/a
+ </ul>
+ <td>NEReorgLayer
+ <td>
+ <ul>
+ <li>NHWC
+ <li>NCHW
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>All<td>All
+ </table>
+<tr>
+ <td>CLReorgLayer
+ <td>
+ <ul>
+ <li>NHWC
+ <li>NCHW
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>All<td>All
</table>
<tr>
<td rowspan="2">ReshapeLayer
- <td rowspan="2" style="width:200px;"> Fucntion to reshape a tensor
+ <td rowspan="2" style="width:200px;"> Function to reshape a tensor.
<td rowspan="2">
<ul>
<li>ANEURALNETWORKS_RESHAPE
@@ -654,8 +1929,133 @@ where N = batches, C = channels, H = height, W = width
<tr><td>All<td>All
</table>
<tr>
+ <td rowspan="2">Reverse
+  <td rowspan="2" style="width:200px;"> Function to reverse a tensor according to a given axis.
+ <td rowspan="2">
+ <ul>
+ <li>n/a
+ </ul>
+ <td>NEReverse
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src0<th>src1<th>dst
+ <tr><td>All<td>U32<td>All
+ </table>
+<tr>
+ <td>CLReverse
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src0<th>src1<th>dst
+ <tr><td>All<td>U32<td>All
+ </table>
+<tr>
+ <td rowspan="2">RNNLayer
+  <td rowspan="2" style="width:200px;"> Function to perform a recurrent neural network layer.
+ <td rowspan="2">
+ <ul>
+ <li>ANEURALNETWORKS_RNN
+ </ul>
+ <td>NERNNLayer
+ <td>
+ <ul>
+ <li>NHWC
+ <li>NCHW
+ </ul>
+ <td>
+ <table>
+ <tr><th>src0<th>src1<th>src2<th>src3<th>dst0<th>dst1
+ <tr><td>F16<td>F16<td>F16<td>F16<td>F16<td>F16
+ <tr><td>F32<td>F32<td>F32<td>F32<td>F32<td>F32
+ </table>
+<tr>
+ <td>CLRNNLayer
+ <td>
+ <ul>
+ <li>NHWC
+ <li>NCHW
+ </ul>
+ <td>
+ <table>
+ <tr><th>src0<th>src1<th>src2<th>src3<th>dst0<th>dst1
+ <tr><td>F16<td>F16<td>F16<td>F16<td>F16<td>F16
+ <tr><td>F32<td>F32<td>F32<td>F32<td>F32<td>F32
+ </table>
+<tr>
+ <td rowspan="2">ROIAlignLayer
+ <td rowspan="2" style="width:200px;"> Function to perform ROI alignment.
+ <td rowspan="2">
+ <ul>
+ <li>ANEURALNETWORKS_ROI_ALIGN
+ </ul>
+ <td>NEROIAlignLayer
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src0<th>src1<th>dst
+ <tr><td>F16<td>F16<td>F16
+ <tr><td>F32<td>F32<td>F32
+ <tr><td>QASYMM8<td>QASYMM16<td>QASYMM8
+ <tr><td>QASYMM8_SIGNED<td>QASYMM16<td>QASYMM8_SIGNED
+ </table>
+<tr>
+ <td>CLROIAlignLayer
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src0<th>src1<th>dst
+ <tr><td>F16<td>F16<td>F16
+ <tr><td>F32<td>F32<td>F32
+ <tr><td>QASYMM8<td>QASYMM16<td>QASYMM8
+ <tr><td>QASYMM8_SIGNED<td>QASYMM16<td>QASYMM8_SIGNED
+ </table>
+<tr>
+ <td rowspan="2">ROIPoolingLayer
+ <td rowspan="2" style="width:200px;"> Function to perform ROI pooling.
+ <td rowspan="2">
+ <ul>
+ <li>ANEURALNETWORKS_ROI_POOLING
+ </ul>
+ <td>NEROIPoolingLayer
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src0<th>src1<th>dst
+ <tr><td>F32<td>U16<td>F32
+ <tr><td>QASYMM8<td>U16<td>QASYMM8
+ </table>
+<tr>
+ <td>CLROIPoolingLayer
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src0<th>src1<th>dst
+ <tr><td>F16<td>U16<td>F16
+ <tr><td>F32<td>U16<td>F32
+ <tr><td>QASYMM8<td>U16<td>QASYMM8
+ </table>
+<tr>
<td rowspan="2">Scale
- <td rowspan="2" style="width:200px;"> Fucntion to perform resize a tensor using to interpolate: - Bilenear - Nearest neighbor
+  <td rowspan="2" style="width:200px;"> Function to resize a tensor using one of the following interpolation methods: - Bilinear - Nearest neighbor
<td rowspan="2">
<ul>
<li>ANEURALNETWORKS_RESIZE_BILINEAR
@@ -695,6 +2095,34 @@ where N = batches, C = channels, H = height, W = width
<tr><td>S16<td>S16
</table>
<tr>
+ <td rowspan="2">Select
+ <td rowspan="2" style="width:200px;"> Function to select values from 2 tensors depending on an input tensor of booleans.
+ <td rowspan="2">
+ <ul>
+ <li>ANEURALNETWORKS_SELECT
+ </ul>
+ <td>NESelect
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src0<th>src1<th>src2<th>dst
+ <tr><td>U8<td>All<td>All<td>All
+ </table>
+<tr>
+ <td>CLSelect
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src0<th>src1<th>src2<th>dst
+ <tr><td>U8<td>All<td>All<td>All
+ </table>
+<tr>
<td rowspan="2">Slice
<td rowspan="2" style="width:200px;"> Function to perform tensor slicing.
<td rowspan="2">
@@ -723,6 +2151,122 @@ where N = batches, C = channels, H = height, W = width
<tr><td>All<td>All
</table>
<tr>
+ <td rowspan="2">SpaceToBatchLayer
+ <td rowspan="2" style="width:200px;"> Function to divide a tensor spatially.
+ <td rowspan="2">
+ <ul>
+ <li>ANEURALNETWORKS_SPACE_TO_BATCH_ND
+ </ul>
+ <td>NESpaceToBatchLayer
+ <td>
+ <ul>
+ <li>NHWC
+ <li>NCHW
+ </ul>
+ <td>
+ <table>
+ <tr><th>src0<th>src1<th>src2<th>dst
+ <tr><td>All<td>S32<td>S32<td>All
+ </table>
+<tr>
+ <td>CLSpaceToBatchLayer
+ <td>
+ <ul>
+ <li>NHWC
+ <li>NCHW
+ </ul>
+ <td>
+ <table>
+ <tr><th>src0<th>src1<th>src2<th>dst
+ <tr><td>All<td>S32<td>S32<td>All
+ </table>
+<tr>
+ <td rowspan="2">SpaceToDepthLayer
+ <td rowspan="2" style="width:200px;"> Function to rearrange blocks of spatial data into depth.
+ <td rowspan="2">
+ <ul>
+ <li>ANEURALNETWORKS_SPACE_TO_DEPTH
+ </ul>
+ <td>NESpaceToDepthLayer
+ <td>
+ <ul>
+ <li>NHWC
+ <li>NCHW
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>All<td>All
+ </table>
+<tr>
+ <td>CLSpaceToDepthLayer
+ <td>
+ <ul>
+ <li>NHWC
+ <li>NCHW
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>All<td>All
+ </table>
+<tr>
+ <td rowspan="2">Split
+ <td rowspan="2" style="width:200px;"> Function to split a tensor along a given axis.
+ <td rowspan="2">
+ <ul>
+ <li>ANEURALNETWORKS_SPLIT
+ </ul>
+ <td>NESplit
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>All<td>All
+ </table>
+<tr>
+ <td>CLSplit
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>All<td>All
+ </table>
+<tr>
+ <td rowspan="2">StackLayer
+ <td rowspan="2" style="width:200px;"> Function to stack tensors along an axis.
+ <td rowspan="2">
+ <ul>
+ <li>n/a
+ </ul>
+ <td>NEStackLayer
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>All<td>All
+ </table>
+<tr>
+ <td>CLStackLayer
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>All<td>All
+ </table>
+<tr>
<td rowspan="2">StridedSlice
<td rowspan="2" style="width:200px;"> Function to extract a strided slice of a tensor.
<td rowspan="2">
@@ -751,8 +2295,36 @@ where N = batches, C = channels, H = height, W = width
<tr><td>All<td>All
</table>
<tr>
+ <td rowspan="2">Tile
+ <td rowspan="2" style="width:200px;"> Function to construct a tensor by tiling a given tensor.
+ <td rowspan="2">
+ <ul>
+ <li>ANEURALNETWORKS_TILE
+ </ul>
+ <td>NETile
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>All<td>All
+ </table>
+<tr>
+ <td>CLTile
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>All<td>All
+ </table>
+<tr>
<td rowspan="2">Transpose
- <td rowspan="2" style="width:200px;"> Function to transpose an 2D tensor.
+ <td rowspan="2" style="width:200px;"> Function to transpose a 2D tensor.
<td rowspan="2">
<ul>
<li>ANEURALNETWORKS_TRANSPOSE
@@ -778,6 +2350,66 @@ where N = batches, C = channels, H = height, W = width
<tr><th>src<th>dst
<tr><td>All<td>All
</table>
+<tr>
+ <td rowspan="2">Unstack
+ <td rowspan="2" style="width:200px;"> Function to unpack a rank-R tensor into rank-(R-1) tensors.
+ <td rowspan="2">
+ <ul>
+ <li>n/a
+ </ul>
+ <td>NEUnstack
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>All<td>All
+ </table>
+<tr>
+ <td>CLUnstack
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>All<td>All
+ </table>
+<tr>
+ <td rowspan="2">WinogradConvolutionLayer
+  <td rowspan="2" style="width:200px;"> Function to perform Winograd convolution.
+ <td rowspan="2">
+ <ul>
+ <li>ANEURALNETWORKS_CONV_2D
+ </ul>
+ <td>NEWinogradConvolutionLayer
+ <td>
+ <ul>
+ <li>NHWC
+ <li>NCHW
+ </ul>
+ <td>
+ <table>
+ <tr><th>src0<th>src1<th>src2<th>dst
+ <tr><td>F16<td>F16<td>F16<td>F16
+ <tr><td>F32<td>F32<td>F32<td>F32
+ </table>
+<tr>
+ <td>CLWinogradConvolutionLayer
+ <td>
+ <ul>
+ <li>NHWC
+ <li>NCHW
+ </ul>
+ <td>
+ <table>
+ <tr><th>src0<th>src1<th>src2<th>dst
+ <tr><td>F16<td>F16<td>F16<td>F16
+ <tr><td>F32<td>F32<td>F32<td>F32
+ </table>
</table>
*/
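
The operator table above lists, for each runtime function, the supported data types and layouts. All of these functions are driven through the same configure/allocate/run pattern; the snippet below is a minimal, illustrative sketch of that pattern using NEReshapeLayer, one of the operators documented above. The tensor shapes and the main() wrapper are assumptions made for the example, not taken from this patch.

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/NEON/functions/NEReshapeLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        // Describe a 4x2 F32 source tensor and its flattened 8-element destination
        // (shapes chosen only for illustration).
        Tensor src;
        Tensor dst;
        src.allocator()->init(TensorInfo(TensorShape(4U, 2U), 1, DataType::F32));
        dst.allocator()->init(TensorInfo(TensorShape(8U), 1, DataType::F32));

        // Configure the function first, then allocate the backing memory.
        NEReshapeLayer reshape;
        reshape.configure(&src, &dst);
        src.allocator()->allocate();
        dst.allocator()->allocate();

        // ... fill src with data here ...

        // Execute the operator.
        reshape.run();
        return 0;
    }

The CL functions in the table (CLReshapeLayer, CLReductionOperation, and so on) follow the same pattern with CLTensor, and typically require the OpenCL scheduler to be initialised first, for example via CLScheduler::get().default_init().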
diff --git a/src/core/CL/cl_kernels/bounding_box_transform.cl b/src/core/CL/cl_kernels/bounding_box_transform.cl
index a9b0496a6e..f2e9cb0ed0 100644
--- a/src/core/CL/cl_kernels/bounding_box_transform.cl
+++ b/src/core/CL/cl_kernels/bounding_box_transform.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -25,7 +25,7 @@
#if defined(DATA_TYPE) && defined(WEIGHT_X) && defined(WEIGHT_Y) && defined(WEIGHT_W) && defined(WEIGHT_H) && defined(IMG_WIDTH) && defined(IMG_HEIGHT) && defined(BOX_FIELDS) && defined(SCALE_BEFORE) // Check for compile time constants
-/** Perform a padded copy of input tensor to the output tensor. Padding values are defined at compile time
+/** Transform proposal bounding boxes to target bounding boxes using bounding box deltas.
*
* @attention The following variables must be passed at compile time:
* -# -DDATA_TYPE= Tensor data type. Supported data types: F16/F32
diff --git a/src/core/CL/cl_kernels/bounding_box_transform_quantized.cl b/src/core/CL/cl_kernels/bounding_box_transform_quantized.cl
index 9e5cee55f4..c1d45a56b9 100644
--- a/src/core/CL/cl_kernels/bounding_box_transform_quantized.cl
+++ b/src/core/CL/cl_kernels/bounding_box_transform_quantized.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -25,7 +25,7 @@
#if defined(DATA_TYPE) && defined(DATA_TYPE_DELTAS) && defined(WEIGHT_X) && defined(WEIGHT_Y) && defined(WEIGHT_W) && defined(WEIGHT_H) && defined(IMG_WIDTH) && defined(IMG_HEIGHT) && defined(BOX_FIELDS) && defined(SCALE_BEFORE) && defined(OFFSET_BOXES) && defined(SCALE_BOXES) && defined(OFFSET_DELTAS) && defined(SCALE_DELTAS) && defined(OFFSET_PRED_BOXES) && defined(SCALE_PRED_BOXES) // Check for compile time constants
-/** Perform a padded copy of input tensor to the output tensor for quantized data types. Padding values are defined at compile time
+/** Transform proposal bounding boxes to target bounding boxes using bounding box deltas for quantized data types.
*
* @attention The following variables must be passed at compile time:
  * -# -DDATA_TYPE= Tensor data type. Supported data types: QASYMM16 for boxes and pred_boxes, QASYMM8 for deltas
diff --git a/src/core/CL/cl_kernels/crop_tensor.cl b/src/core/CL/cl_kernels/crop_tensor.cl
index 62ae36ac5c..d9090dc838 100644
--- a/src/core/CL/cl_kernels/crop_tensor.cl
+++ b/src/core/CL/cl_kernels/crop_tensor.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -25,7 +25,7 @@
#if defined(DATA_TYPE) // Compile time constants
-/** Performs a copy of input tensor to the output tensor.
+/** Performs a tensor cropping.
*
* @param[in] in_ptr Pointer to the source tensor. Supported data types: All
* @param[in] in_stride_x Stride of the source tensor in X dimension (in bytes)
diff --git a/src/core/CL/cl_kernels/depth_to_space.cl b/src/core/CL/cl_kernels/depth_to_space.cl
index d3231a59a1..f301e64d66 100644
--- a/src/core/CL/cl_kernels/depth_to_space.cl
+++ b/src/core/CL/cl_kernels/depth_to_space.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,10 +24,10 @@
#include "helpers.h"
#if defined(DATA_TYPE) && defined(BLOCK_SHAPE) && defined(CHANNEL_SIZE)
-/** Batch to space transformation. (NCHW)
+/** Depth to space transformation. (NCHW)
*
* @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g. -DDATA_TYPE=float
- * @note The input tensor batch size must be passed at compile time using -DCHANNEL_SIZE. e.g. -DCHANNEL_SIZE=2
+ * @note The input tensor depth size must be passed at compile time using -DCHANNEL_SIZE. e.g. -DCHANNEL_SIZE=2
* @note The block shape must be passed at compile time using -DBLOCK_SHAPE. e.g. -DBLOCK_SHAPE=2
*
* @param[in] input_ptr Pointer to the source tensor. Supported data types: All.
@@ -66,10 +66,10 @@ __kernel void depth_to_space_nchw(
*((__global DATA_TYPE *)tensor4D_offset(&out, out_x, out_y, z, batch_id)) = *((__global DATA_TYPE *)in.ptr);
}
-/** Batch to space transformation. (NHWC)
+/** Depth to space transformation. (NHWC)
*
* @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g. -DDATA_TYPE=float
- * @note The input tensor batch size must be passed at compile time using -DCHANNEL_SIZE. e.g. -DCHANNEL_SIZE=2
+ * @note The input tensor depth size must be passed at compile time using -DCHANNEL_SIZE. e.g. -DCHANNEL_SIZE=2
* @note The block shape must be passed at compile time using -DBLOCK_SHAPE. e.g. -DBLOCK_SHAPE=2
*
* @param[in] input_ptr Pointer to the source tensor. Supported data types: All.
diff --git a/src/core/NEON/kernels/NEReductionOperationKernel.h b/src/core/NEON/kernels/NEReductionOperationKernel.h
index 667305b3aa..08e654fd21 100644
--- a/src/core/NEON/kernels/NEReductionOperationKernel.h
+++ b/src/core/NEON/kernels/NEReductionOperationKernel.h
@@ -59,7 +59,7 @@ public:
/** Set the source, destination of the kernel
*
- * @param[in] input Source tensor. Data type supported: QASYMM8_SIGNED/QASYMM8/F16/F32/S32. Data layouts supported: NCHW.
+ * @param[in] input Source tensor. Data type supported: QASYMM8_SIGNED/QASYMM8/F16/F32/S32.
* @param[out] output Destination tensor.Data types and data layouts supported: same as @p input, S32 for ARG_MIX/ARG_MAX.
* Output will have the same number of dimensions as input.
* @param[in] axis Axis along which to reduce. Supported reduction axis : 0
@@ -69,7 +69,7 @@ public:
/** Static function to check if given info will lead to a valid configuration of @ref NEReductionOperationKernel.
*
- * @param[in] input Source tensor info. Data type supported: QASYMM8_SIGNED/QASYMM8/F16/F32/S32. Data layouts supported: NCHW.
+ * @param[in] input Source tensor info. Data type supported: QASYMM8_SIGNED/QASYMM8/F16/F32/S32.
* @param[in] output Destination tensor info.Data types and data layouts supported: same as @p input, S32 for ARG_MIX/ARG_MAX.
* Output will have the same number of dimensions as input.
* @param[in] axis Axis along which to reduce. Supported reduction axis : 0