authorSheri Zhang <sheri.zhang@arm.com>2021-04-22 14:41:12 +0100
committerSheri Zhang <sheri.zhang@arm.com>2021-04-28 12:52:32 +0000
commita47dcc229d912d4e4bb5afa37220d20451f243a7 (patch)
treef8b296701fbdebfc7d29abc09144c49619bcca1c
parent2b7fee089c76226bfafcae77ba49f1eddb1e01da (diff)
downloadComputeLibrary-a47dcc229d912d4e4bb5afa37220d20451f243a7.tar.gz
Update operator list documentation
All the common information for the operators is stored in OperatorList.h.
All data type and data layout information for the operators is stored in
the function header files.

Partially resolve: COMPMID-4199

Signed-off-by: Sheri Zhang <sheri.zhang@arm.com>
Change-Id: I272948cfb3f84e42232a82dd84c0158d84642099
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5511
Reviewed-by: SiCong Li <sicong.li@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
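The documentation pattern added to each function header follows this shape (a generic template distilled from the hunks below, not taken verbatim from any single file):

/** Set the input and output tensors.
 *
 * Valid data layouts:
 * - ...
 *
 * Valid data type configurations:
 * |src            |dst            |
 * |:--------------|:--------------|
 * |...            |...            |
 *
 * @param ...
 */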
-rw-r--r--arm_compute/runtime/CL/functions/CLActivationLayer.h13
-rw-r--r--arm_compute/runtime/CL/functions/CLConcatenateLayer.h17
-rw-r--r--arm_compute/runtime/CL/functions/CLConvertFullyConnectedWeights.h9
-rw-r--r--arm_compute/runtime/CL/functions/CLCopy.h8
-rw-r--r--arm_compute/runtime/CL/functions/CLDequantizationLayer.h17
-rw-r--r--arm_compute/runtime/CL/functions/CLDirectConvolutionLayer.h12
-rw-r--r--arm_compute/runtime/CL/functions/CLFFT1D.h11
-rw-r--r--arm_compute/runtime/CL/functions/CLFFT2D.h11
-rw-r--r--arm_compute/runtime/CL/functions/CLFFTConvolutionLayer.h11
-rw-r--r--arm_compute/runtime/CL/functions/CLFill.h8
-rw-r--r--arm_compute/runtime/CL/functions/CLFloor.h9
-rw-r--r--arm_compute/runtime/CL/functions/CLPReluLayer.h11
-rw-r--r--arm_compute/runtime/CL/functions/CLPermute.h9
-rw-r--r--arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h57
-rw-r--r--arm_compute/runtime/CL/functions/CLPoolingLayer.h12
-rw-r--r--arm_compute/runtime/CL/functions/CLQuantizationLayer.h19
-rw-r--r--arm_compute/runtime/CL/functions/CLReshapeLayer.h8
-rw-r--r--arm_compute/runtime/CL/functions/CLScale.h14
-rw-r--r--arm_compute/runtime/CL/functions/CLSlice.h10
-rw-r--r--arm_compute/runtime/CL/functions/CLStridedSlice.h10
-rw-r--r--arm_compute/runtime/CL/functions/CLTranspose.h8
-rw-r--r--arm_compute/runtime/NEON/functions/NEActivationLayer.h12
-rw-r--r--arm_compute/runtime/NEON/functions/NEArithmeticAddition.h30
-rw-r--r--arm_compute/runtime/NEON/functions/NEConcatenateLayer.h11
-rw-r--r--arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h9
-rw-r--r--arm_compute/runtime/NEON/functions/NECopy.h8
-rw-r--r--arm_compute/runtime/NEON/functions/NEDequantizationLayer.h17
-rw-r--r--arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h10
-rw-r--r--arm_compute/runtime/NEON/functions/NEFFT1D.h8
-rw-r--r--arm_compute/runtime/NEON/functions/NEFFT2D.h8
-rw-r--r--arm_compute/runtime/NEON/functions/NEFFTConvolutionLayer.h8
-rw-r--r--arm_compute/runtime/NEON/functions/NEFill.h8
-rw-r--r--arm_compute/runtime/NEON/functions/NEFloor.h9
-rw-r--r--arm_compute/runtime/NEON/functions/NEPReluLayer.h11
-rw-r--r--arm_compute/runtime/NEON/functions/NEPermute.h9
-rw-r--r--arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h18
-rw-r--r--arm_compute/runtime/NEON/functions/NEPoolingLayer.h12
-rw-r--r--arm_compute/runtime/NEON/functions/NEQuantizationLayer.h19
-rw-r--r--arm_compute/runtime/NEON/functions/NEReshapeLayer.h8
-rw-r--r--arm_compute/runtime/NEON/functions/NEScale.h14
-rw-r--r--arm_compute/runtime/NEON/functions/NESlice.h10
-rw-r--r--arm_compute/runtime/NEON/functions/NEStridedSlice.h10
-rw-r--r--arm_compute/runtime/NEON/functions/NETranspose.h8
-rw-r--r--arm_compute/runtime/OperatorList.h1028
-rw-r--r--docs/09_operators_list.dox784
-rw-r--r--docs/Doxyfile1
46 files changed, 2291 insertions, 63 deletions
diff --git a/arm_compute/runtime/CL/functions/CLActivationLayer.h b/arm_compute/runtime/CL/functions/CLActivationLayer.h
index aa62183909..e6bb192532 100644
--- a/arm_compute/runtime/CL/functions/CLActivationLayer.h
+++ b/arm_compute/runtime/CL/functions/CLActivationLayer.h
@@ -59,6 +59,19 @@ public:
CLActivationLayer &operator=(CLActivationLayer &&);
/** Set the input and output tensor.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |QASYMM8 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |
+ * |QSYMM16 |QSYMM16 |
+ * |F16 |F16 |
+ * |F32 |F32 |
+ *
+ *
* @note If the output tensor is a nullptr or is equal to the input, the activation function will be performed in-place
*
* @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result
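As a usage illustration of the F32 -> F32 row documented above, a minimal editorial sketch (not part of this patch), assuming the library's public CL runtime API of this period and an OpenCL-capable device; the shape and activation function are arbitrary:

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLActivationLayer.h"

using namespace arm_compute;

int main()
{
    CLScheduler::get().default_init();   // create the default context, queue and kernel library

    CLTensor src, dst;
    src.allocator()->init(TensorInfo(TensorShape(16U, 16U), 1, DataType::F32));
    dst.allocator()->init(TensorInfo(TensorShape(16U, 16U), 1, DataType::F32));

    CLActivationLayer act;
    act.configure(&src, &dst, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));

    src.allocator()->allocate();
    dst.allocator()->allocate();
    act.run();
    CLScheduler::get().sync();           // wait for the enqueued kernel to finish
    return 0;
}

The quantized rows work the same way, with a QuantizationInfo attached to each tensor's TensorInfo.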
diff --git a/arm_compute/runtime/CL/functions/CLConcatenateLayer.h b/arm_compute/runtime/CL/functions/CLConcatenateLayer.h
index bfc8a39ac9..71e84e21b5 100644
--- a/arm_compute/runtime/CL/functions/CLConcatenateLayer.h
+++ b/arm_compute/runtime/CL/functions/CLConcatenateLayer.h
@@ -64,11 +64,22 @@ public:
CLConcatenateLayer &operator=(CLConcatenateLayer &&);
/** Initialise the kernel's inputs vector and output.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |QASYMM8 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |
+ * |F16 |F16 |
+ * |F32 |F32 |
+ *
* @note Input and output tensor dimensions preconditions differ depending on the concatenation axis.
* @note Preconditions can be found respectively at @ref opencl::kernels::ClWidthConcatenateKernel,
* @ref opencl::kernels::ClHeightConcatenateKernel and @ref opencl::kernels::ClDepthConcatenateKernel.
*
- * @param[in,out] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: All
+ * @param[in,out] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
* @param[out] output Output tensor. Data types supported: Same as @p input.
* @param[in] axis Concatenation axis. Supported underlying concatenation axis are 0, 1, 2 and 3.
*/
@@ -80,7 +91,7 @@ public:
* @ref opencl::kernels::ClHeightConcatenateKernel and @ref opencl::kernels::ClDepthConcatenateKernel.
*
* @param[in] compile_context The compile context to be used.
- * @param[in,out] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: All
+ * @param[in,out] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
* @param[out] output Output tensor. Data types supported: Same as @p input.
* @param[in] axis Concatenation axis. Supported underlying concatenation axis are 0, 1, 2 and 3.
*/
@@ -91,7 +102,7 @@ public:
* @note Preconditions can be found respectively at @ref opencl::kernels::ClWidthConcatenateKernel,
* @ref opencl::kernels::ClHeightConcatenateKernel and @ref opencl::kernels::ClDepthConcatenateKernel.
*
- * @param[in] inputs_vector The vectors containing all the tensors info to concatenate. Data types supported: All.
+ * @param[in] inputs_vector The vectors containing all the tensors info to concatenate. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
* @param[in] output Output tensor info. Data types supported: Same as @p input.
* @param[in] axis Concatenation axis. Supported underlying concatenation axis are 0, 1, 2 and 3.
*
diff --git a/arm_compute/runtime/CL/functions/CLConvertFullyConnectedWeights.h b/arm_compute/runtime/CL/functions/CLConvertFullyConnectedWeights.h
index 8892dbad6c..6c7d9e52e8 100644
--- a/arm_compute/runtime/CL/functions/CLConvertFullyConnectedWeights.h
+++ b/arm_compute/runtime/CL/functions/CLConvertFullyConnectedWeights.h
@@ -55,6 +55,15 @@ public:
CLConvertFullyConnectedWeights &operator=(CLConvertFullyConnectedWeights &&) = default;
/** Initialize the function.
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |All |All |
+ *
* @param[in] input Source weights tensor to convert. Must be 2 dimensional. Data types supported: All.
* @param[out] output The converted weights tensor. Shape and Data Type: Same as @p input.
* @param[in] original_input_shape Shape of the original input tensor (the one entering fully connected layer).
diff --git a/arm_compute/runtime/CL/functions/CLCopy.h b/arm_compute/runtime/CL/functions/CLCopy.h
index 795a183e1f..4fc4183d3e 100644
--- a/arm_compute/runtime/CL/functions/CLCopy.h
+++ b/arm_compute/runtime/CL/functions/CLCopy.h
@@ -53,6 +53,14 @@ public:
CLCopy &operator=(CLCopy &&);
/** Initialise the function's source and destination.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |All |All |
+ *
* @param[in] input Source tensor. Data types supported: All.
* @param[out] output Output tensor. Data types supported: Same as @p input.
* @param[in] dst_window (Optional) Window to be used in case only copying into part of a tensor. Default is nullptr.
diff --git a/arm_compute/runtime/CL/functions/CLDequantizationLayer.h b/arm_compute/runtime/CL/functions/CLDequantizationLayer.h
index 7ecf094df3..4a5c3a3203 100644
--- a/arm_compute/runtime/CL/functions/CLDequantizationLayer.h
+++ b/arm_compute/runtime/CL/functions/CLDequantizationLayer.h
@@ -54,6 +54,23 @@ public:
CLDequantizationLayer &operator=(CLDequantizationLayer &&) = default;
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:------------------|:--------------|
+ * |QASYMM8 |F16 |
+ * |QASYMM8 |F32 |
+ * |QASYMM8_SIGNED |F16 |
+ * |QASYMM8_SIGNED |F32 |
+ * |QSYMM8_PER_CHANNEL |F16 |
+ * |QSYMM8_PER_CHANNEL |F32 |
+ * |QSYMM8 |F16 |
+ * |QSYMM8 |F32 |
+ * |QSYMM16 |F16 |
+ * |QSYMM16 |F32 |
+ *
* @param[in] input Source tensor with at least 3 dimensions. The dimensions over the third will be interpreted as batches.
* Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16.
* @param[out] output Destination tensor with the same dimensions of input. Data type supported: F16/F32.
diff --git a/arm_compute/runtime/CL/functions/CLDirectConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLDirectConvolutionLayer.h
index 6e9e2161b9..3fd0c63782 100644
--- a/arm_compute/runtime/CL/functions/CLDirectConvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLDirectConvolutionLayer.h
@@ -55,6 +55,18 @@ public:
CLDirectConvolutionLayer &operator=(CLDirectConvolutionLayer &&);
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |src2 |dst |
+ * |:--------------|:--------------|:------|:--------------|
+ * |F16 |F16 |F16 |F16 |
+ * |F32 |F32 |F32 |F32 |
+ * |QASYMM8 |QASYMM8 |S32 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |QASYMM8_SIGNED |
+ *
* @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
* while every optional dimension from 4 and above represent a batch of inputs.
* Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
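For the F32 row above, a hedged editorial sketch (not part of the patch) of a 3x3, stride-1, unpadded direct convolution; the shapes are arbitrary and assume the default NCHW layout:

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLDirectConvolutionLayer.h"

using namespace arm_compute;

int main()
{
    CLScheduler::get().default_init();

    CLTensor src, weights, biases, dst;
    src.allocator()->init(TensorInfo(TensorShape(32U, 32U, 3U), 1, DataType::F32));       // [W, H, IFM]
    weights.allocator()->init(TensorInfo(TensorShape(3U, 3U, 3U, 8U), 1, DataType::F32)); // [W, H, IFM, OFM]
    biases.allocator()->init(TensorInfo(TensorShape(8U), 1, DataType::F32));
    dst.allocator()->init(TensorInfo(TensorShape(30U, 30U, 8U), 1, DataType::F32));       // 32 - 3 + 1 = 30

    CLDirectConvolutionLayer conv;
    conv.configure(&src, &weights, &biases, &dst, PadStrideInfo(1, 1, 0, 0));             // stride 1, no padding

    src.allocator()->allocate();
    weights.allocator()->allocate();
    biases.allocator()->allocate();
    dst.allocator()->allocate();
    conv.run();
    CLScheduler::get().sync();
    return 0;
}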
diff --git a/arm_compute/runtime/CL/functions/CLFFT1D.h b/arm_compute/runtime/CL/functions/CLFFT1D.h
index 731bad5c32..c7112dc737 100644
--- a/arm_compute/runtime/CL/functions/CLFFT1D.h
+++ b/arm_compute/runtime/CL/functions/CLFFT1D.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -61,6 +61,15 @@ public:
~CLFFT1D();
/** Initialise the function's source, destinations and border mode.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:------|:------|
+ * |F32 |F32 |
+ * |F16 |F16 |
+ *
* @param[in] input Source tensor. Data types supported: F16/F32.
* @param[out] output Destination tensor. Data types and data layouts supported: Same as @p input.
* @param[in] config FFT related configuration
diff --git a/arm_compute/runtime/CL/functions/CLFFT2D.h b/arm_compute/runtime/CL/functions/CLFFT2D.h
index adc8e46cb2..3d20327bf1 100644
--- a/arm_compute/runtime/CL/functions/CLFFT2D.h
+++ b/arm_compute/runtime/CL/functions/CLFFT2D.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -58,6 +58,15 @@ public:
~CLFFT2D();
/** Initialise the function's source, destinations and border mode.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:------|:------|
+ * |F32 |F32 |
+ * |F16 |F16 |
+ *
* @param[in] input Source tensor. Data types supported: F16/F32.
* @param[out] output Destination tensor. Data types and data layouts supported: Same as @p input.
* @param[in] config FFT related configuration
diff --git a/arm_compute/runtime/CL/functions/CLFFTConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLFFTConvolutionLayer.h
index 5085f5a66c..f873cb0b86 100644
--- a/arm_compute/runtime/CL/functions/CLFFTConvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLFFTConvolutionLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -71,6 +71,15 @@ public:
CLFFTConvolutionLayer &operator=(CLFFTConvolutionLayer &&) = default;
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:------|:------|
+ * |F32 |F32 |
+ * |F16 |F16 |
+ *
* @note: This function only works with any square kernel size and unit strides for both NCHW and NHWC data layout
*
* @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
diff --git a/arm_compute/runtime/CL/functions/CLFill.h b/arm_compute/runtime/CL/functions/CLFill.h
index 9a27d158a6..a01e0c3188 100644
--- a/arm_compute/runtime/CL/functions/CLFill.h
+++ b/arm_compute/runtime/CL/functions/CLFill.h
@@ -52,6 +52,14 @@ public:
CLFill &operator=(CLFill &&);
/** Initialize the kernel's tensor and filling value
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |All |All |
+ *
* @param[in,out] tensor Input tensor to fill. Supported data types: All.
* @param[in] constant_value The value used to fill the planes of the tensor
* @param[in] window Window to be used in case setting only part of a tensor. Default is nullptr.
diff --git a/arm_compute/runtime/CL/functions/CLFloor.h b/arm_compute/runtime/CL/functions/CLFloor.h
index 7c7607002d..87cd5b44c7 100644
--- a/arm_compute/runtime/CL/functions/CLFloor.h
+++ b/arm_compute/runtime/CL/functions/CLFloor.h
@@ -54,6 +54,15 @@ public:
CLFloor &operator=(CLFloor &&);
/** Set the source, destination of the kernel
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:------|:------|
+ * |F32 |F32 |
+ * |F16 |F16 |
+ *
* @param[in] input Source tensor. Data type supported: F16/F32.
* @param[out] output Destination tensor. Same as @p input
*/
diff --git a/arm_compute/runtime/CL/functions/CLPReluLayer.h b/arm_compute/runtime/CL/functions/CLPReluLayer.h
index 7b6667044e..b123d0e46e 100644
--- a/arm_compute/runtime/CL/functions/CLPReluLayer.h
+++ b/arm_compute/runtime/CL/functions/CLPReluLayer.h
@@ -53,6 +53,17 @@ public:
CLPReluLayer &operator=(CLPReluLayer &&);
/** Set the input and output tensor.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |QASYMM8 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |
+ * |F16 |F16 |
+ * |F32 |F32 |
+ *
* @note If the output tensor is a nullptr or is equal to the input, the activation function will be performed in-place
*
* @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
diff --git a/arm_compute/runtime/CL/functions/CLPermute.h b/arm_compute/runtime/CL/functions/CLPermute.h
index e59cca2a67..8e15da2287 100644
--- a/arm_compute/runtime/CL/functions/CLPermute.h
+++ b/arm_compute/runtime/CL/functions/CLPermute.h
@@ -53,6 +53,15 @@ public:
CLPermute &operator=(CLPermute &&) = default;
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |All |All |
+ *
* @note Arbitrary permutation vectors are supported with rank not greater than 4
*
* @param[in] input The input tensor to permute. Data types supported: All.
diff --git a/arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h b/arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h
index 9f70b05a35..14422383ad 100644
--- a/arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h
+++ b/arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h
@@ -52,19 +52,23 @@ public:
CLPixelWiseMultiplication &operator=(CLPixelWiseMultiplication &&);
/** Initialise the kernel's inputs, output and conversion policy.
*
- * Valid configurations (Input1,Input2) -> Output :
+ * Valid data layouts:
+ * - All
*
- * - (U8,U8) -> U8
- * - (U8,U8) -> S16
- * - (U8,S16) -> S16
- * - (S16,U8) -> S16
- * - (S16,S16) -> S16
- * - (F16,F16) -> F16
- * - (F32,F32) -> F32
- * - (QASYMM8,QASYMM8) -> QASYMM8
- * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED
- * - (QSYMM16,QSYMM16) -> QSYMM16
- * - (QSYMM16,QSYMM16) -> S32
+ * Valid data type configurations:
+ * |src0 |src1 |dst |
+ * |:--------------|:--------------|:--------------|
+ * |QASYMM8 |QASYMM8 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED |
+ * |QSYMM16 |QSYMM16 |QSYMM16 |
+ * |QSYMM16 |QSYMM16 |S32 |
+ * |U8 |U8 |U8 |
+ * |U8 |U8 |S16 |
+ * |U8 |S16 |S16 |
+ * |S16 |U8 |S16 |
+ * |S16 |S16 |S16 |
+ * |F16 |F16 |F16 |
+ * |F32 |F32 |F32 |
*
* @param[in, out] input1 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
* The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
@@ -81,20 +85,6 @@ public:
ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info = ActivationLayerInfo());
/** Initialise the kernel's inputs, output and conversion policy.
*
- * Valid configurations (Input1,Input2) -> Output :
- *
- * - (U8,U8) -> U8
- * - (U8,U8) -> S16
- * - (U8,S16) -> S16
- * - (S16,U8) -> S16
- * - (S16,S16) -> S16
- * - (F16,F16) -> F16
- * - (F32,F32) -> F32
- * - (QASYMM8,QASYMM8) -> QASYMM8
- * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED
- * - (QSYMM16,QSYMM16) -> QSYMM16
- * - (QSYMM16,QSYMM16) -> S32
- *
* @param[in] compile_context The compile context to be used.
* @param[in, out] input1 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
* The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
@@ -111,21 +101,6 @@ public:
ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info = ActivationLayerInfo());
/** Static function to check if given info will lead to a valid configuration of @ref CLPixelWiseMultiplication
*
- * Valid configurations (Input1,Input2) -> Output :
- *
- * - (U8,U8) -> U8
- * - (U8,U8) -> S16
- * - (U8,S16) -> S16
- * - (S16,U8) -> S16
- * - (S16,S16) -> S16
- * - (F16,F16) -> F16
- * - (F32,F32) -> F32
- * - (QASYMM8,QASYMM8) -> QASYMM8
- * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED
- * - (QSYMM16,QSYMM16) -> QSYMM16
- * - (QSYMM16,QSYMM16) -> S32
- *
- *
* @param[in] input1 An input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
* @param[in] input2 An input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
* @param[in] output The output tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
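The table rows can be checked up front with the static validate() declared in this header; a minimal editorial sketch (not part of the patch) for the QASYMM8 x QASYMM8 -> QASYMM8 row, with arbitrary quantization parameters:

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h"

using namespace arm_compute;

int main()
{
    CLScheduler::get().default_init();   // some CL validate paths query device capabilities

    // QASYMM8 x QASYMM8 -> QASYMM8 row; scale/offset values are arbitrary examples
    const TensorInfo in1(TensorShape(8U, 8U), 1, DataType::QASYMM8, QuantizationInfo(0.5f, 10));
    const TensorInfo in2(TensorShape(8U, 8U), 1, DataType::QASYMM8, QuantizationInfo(0.25f, 3));
    const TensorInfo out(TensorShape(8U, 8U), 1, DataType::QASYMM8, QuantizationInfo(1.f, 0));

    const Status status = CLPixelWiseMultiplication::validate(&in1, &in2, &out, 1.f,
                                                              ConvertPolicy::SATURATE,
                                                              RoundingPolicy::TO_ZERO);
    return status.error_code() == ErrorCode::OK ? 0 : 1;
}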
diff --git a/arm_compute/runtime/CL/functions/CLPoolingLayer.h b/arm_compute/runtime/CL/functions/CLPoolingLayer.h
index ec1c4eb768..902feca234 100644
--- a/arm_compute/runtime/CL/functions/CLPoolingLayer.h
+++ b/arm_compute/runtime/CL/functions/CLPoolingLayer.h
@@ -54,6 +54,18 @@ public:
CLPoolingLayer &operator=(CLPoolingLayer &&) = default;
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |QASYMM8 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |
+ * |F16 |F16 |
+ * |F32 |F32 |
+ *
* @param[in,out] input Source tensor. (Written to only when padding != 0) Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
* @param[out] output Destination tensor. Data types supported: Same as @p input.
* @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo.
diff --git a/arm_compute/runtime/CL/functions/CLQuantizationLayer.h b/arm_compute/runtime/CL/functions/CLQuantizationLayer.h
index c5dad53513..6a44a226d4 100644
--- a/arm_compute/runtime/CL/functions/CLQuantizationLayer.h
+++ b/arm_compute/runtime/CL/functions/CLQuantizationLayer.h
@@ -59,6 +59,25 @@ public:
CLQuantizationLayer &operator=(CLQuantizationLayer &&) = default;
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:------------------|:--------------|
+ * |QASYMM8 |QASYMM8 |
+ * |QASYMM8 |QASYMM8_SIGNED |
+ * |QASYMM8 |QASYMM16 |
+ * |QASYMM8_SIGNED |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |
+ * |QASYMM8_SIGNED |QASYMM16 |
+ * |F16 |QASYMM8 |
+ * |F16 |QASYMM8_SIGNED |
+ * |F16 |QASYMM16 |
+ * |F32 |QASYMM8 |
+ * |F32 |QASYMM8_SIGNED |
+ * |F32 |QASYMM16 |
+ *
* @param[in] input Source tensor. The dimensions over the third will be interpreted as batches. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
* @param[out] output Destination tensor with the same dimensions of input. Data types supported: QASYMM8/QASYMM8_SIGNED/QASYMM16.
*
diff --git a/arm_compute/runtime/CL/functions/CLReshapeLayer.h b/arm_compute/runtime/CL/functions/CLReshapeLayer.h
index 60ed81680e..7346b65e9b 100644
--- a/arm_compute/runtime/CL/functions/CLReshapeLayer.h
+++ b/arm_compute/runtime/CL/functions/CLReshapeLayer.h
@@ -52,6 +52,14 @@ public:
CLReshapeLayer &operator=(CLReshapeLayer &&);
/** Initialise the kernel's inputs and outputs
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |All |All |
+ *
* @param[in] input First tensor input. Data type supported: All
* @param[out] output Output tensor. Data type supported: Same as @p input
*/
diff --git a/arm_compute/runtime/CL/functions/CLScale.h b/arm_compute/runtime/CL/functions/CLScale.h
index 8a67f74bb6..ddb4a23531 100644
--- a/arm_compute/runtime/CL/functions/CLScale.h
+++ b/arm_compute/runtime/CL/functions/CLScale.h
@@ -55,6 +55,20 @@ public:
/** Initialize the function's source, destination, interpolation type and border_mode.
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |QASYMM8 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |
+ * |F16 |F16 |
+ * |F32 |F32 |
+ * |U8 |U8 |
+ * |S16 |S16 |
+ *
* @param[in,out] input Source tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/F16/F32. (Written to only for @p border_mode != UNDEFINED)
* @param[out] output Destination tensor. Data types supported: Same as @p input
* All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane.
diff --git a/arm_compute/runtime/CL/functions/CLSlice.h b/arm_compute/runtime/CL/functions/CLSlice.h
index f17e77236d..7a7689c528 100644
--- a/arm_compute/runtime/CL/functions/CLSlice.h
+++ b/arm_compute/runtime/CL/functions/CLSlice.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -91,6 +91,14 @@ public:
CLSlice &operator=(CLSlice &&);
/** Configure kernel
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |All |All |
+ *
* @note Supported tensor rank: up to 4
* @note Start indices must be non-negative. 0 <= starts[i]
* @note End coordinates can be negative, which represents the number of elements before the end of that dimension.
diff --git a/arm_compute/runtime/CL/functions/CLStridedSlice.h b/arm_compute/runtime/CL/functions/CLStridedSlice.h
index fdbef81f7d..6fab0c0186 100644
--- a/arm_compute/runtime/CL/functions/CLStridedSlice.h
+++ b/arm_compute/runtime/CL/functions/CLStridedSlice.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -54,6 +54,14 @@ public:
CLStridedSlice &operator=(CLStridedSlice &&);
/** Configure kernel
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |All |All |
+ *
* @note Supported tensor rank: up to 4
*
* @param[in] input Source tensor. Data type supported: All.
diff --git a/arm_compute/runtime/CL/functions/CLTranspose.h b/arm_compute/runtime/CL/functions/CLTranspose.h
index 43cebeba90..a866aeabaa 100644
--- a/arm_compute/runtime/CL/functions/CLTranspose.h
+++ b/arm_compute/runtime/CL/functions/CLTranspose.h
@@ -53,6 +53,14 @@ public:
CLTranspose &operator=(CLTranspose &&) = default;
/** Initialise the kernel's inputs and output
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |All |All |
+ *
* @param[in] input Input tensor. Data types supported: All.
* @param[out] output Output tensor. Data type supported: Same as @p input
*/
diff --git a/arm_compute/runtime/NEON/functions/NEActivationLayer.h b/arm_compute/runtime/NEON/functions/NEActivationLayer.h
index ffda8406aa..b39a8d7701 100644
--- a/arm_compute/runtime/NEON/functions/NEActivationLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEActivationLayer.h
@@ -62,6 +62,18 @@ public:
/** [NEActivationLayer snippet] **/
/** Set the input and output tensor.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |QASYMM8 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |
+ * |QSYMM16 |QSYMM16 |
+ * |F16 |F16 |
+ * |F32 |F32 |
+ *
* @note If the output tensor is a nullptr or is equal to the input, the activation function will be performed in-place
*
* @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result
diff --git a/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h b/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h
index 8f9fd27906..734e3502dd 100644
--- a/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h
+++ b/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h
@@ -51,19 +51,25 @@ public:
NEArithmeticAddition &operator=(NEArithmeticAddition &&);
/** Initialise the kernel's inputs, output and conversion policy.
*
- * Valid configurations (Input1,Input2) -> Output :
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
*
- * - (U8,U8) -> U8
- * - (U8,U8) -> S16
- * - (S16,U8) -> S16
- * - (U8,S16) -> S16
- * - (S16,S16) -> S16
- * - (S32,S32) -> S32
- * - (F16,F16) -> F16
- * - (F32,F32) -> F32
- * - (QASYMM8,QASYMM8) -> QASYMM8
- * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED
- * - (QSYMM16,QSYMM16) -> QSYMM16
+ * Valid data type configurations:
+ * |src0 |src1 |dst |
+ * |:--------------|:--------------|:--------------|
+ * |QASYMM8 |QASYMM8 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED |
+ * |QSYMM16 |QSYMM16 |QSYMM16 |
+ * |QSYMM16 |QSYMM16 |S32 |
+ * |U8 |U8 |U8 |
+ * |U8 |U8 |S16 |
+ * |U8 |S16 |S16 |
+ * |S16 |U8 |S16 |
+ * |S16 |S16 |S16 |
+ * |S32 |S32 |S32 |
+ * |F16 |F16 |F16 |
+ * |F32 |F32 |F32 |
*
* @param[in] input1 First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32
* @param[in] input2 Second tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32
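An editorial sketch (not part of the patch) of the F32 + F32 -> F32 row; the shape is arbitrary:

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/Tensor.h"
#include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h"

using namespace arm_compute;

int main()
{
    Tensor a, b, sum;
    const TensorInfo info(TensorShape(32U, 32U), 1, DataType::F32);   // F32 + F32 -> F32 row
    a.allocator()->init(info);
    b.allocator()->init(info);
    sum.allocator()->init(info);

    NEArithmeticAddition add;
    add.configure(&a, &b, &sum, ConvertPolicy::SATURATE);

    a.allocator()->allocate();
    b.allocator()->allocate();
    sum.allocator()->allocate();
    add.run();
    return 0;
}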
diff --git a/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h b/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h
index 6aa724ab0c..dd1c709d76 100644
--- a/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h
@@ -55,6 +55,17 @@ public:
NEConcatenateLayer &operator=(NEConcatenateLayer &&);
/** Initialise the kernel's inputs vector and output.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |QASYMM8 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |
+ * |F16 |F16 |
+ * |F32 |F32 |
+ *
* @note Input and output tensor dimensions preconditions differ depending on the concatenation axis.
* @note Preconditions can be found respectively at @ref cpu::kernels::CpuConcatenateWidthKernel, @ref cpu::kernels::CpuConcatenateHeightKernel,
* @ref cpu::kernels::CpuConcatenateDepthKernel and @ref cpu::kernels::CpuConcatenateBatchKernel.
diff --git a/arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h b/arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h
index f9ce66db13..218877d421 100644
--- a/arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h
+++ b/arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h
@@ -51,6 +51,15 @@ public:
~NEConvertFullyConnectedWeights();
/** Initialize the function.
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |All |All |
+ *
* @param[in] input Source weights tensor to convert. Must be 2 dimensional. Data types supported: All.
* @param[out] output The converted weights tensor. Shape and Data Type: Same as @p input.
* @param[in] original_input_shape Shape of the original input tensor (the one entering fully connected layer).
diff --git a/arm_compute/runtime/NEON/functions/NECopy.h b/arm_compute/runtime/NEON/functions/NECopy.h
index 56f8bd9803..ee02c259f4 100644
--- a/arm_compute/runtime/NEON/functions/NECopy.h
+++ b/arm_compute/runtime/NEON/functions/NECopy.h
@@ -53,6 +53,14 @@ public:
NECopy &operator=(NECopy &&);
/** Initialise the function's source and destination.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |All |All |
+ *
* @param[in] input Source tensor. Data types supported: All
* @param[out] output Output tensor. Data types supported: Same as @p input.
*
diff --git a/arm_compute/runtime/NEON/functions/NEDequantizationLayer.h b/arm_compute/runtime/NEON/functions/NEDequantizationLayer.h
index a18566e6ca..dfec835f45 100644
--- a/arm_compute/runtime/NEON/functions/NEDequantizationLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEDequantizationLayer.h
@@ -53,6 +53,23 @@ public:
NEDequantizationLayer &operator=(NEDequantizationLayer &&) = default;
/** Configure the kernel.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:------------------|:--------------|
+ * |QASYMM8 |F16 |
+ * |QASYMM8 |F32 |
+ * |QASYMM8_SIGNED |F16 |
+ * |QASYMM8_SIGNED |F32 |
+ * |QSYMM8_PER_CHANNEL |F16 |
+ * |QSYMM8_PER_CHANNEL |F32 |
+ * |QSYMM8 |F16 |
+ * |QSYMM8 |F32 |
+ * |QSYMM16 |F16 |
+ * |QSYMM16 |F32 |
+ *
* @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16.
* @param[out] output Destination tensor with the same dimensions of input. Data type supported: F16/F32.
*/
diff --git a/arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h
index fc4017e635..82cabed6c9 100644
--- a/arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h
@@ -58,6 +58,16 @@ public:
~NEDirectConvolutionLayer();
/** Set the input, weights, biases and output tensors.
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |src2 |dst |
+ * |:------|:------|:------|:------|
+ * |F16 |F16 |F16 |F16 |
+ * |F32 |F32 |F32 |F32 |
+ *
* @note: DirectConvolution only works in the following configurations:
* 1x1 convolution with stride_x = 1/2/3, stride_y = 1/2/3 data type = F16/F32
* 3x3 convolution with stride_x = 1/2/3, stride_y = 1/2/3 data type = F16/F32
diff --git a/arm_compute/runtime/NEON/functions/NEFFT1D.h b/arm_compute/runtime/NEON/functions/NEFFT1D.h
index a533aa7f48..9654b1e604 100644
--- a/arm_compute/runtime/NEON/functions/NEFFT1D.h
+++ b/arm_compute/runtime/NEON/functions/NEFFT1D.h
@@ -63,6 +63,14 @@ public:
~NEFFT1D();
/** Initialise the function's source and destinations.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:------|:------|
+ * |F32 |F32 |
+ *
* @param[in] input Source tensor. Data types supported: F32. Number of channels supported: 1 (real tensor) or 2 (complex tensor).
* @param[out] output Destination tensor. Data types and data layouts supported: Same as @p input.
* Number of channels supported: 1 (real tensor) or 2 (complex tensor).If @p input is real, @p output must be complex.
diff --git a/arm_compute/runtime/NEON/functions/NEFFT2D.h b/arm_compute/runtime/NEON/functions/NEFFT2D.h
index ce84a85105..57f38d1942 100644
--- a/arm_compute/runtime/NEON/functions/NEFFT2D.h
+++ b/arm_compute/runtime/NEON/functions/NEFFT2D.h
@@ -58,6 +58,14 @@ public:
~NEFFT2D();
/** Initialise the function's source and destinations
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:------|:------|
+ * |F32 |F32 |
+ *
* @param[in] input Source tensor. Data types supported: F32.
* @param[out] output Destination tensor. Data types and data layouts supported: Same as @p input.
* @param[in] config FFT related configuration
diff --git a/arm_compute/runtime/NEON/functions/NEFFTConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEFFTConvolutionLayer.h
index 213fa6093b..c5f4d45b6b 100644
--- a/arm_compute/runtime/NEON/functions/NEFFTConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEFFTConvolutionLayer.h
@@ -73,6 +73,14 @@ public:
~NEFFTConvolutionLayer();
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:------|:------|
+ * |F32 |F32 |
+ *
* @note: This function only works with any square kernel size and unit strides for both NCHW and NHWC data layout
*
* @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
diff --git a/arm_compute/runtime/NEON/functions/NEFill.h b/arm_compute/runtime/NEON/functions/NEFill.h
index ba5d020496..e923ce33e1 100644
--- a/arm_compute/runtime/NEON/functions/NEFill.h
+++ b/arm_compute/runtime/NEON/functions/NEFill.h
@@ -53,6 +53,14 @@ public:
NEFill &operator=(NEFill &&);
/** Initialize the function
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:------|:------|
+ * |All |All |
+ *
* @param[in,out] tensor Source tensor. Data types supported: All
* @param[in] constant_value Constant value to use to fill tensor.
*/
diff --git a/arm_compute/runtime/NEON/functions/NEFloor.h b/arm_compute/runtime/NEON/functions/NEFloor.h
index 9560eb9169..4d47b068db 100644
--- a/arm_compute/runtime/NEON/functions/NEFloor.h
+++ b/arm_compute/runtime/NEON/functions/NEFloor.h
@@ -54,6 +54,15 @@ public:
~NEFloor();
/** Set the source, destination of the kernel
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:------|:------|
+ * |F32 |F32 |
+ * |F16 |F16 |
+ *
* @param[in] input Source tensor. Data type supported: F16/F32.
* @param[out] output Destination tensor. Same as @p input
*/
diff --git a/arm_compute/runtime/NEON/functions/NEPReluLayer.h b/arm_compute/runtime/NEON/functions/NEPReluLayer.h
index b07febfe7f..81d5fd162c 100644
--- a/arm_compute/runtime/NEON/functions/NEPReluLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEPReluLayer.h
@@ -55,6 +55,17 @@ public:
NEPReluLayer &operator=(NEPReluLayer &&);
/** Set the input and output tensor.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |QASYMM8 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |
+ * |F16 |F16 |
+ * |F32 |F32 |
+ *
* @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
* @param[in] alpha Source alpha tensor. Data types supported: same of @p input.
* @param[out] output Destination tensor. Data type supported: same as @p input
diff --git a/arm_compute/runtime/NEON/functions/NEPermute.h b/arm_compute/runtime/NEON/functions/NEPermute.h
index 2508458a3d..c863fde0ac 100644
--- a/arm_compute/runtime/NEON/functions/NEPermute.h
+++ b/arm_compute/runtime/NEON/functions/NEPermute.h
@@ -54,6 +54,15 @@ public:
NEPermute &operator=(NEPermute &&) = default;
/** Configure the permute function
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:------|:------|
+ * |All |All |
+ *
* @note Arbitrary permutation vectors are supported with rank not greater than 4
*
* @param[in] input The input tensor to permute. Data types supported: All
diff --git a/arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h b/arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h
index 6f4cce3cde..f8074e791a 100644
--- a/arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h
+++ b/arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h
@@ -52,6 +52,24 @@ public:
NEPixelWiseMultiplication &operator=(NEPixelWiseMultiplication &&) = default;
/** Initialise the kernel's inputs, output and conversion policy.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |dst |
+ * |:--------------|:--------------|:--------------|
+ * |QASYMM8 |QASYMM8 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED |
+ * |QSYMM16 |QSYMM16 |QSYMM16 |
+ * |QSYMM16 |QSYMM16 |S32 |
+ * |U8 |U8 |U8 |
+ * |U8 |U8 |S16 |
+ * |U8 |S16 |S16 |
+ * |S16 |U8 |S16 |
+ * |S16 |S16 |S16 |
+ * |F16 |F16 |F16 |
+ * |F32 |F32 |F32 |
+ *
* @note For @p scale equal to 1/255 only round to nearest even (implemented as round half up) is supported.
* For all other scale values only round to zero (implemented as round towards minus infinity) is supported.
*
diff --git a/arm_compute/runtime/NEON/functions/NEPoolingLayer.h b/arm_compute/runtime/NEON/functions/NEPoolingLayer.h
index cb136ebca9..851dc0ca32 100644
--- a/arm_compute/runtime/NEON/functions/NEPoolingLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEPoolingLayer.h
@@ -59,6 +59,18 @@ public:
~NEPoolingLayer();
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |QASYMM8 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |
+ * |F16 |F16 |
+ * |F32 |F32 |
+ *
* @note F16 is supported for pool sizes 2 and 3 only
*
* @param[in, out] input Source tensor. (Written to only when padding != 0) Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
diff --git a/arm_compute/runtime/NEON/functions/NEQuantizationLayer.h b/arm_compute/runtime/NEON/functions/NEQuantizationLayer.h
index 9e2d9ecf24..a7fadfc7cd 100644
--- a/arm_compute/runtime/NEON/functions/NEQuantizationLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEQuantizationLayer.h
@@ -52,6 +52,25 @@ public:
NEQuantizationLayer &operator=(NEQuantizationLayer &&) = default;
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:------------------|:--------------|
+ * |QASYMM8 |QASYMM8 |
+ * |QASYMM8 |QASYMM8_SIGNED |
+ * |QASYMM8 |QASYMM16 |
+ * |QASYMM8_SIGNED |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |
+ * |QASYMM8_SIGNED |QASYMM16 |
+ * |F16 |QASYMM8 |
+ * |F16 |QASYMM8_SIGNED |
+ * |F16 |QASYMM16 |
+ * |F32 |QASYMM8 |
+ * |F32 |QASYMM8_SIGNED |
+ * |F32 |QASYMM16 |
+ *
* @param[in] input Source tensor. The dimensions over the third will be interpreted as batches. Data types supported: QASYMM8/QASYMM8_SIGNED/F32/F16.
* @param[out] output Destination tensor with the same dimensions of input. Data types supported: QASYMM8/QASYMM8_SIGNED/QASYMM16
*/
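An editorial sketch (not part of the patch) of the F32 -> QASYMM8 row; the scale and offset are arbitrary example values:

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/Tensor.h"
#include "arm_compute/runtime/NEON/functions/NEQuantizationLayer.h"

using namespace arm_compute;

int main()
{
    Tensor src, dst;
    src.allocator()->init(TensorInfo(TensorShape(64U), 1, DataType::F32));
    // The destination's QuantizationInfo carries the target scale/offset
    dst.allocator()->init(TensorInfo(TensorShape(64U), 1, DataType::QASYMM8, QuantizationInfo(0.05f, 128)));

    NEQuantizationLayer quantize;
    quantize.configure(&src, &dst);

    src.allocator()->allocate();
    dst.allocator()->allocate();
    quantize.run();
    return 0;
}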
diff --git a/arm_compute/runtime/NEON/functions/NEReshapeLayer.h b/arm_compute/runtime/NEON/functions/NEReshapeLayer.h
index b8c0a841bc..3e6e33f797 100644
--- a/arm_compute/runtime/NEON/functions/NEReshapeLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEReshapeLayer.h
@@ -52,6 +52,14 @@ public:
NEReshapeLayer &operator=(NEReshapeLayer &&);
/** Initialise the kernel's inputs and outputs
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:------|:------|
+ * |All |All |
+ *
* @param[in] input Input tensor. Data type supported: All
* @param[out] output Output tensor. Data type supported: Same as @p input
*/
diff --git a/arm_compute/runtime/NEON/functions/NEScale.h b/arm_compute/runtime/NEON/functions/NEScale.h
index 45658a7cd3..233ee2969e 100644
--- a/arm_compute/runtime/NEON/functions/NEScale.h
+++ b/arm_compute/runtime/NEON/functions/NEScale.h
@@ -48,6 +48,20 @@ public:
ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(NEScale);
/** Initialize the function's source, destination, interpolation type and border_mode.
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |QASYMM8 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |
+ * |F16 |F16 |
+ * |F32 |F32 |
+ * |U8 |U8 |
+ * |S16 |S16 |
+ *
* @param[in, out] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/U8/S16/F16/F32. (Written to only for @p border_mode != UNDEFINED)
* @param[out] output Destination tensor. Data type supported: Same as @p input. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane.
* @param[in] info @ref ScaleKernelInfo to be used for configuration
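An editorial sketch (not part of the patch) of the F32 -> F32 row, upscaling by 2x in X/Y with bilinear interpolation; shapes and policies are arbitrary:

#include "arm_compute/core/KernelDescriptors.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/Tensor.h"
#include "arm_compute/runtime/NEON/functions/NEScale.h"

using namespace arm_compute;

int main()
{
    Tensor src, dst;
    src.allocator()->init(TensorInfo(TensorShape(16U, 16U, 3U), 1, DataType::F32));
    dst.allocator()->init(TensorInfo(TensorShape(32U, 32U, 3U), 1, DataType::F32));  // 2x upscale in X/Y

    NEScale scale;
    scale.configure(&src, &dst, ScaleKernelInfo{ InterpolationPolicy::BILINEAR, BorderMode::REPLICATE });

    src.allocator()->allocate();
    dst.allocator()->allocate();
    scale.run();
    return 0;
}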
diff --git a/arm_compute/runtime/NEON/functions/NESlice.h b/arm_compute/runtime/NEON/functions/NESlice.h
index 28628778cb..214ffa512c 100644
--- a/arm_compute/runtime/NEON/functions/NESlice.h
+++ b/arm_compute/runtime/NEON/functions/NESlice.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -89,6 +89,14 @@ public:
/** Configure kernel
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:------|:------|
+ * |All |All |
+ *
* @note Supported tensor rank: up to 4
* @note Start indices must be non-negative. 0 <= starts[i]
* @note End coordinates can be negative, which represents the number of elements before the end of that dimension.
diff --git a/arm_compute/runtime/NEON/functions/NEStridedSlice.h b/arm_compute/runtime/NEON/functions/NEStridedSlice.h
index f9c94f5301..7ba6a52a58 100644
--- a/arm_compute/runtime/NEON/functions/NEStridedSlice.h
+++ b/arm_compute/runtime/NEON/functions/NEStridedSlice.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -95,6 +95,14 @@ public:
/** Configure kernel
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:------|:------|
+ * |All |All |
+ *
* @note Supported tensor rank: up to 4
*
* @param[in] input Source tensor. Data type supported: All
diff --git a/arm_compute/runtime/NEON/functions/NETranspose.h b/arm_compute/runtime/NEON/functions/NETranspose.h
index 78916f67b7..581fe74309 100644
--- a/arm_compute/runtime/NEON/functions/NETranspose.h
+++ b/arm_compute/runtime/NEON/functions/NETranspose.h
@@ -54,6 +54,14 @@ public:
NETranspose &operator=(NETranspose &&) = default;
/** Initialise the kernel's inputs and output
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:------|:------|
+ * |All |All |
+ *
* @param[in] input Input tensor. Data types supported: All
* @param[out] output Output tensor. Data type supported: Same as @p input
*/
diff --git a/arm_compute/runtime/OperatorList.h b/arm_compute/runtime/OperatorList.h
new file mode 100644
index 0000000000..8c43c68b90
--- /dev/null
+++ b/arm_compute/runtime/OperatorList.h
@@ -0,0 +1,1028 @@
+/*
+ * Copyright (c) 2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_OPERATOR_LIST_H
+#define ARM_COMPUTE_OPERATOR_LIST_H
+
+/** ActivationLayer
+ *
+ * Description:
+ * Function to simulate an activation layer with the specified activation function.
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_ELU
+ * ANEURALNETWORKS_HARD_SWISH
+ * ANEURALNETWORKS_LOGISTIC
+ * ANEURALNETWORKS_RELU
+ * ANEURALNETWORKS_RELU1
+ * ANEURALNETWORKS_RELU6
+ * ANEURALNETWORKS_TANH
+ *
+ */
+
+/** ArgMinMaxLayer (not ported)
+ *
+ * Description:
+ * Function to calculate the index of the minimum or maximum values in a tensor based on an axis.
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_ARGMAX
+ * ANEURALNETWORKS_ARGMIN
+ *
+ */
+
+/** ArithmeticAddition (no CL)
+ *
+ * Description:
+ * Function to add 2 tensors.
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_ADD
+ *
+ */
+
+/** ArithmeticSubtraction (no CL)
+ *
+ * Description:
+ * Function to subtract 2 tensors.
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_SUB
+ *
+ */
+
+/** BatchNormalizationLayer (not ported)
+ *
+ * Description:
+ * @f[ out_i = \gamma * (\frac{in_i - \mu_{B}}{\sqrt{\sigma^2_{B} + \epsilon}}) + \beta \equiv BN_{\gamma,\beta}(in_i) @f]
+ *
+ * Equivalent Android NNAPI Op:
+ * None
+ *
+ */
+
+/** BatchToSpaceLayer (not ported)
+ *
+ * Description:
+ * Rearranges (permutes) data from batch into blocks of spatial data, followed by cropping. It is the reverse transformation of SpaceToBatch.
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_BATCH_TO_SPACE_ND
+ *
+ */
+
+/** BitwiseAnd (not ported)
+ *
+ * Description:
+ * Function to perform bitwise AND between 2 tensors.
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_LOGICAL_AND
+ *
+ */
+
+/** BitwiseNot (not ported)
+ *
+ * Description:
+ * Function to perform bitwise NOT.
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_LOGICAL_NOT
+ *
+ */
+
+/** BitwiseOr (not ported)
+ *
+ * Description:
+ * Function to perform bitwise OR between 2 tensors.
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_LOGICAL_OR
+ *
+ */
+
+/** BitwiseXor (not ported)
+ *
+ * Description:
+ * Function to perform bitwise XOR between 2 tensors.
+ *
+ * Equivalent Android NNAPI Op:
+ * None
+ *
+ */
+
+/** BoundingBoxTransform (not ported)
+ *
+ * Description:
+ * Function to transform proposal bounding boxes to target bounding boxes using bounding box deltas.
+ *
+ * Equivalent Android NNAPI Op:
+ * ?
+ *
+ */
+
+/** Cast (not ported)
+ *
+ * Description:
+ * Function to cast a tensor.
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_CAST
+ *
+ */
+
+/** ChannelShuffleLayer (not ported)
+ *
+ * Description:
+ * Function to shuffle the channels of the input tensor.
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_CHANNEL_SHUFFLE
+ *
+ */
+
+/** Comparison (not ported) (only CL)
+ *
+ * Description:
+ * Function to compare 2 tensors.
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_EQUAL
+ * ANEURALNETWORKS_GREATER
+ * ANEURALNETWORKS_GREATER_EQUAL
+ * ANEURALNETWORKS_LESS
+ * ANEURALNETWORKS_LESS_EQUAL
+ * ANEURALNETWORKS_NOT_EQUAL
+ *
+ */
+
+/** ConcatenateLayer
+ *
+ * Description:
+ * Function to concatenate tensors along a given axis.
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_CONCATENATION
+ *
+ */
+
+/** ConvertFullyConnectedWeights
+ *
+ * Description:
+ * Function to transpose the weights for the fully connected layer.
+ *
+ * Equivalent Android NNAPI Op:
+ * None
+ *
+ */
+
+/** ConvolutionLayer (not ported)
+ *
+ * Description:
+ * Function to compute a convolution layer.
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_CONV_2D
+ *
+ */
+
+/** Copy
+ *
+ * Description:
+ * Function to copy a tensor.
+ *
+ * Equivalent Android NNAPI Op:
+ * None
+ *
+ */
+
+/** Crop (only CL)
+ *
+ * Description:
+ * Function to crop a given rectangular region of a tensor.
+ *
+ * Equivalent Android NNAPI Op:
+ * ?
+ *
+ */
+
+/** CropResize (not ported)
+ *
+ * Description:
+ * Function to perform cropping and resizing.
+ *
+ * Equivalent Android NNAPI Op:
+ * ?
+ *
+ */
+
+/** DeconvolutionLayer (not ported)
+ *
+ * Description:
+ * Function to compute a deconvolution (transpose convolution) layer.
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_TRANSPOSE_CONV_2D
+ *
+ */
+
+/** DeconvolutionLayerUpsample (only CL) (not ported)
+ *
+ * Description:
+ * Function to execute the upsampling stage of a deconvolution.
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_TRANSPOSE_CONV_2D
+ *
+ */
+
+/** DepthConvertLayer (not ported)
+ *
+ * Description:
+ * Function to convert the data type (depth) of a tensor.
+ *
+ * Equivalent Android NNAPI Op:
+ * None
+ *
+ */
+
+/** DepthToSpaceLayer (not ported)
+ *
+ * Description:
+ * Function to rearrange data from depth into blocks of spatial data (depth to space transformation).
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_DEPTH_TO_SPACE
+ *
+ */
+
+/** DepthwiseConvolutionLayer (not ported)
+ *
+ * Description:
+ * Function to perform depthwise separable convolution
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_DEPTHWISE_CONV_2D
+ *
+ */
+
+/** DequantizationLayer
+ *
+ * Description:
+ * Function to dequantize the values in a tensor
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_DEQUANTIZE
+ *
+ */
+
+/** DetectionPostProcessLayer (not ported) (no CL)
+ *
+ * Description:
+ * Function to generate the detection output based on center size encoded boxes, class prediction and anchors by doing non maximum suppression (NMS)
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_DETECTION_POSTPROCESSING
+ *
+ */
+
+/** DirectConvolutionLayer
+ *
+ * Description:
+ * Function to compute a direct convolution.
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_CONV_2D
+ *
+ */
+
+/** DirectDeconvolutionLayer (only CL)
+ *
+ * Description:
+ * Function to compute a direct deconvolution (transpose convolution).
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_TRANSPOSE_CONV_2D
+ *
+ */
+
+/** ElementWiseOperations (skip)
+ *
+ * Description:
+ * Function to perform in Cpu:
+ * - Div
+ * - Max
+ * - Min
+ * - Pow
+ * - SquaredDiff
+ * - Comparisons (Equal, greater, greater_equal, less, less_equal, not_equal)
+ *
+ * Function to perform in CL:
+ * - Add
+ * - Sub
+ * - Div
+ * - Max
+ * - Min
+ * - Pow
+ * - SquaredDiff
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_MAXIMUM
+ * ANEURALNETWORKS_MINIMUM
+ * ANEURALNETWORKS_POW
+ * ANEURALNETWORKS_DIV
+ * ANEURALNETWORKS_ADD (only CL)
+ * ANEURALNETWORKS_SUB (only CL)
+ * ANEURALNETWORKS_EQUAL (no CL)
+ * ANEURALNETWORKS_GREATER (no CL)
+ * ANEURALNETWORKS_GREATER_EQUAL (no CL)
+ * ANEURALNETWORKS_LESS (no CL)
+ * ANEURALNETWORKS_LESS_EQUAL (no CL)
+ * ANEURALNETWORKS_NOT_EQUAL (no CL)
+ *
+ */
+
+/** ElementWiseOperationUnary (skip)
+ *
+ * Description:
+ * Function to perform:
+ * - Rsqrt
+ * - Exp
+ * - Neg
+ * - Log
+ * - Abs
+ * - Round
+ * - Sin
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_ABS
+ * ANEURALNETWORKS_EXP
+ * ANEURALNETWORKS_LOG
+ * ANEURALNETWORKS_NEG
+ * ANEURALNETWORKS_RSQRT
+ * ANEURALNETWORKS_SIN
+ *
+ */
+
+/** FFT1D
+ *
+ * Description:
+ * Fast Fourier Transform 1D
+ *
+ * Equivalent Android NNAPI Op:
+ * None
+ *
+ */
+
+/** FFT2D
+ *
+ * Description:
+ * Fast Fourier Transform 2D
+ *
+ * Equivalent Android NNAPI Op:
+ * None
+ *
+ */
+
+/** FFTConvolutionLayer
+ *
+ * Description:
+ * Fast Fourier Transform Convolution
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_CONV_2D
+ *
+ */
+
+/** Fill
+ *
+ * Description:
+ * Set the values of a tensor with a given value
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_FILL
+ *
+ */
+
+/** FillBorder (not ported)
+ *
+ * Description:
+ * Function to fill the borders of a tensor.
+ *
+ * Equivalent Android NNAPI Op:
+ * ?
+ *
+ */
+
+/** FlattenLayer (not ported)
+ *
+ * Description:
+ * Reshape a tensor to be 1D
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_RESHAPE
+ *
+ */
+
+/** Floor
+ *
+ * Description:
+ * Function to round each tensor value down to the nearest integer (floor).
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_FLOOR
+ *
+ */
+
+/** FullyConnectedLayer (not ported)
+ *
+ * Description:
+ * Function to perform a fully connected / dense layer
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_FULLY_CONNECTED
+ *
+ */
+
+/** FuseBatchNormalization (not ported)
+ *
+ * Description:
+ * Function to fuse a batch normalization node into a preceding convolution node.
+ *
+ * Equivalent Android NNAPI Op:
+ * None
+ *
+ */
+
+/** Gather (not ported)
+ *
+ * Description:
+ * Function to gather elements along a given axis.
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_GATHER
+ *
+ */
+
+/** GEMM (not ported)
+ *
+ * Description:
+ * General Matrix Multiplication.
+ *
+ * Equivalent Android NNAPI Op:
+ * None
+ *
+ */
+
+/** GEMMConv2D (not ported) (no CL)
+ *
+ * Description:
+ * General Matrix Multiplication.
+ *
+ * Equivalent Android NNAPI Op:
+ * None
+ *
+ */
+
+/** GEMMConvolutionLayer (not ported)
+ *
+ * Description:
+ * General Matrix Multiplication.
+ *
+ * Equivalent Android NNAPI Op:
+ * None
+ *
+ */
+
+/** GEMMDeconvolutionLayer (not ported) (only CL)
+ *
+ * Description:
+ * General Matrix Multiplication.
+ *
+ * Equivalent Android NNAPI Op:
+ * None
+ *
+ */
+
+/** GEMMLowpMatrixMultiplyCore (not ported)
+ *
+ * Description:
+ * General Matrix Multiplication.
+ *
+ * Equivalent Android NNAPI Op:
+ * None
+ *
+ */
+
+/** GEMMLowpOutputStage (not ported)
+ *
+ * Description:
+ * General Matrix Multiplication.
+ *
+ * Equivalent Android NNAPI Op:
+ * None
+ *
+ */
+
+/** GenerateProposalsLayer (not ported)
+ *
+ * Description:
+ * Function to generate proposals for a RPN (Region Proposal Network).
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_GENERATE_PROPOSALS
+ *
+ */
+
+/** InstanceNormalizationLayer (not ported)
+ *
+ * Description:
+ * Function to perform an instance normalization on a given axis.
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_INSTANCE_NORMALIZATION
+ *
+ */
+
+/** L2NormalizationLayer (not ported)
+ *
+ * Description:
+ * Function to perform an L2 normalization on a given axis.
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_L2_NORMALIZATION
+ *
+ */
+
+/** Logical (no CL)
+ *
+ * Description:
+ * Function to perform:
+ * - Logical AND
+ * - Logical OR
+ * - Logical NOT
+ *
+ * Equivalent Android NNAPI Op:
+ * None?
+ *
+ */
+
+/** LogicalAnd (only CL)
+ *
+ * Description:
+ * Function to perform Logical AND
+ *
+ * Equivalent Android NNAPI Op:
+ * None?
+ *
+ */
+
+/** LogicalOr (only CL)
+ *
+ * Description:
+ * Function to perform Logical OR
+ *
+ * Equivalent Android NNAPI Op:
+ * None?
+ *
+ */
+
+/** LogicalNot (only CL)
+ *
+ * Description:
+ * Function to perform Logical NOT
+ *
+ * Equivalent Android NNAPI Op:
+ * None?
+ *
+ */
+
+/** LSTMLayer (not ported)
+ *
+ * Description:
+ * Function to perform LSTM
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_LSTM
+ *
+ */
+
+/** LSTMLayerQuantized (not ported)
+ *
+ * Description:
+ * Function to perform LSTM
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_QUANTIZED_LSTM
+ * ANEURALNETWORKS_QUANTIZED_16BIT_LSTM ?
+ *
+ */
+
+/** MaxUnpoolingLayer (not ported)
+ *
+ * Description:
+ * Function to perform MaxUnpooling
+ *
+ * Equivalent Android NNAPI Op:
+ * ?
+ *
+ */
+
+/** MeanStdDevNormalizationLayer (not ported)
+ *
+ * Description:
+ * Function to execute mean and standard deviation normalization.
+ *
+ * Equivalent Android NNAPI Op:
+ * None ?
+ *
+ */
+
+/** NormalizationLayer (not ported)
+ *
+ * Description:
+ * Function to compute normalization layer.
+ *
+ * Equivalent Android NNAPI Op:
+ * None ?
+ *
+ */
+
+/** PadLayer (not ported)
+ *
+ * Description:
+ * Function to pad a tensor.
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_PAD
+ * ANEURALNETWORKS_PAD_V2
+ *
+ */
+
+/** Permute
+ *
+ * Description:
+ * Function to permute the dimensions of an N-D tensor.
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_TRANSPOSE
+ *
+ */
+
+/** PixelWiseMultiplication
+ *
+ * Description:
+ * Function to perform a pixel-wise multiplication.
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_MUL
+ *
+ */
+
+/** PoolingLayer
+ *
+ * Description:
+ * Function to perform pooling with the specified pooling operation.
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_AVERAGE_POOL_2D
+ * ANEURALNETWORKS_L2_POOL_2D
+ * ANEURALNETWORKS_MAX_POOL_2D
+ *
+ */
+
+/** PReluLayer
+ *
+ * Description:
+ * Function to compute the activation layer with the PRELU activation function.
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_PRELU
+ *
+ */
+
+/** PriorBoxLayer (not ported)
+ *
+ * Description:
+ * Function to compute prior boxes.
+ *
+ * Equivalent Android NNAPI Op:
+ * ?
+ *
+ */
+
+/** QLSTMLayer (not ported)
+ *
+ * Description:
+ * Function to perform quantized LSTM (Quantized Long Short-Term Memory)
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_QUANTIZED_LSTM
+ * ANEURALNETWORKS_QUANTIZED_16BIT_LSTM ?
+ *
+ */
+
+/** QuantizationLayer
+ *
+ * Description:
+ * Function to perform a quantization layer
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_QUANTIZE
+ *
+ */
+
+/** Range (not ported)
+ *
+ * Description:
+ * Function to generate an evenly spaced sequence of numbers, starting from a given value and extending by a given step up to (but not including) an end value.
+ *
+ * Equivalent Android NNAPI Op:
+ * none?
+ *
+ */
+
+/** ReduceMean (not ported)
+ *
+ * Description:
+ * Function to perform a reduce mean operation.
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_MEAN
+ *
+ */
+
+/** ReductionOperation (not ported)
+ *
+ * Description:
+ * Function to perform a reduction with the following operations:
+ * - ARG_IDX_MAX: Index of the max value
+ * - ARG_IDX_MIN: Index of the min value
+ * - MEAN_SUM: Mean of sum
+ * - PROD: Product
+ * - SUM_SQUARE: Sum of squares
+ * - SUM: Sum
+ * - MIN: Min
+ * - MAX: Max
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_REDUCE_ALL
+ * ANEURALNETWORKS_REDUCE_ANY
+ * ANEURALNETWORKS_REDUCE_MAX
+ * ANEURALNETWORKS_REDUCE_MIN
+ * ANEURALNETWORKS_REDUCE_PROD
+ * ANEURALNETWORKS_REDUCE_SUM
+ *
+ */
+
+/** ReorgLayer (not ported)
+ *
+ * Description:
+ * Function to perform a reorg layer.
+ *
+ * Equivalent Android NNAPI Op:
+ * None?
+ *
+ */
+
+/** ReshapeLayer
+ *
+ * Description:
+ * Function to reshape a tensor
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_RESHAPE
+ * ANEURALNETWORKS_SQUEEZE
+ *
+ */
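+
+/*
+ * Illustrative sketch only (not an operator entry); it assumes the standard runtime
+ * Tensor API and an arbitrary 2x3x4 -> 24 reshape. Reshaping keeps the total number
+ * of elements; only the shape of the view changes.
+ *
+ * @code{.cpp}
+ * // Requires arm_compute/runtime/NEON/functions/NEReshapeLayer.h and arm_compute/runtime/Tensor.h
+ * using namespace arm_compute;
+ * Tensor src, dst;
+ * src.allocator()->init(TensorInfo(TensorShape(4U, 3U, 2U), 1, DataType::F32));
+ * dst.allocator()->init(TensorInfo(TensorShape(24U), 1, DataType::F32));
+ *
+ * NEReshapeLayer reshape;
+ * reshape.configure(&src, &dst); // the destination info defines the new shape
+ *
+ * src.allocator()->allocate();
+ * dst.allocator()->allocate();
+ * reshape.run();
+ * @endcode
+ */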
+
+/** ReverseLayer (not ported)
+ *
+ * Description:
+ * Function to reverse a tensor along the given axes.
+ *
+ * Equivalent Android NNAPI Op:
+ * None?
+ *
+ */
+
+/** RNNLayer (not ported)
+ *
+ * Description:
+ * Function to perform a recurrent neural network (RNN) layer.
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_RNN
+ *
+ */
+
+/** ROIAlignLayer (not ported)
+ *
+ * Description:
+ * Function to perform ROI alignment.
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_ROI_ALIGN
+ *
+ */
+
+/** ROIPoolingLayer (not ported)
+ *
+ * Description:
+ * Function to perform ROI pooling.
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_ROI_POOLING
+ *
+ */
+
+/** Scale
+ *
+ * Description:
+ * Function to perform a resize of a tensor using one of the following interpolation methods:
+ * - Bilinear
+ * - Nearest neighbor
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_RESIZE_BILINEAR
+ * ANEURALNETWORKS_RESIZE_NEAREST_NEIGHBOR
+ *
+ */
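+
+/*
+ * Illustrative sketch only (not an operator entry); it assumes the ScaleKernelInfo
+ * descriptor, and the 2x bilinear upscale below is an arbitrary choice.
+ *
+ * @code{.cpp}
+ * // Requires arm_compute/runtime/NEON/functions/NEScale.h and arm_compute/runtime/Tensor.h
+ * using namespace arm_compute;
+ * Tensor src, dst;
+ * src.allocator()->init(TensorInfo(TensorShape(64U, 64U, 3U), 1, DataType::F32));
+ * dst.allocator()->init(TensorInfo(TensorShape(128U, 128U, 3U), 1, DataType::F32));
+ *
+ * NEScale scale;
+ * scale.configure(&src, &dst, ScaleKernelInfo{ InterpolationPolicy::BILINEAR, BorderMode::REPLICATE });
+ *
+ * src.allocator()->allocate();
+ * dst.allocator()->allocate();
+ * scale.run();
+ * @endcode
+ */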
+
+/** Select (not ported)
+ *
+ * Description:
+ * Function to select values from 2 tensors depending on an input tensor of booleans.
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_SELECT
+ *
+ */
+
+/** Slice
+ *
+ * Description:
+ * Function to perform tensor slicing.
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_SLICE
+ *
+ */
+
+/** SoftmaxLayer (skip)
+ *
+ * Description:
+ * Function to compute a SoftmaxLayer and a Log SoftmaxLayer.
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_LOG_SOFTMAX
+ * ANEURALNETWORKS_SOFTMAX
+ *
+ */
+
+/** SpaceToBatchLayer (not ported)
+ *
+ * Description:
+ * Function to divide a tensor spatially.
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_SPACE_TO_BATCH_ND
+ *
+ */
+
+/** SpaceToDepthLayer (not ported)
+ *
+ * Description:
+ * Function to rearrange blocks of spatial data into depth.
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_SPACE_TO_DEPTH
+ *
+ */
+
+/** Split (not ported)
+ *
+ * Description:
+ * Function to split a tensor along a given axis.
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_SPLIT
+ *
+ */
+
+/** StackLayer (not ported)
+ *
+ * Description:
+ * Function to stack tensors along an axis.
+ *
+ * Equivalent Android NNAPI Op:
+ * none
+ *
+ */
+
+/** StridedSlice
+ *
+ * Description:
+ * Function to extract a strided slice of a tensor.
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_STRIDED_SLICE
+ *
+ */
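+
+/*
+ * Illustrative sketch only (not an operator entry); it assumes the Coordinates/BiStrides
+ * based configure overload. Slicing the range [0, 8) with stride 2 keeps elements
+ * 0, 2, 4 and 6, so the destination holds 4 elements.
+ *
+ * @code{.cpp}
+ * // Requires arm_compute/runtime/NEON/functions/NEStridedSlice.h and arm_compute/runtime/Tensor.h
+ * using namespace arm_compute;
+ * Tensor src, dst;
+ * src.allocator()->init(TensorInfo(TensorShape(8U), 1, DataType::F32));
+ * dst.allocator()->init(TensorInfo(TensorShape(4U), 1, DataType::F32));
+ *
+ * NEStridedSlice slice;
+ * slice.configure(&src, &dst, Coordinates(0), Coordinates(8), BiStrides(2));
+ *
+ * src.allocator()->allocate();
+ * dst.allocator()->allocate();
+ * slice.run();
+ * @endcode
+ */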
+
+/** Tile (not ported)
+ *
+ * Description:
+ * Function to construct a tensor by tiling a given tensor.
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_TILE
+ *
+ */
+
+/** Transpose
+ *
+ * Description:
+ * Function to transpose a 2D tensor.
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_TRANSPOSE
+ *
+ */
+
+/** Unstack (not ported)
+ *
+ * Description:
+ * Function to unpack a rank-R tensor into rank-(R-1) tensors.
+ *
+ * Equivalent Android NNAPI Op:
+ * none
+ *
+ */
+
+/** WinogradConvolutionLayer (not ported)
+ *
+ * Description:
+ * Function to perform a convolution using the Winograd algorithm.
+ *
+ * Equivalent Android NNAPI Op:
+ * None
+ *
+ */
+
+/** WinogradInputTransform (not ported) (only CL)
+ *
+ * Description:
+ * Function to perform the Winograd input transform.
+ *
+ * Equivalent Android NNAPI Op:
+ * None
+ *
+ */
+
+#endif /* ARM_COMPUTE_OPERATOR_LIST_H */ \ No newline at end of file
diff --git a/docs/09_operators_list.dox b/docs/09_operators_list.dox
new file mode 100644
index 0000000000..82a127bbd3
--- /dev/null
+++ b/docs/09_operators_list.dox
@@ -0,0 +1,784 @@
+///
+/// Copyright (c) 2021 Arm Limited.
+///
+/// SPDX-License-Identifier: MIT
+///
+/// Permission is hereby granted, free of charge, to any person obtaining a copy
+/// of this software and associated documentation files (the "Software"), to
+/// deal in the Software without restriction, including without limitation the
+/// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+/// sell copies of the Software, and to permit persons to whom the Software is
+/// furnished to do so, subject to the following conditions:
+///
+/// The above copyright notice and this permission notice shall be included in all
+/// copies or substantial portions of the Software.
+///
+/// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+/// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+/// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+/// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+/// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+/// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+/// SOFTWARE.
+///
+namespace arm_compute
+{
+/**
+@page operators_list Supported Operators
+
+@tableofcontents
+
+@section S9_1_operators_list Supported Operators
+
+Compute Library supports the operators listed in the table below.
+
+Compute Library supports a wide range of data types; detailed information can be found directly in the documentation of each kernel/function.
+The main data types that the Machine Learning functions support are the following:
+ <ul>
+ <li>BFLOAT16: 16-bit non-standard brain floating point
+ <li>QASYMM8: 8-bit unsigned asymmetric quantized
+ <li>QASYMM8_SIGNED: 8-bit signed asymmetric quantized
+ <li>QSYMM8_PER_CHANNEL: 8-bit signed symmetric quantized (Used for the weights)
+  <li>QSYMM8: 8-bit signed symmetric quantized
+  <li>QSYMM16: 16-bit signed symmetric quantized
+ <li>F32: 32-bit single precision floating point
+ <li>F16: 16-bit half precision floating point
+ <li>S32: 32-bit signed integer
+ <li>U8: 8-bit unsigned char
+ <li>All: include all above data types
+ </ul>
+
+Compute Library supports the following data layouts (fast changing dimension from right to left):
+ <ul>
+ <li>NHWC: The native layout of Compute Library that delivers the best performance where channels are in the fastest changing dimension
+ <li>NCHW: Legacy layout where width is in the fastest changing dimension
+ <li>All: include all above data layouts
+ </ul>
+where N = batches, C = channels, H = height, W = width
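+
+As an illustrative sketch (not a complete example; the shape and layout below are arbitrary),
+a tensor descriptor combining a data type and a data layout from the lists above could be
+set up as follows:
+
+@code{.cpp}
+// For NHWC the fastest changing dimension is C, so the shape is given as (C, W, H, N).
+arm_compute::TensorShape shape(3U, 224U, 224U, 1U);
+arm_compute::TensorInfo  info(shape, 1, arm_compute::DataType::F32); // 1 = number of channels per element
+info.set_data_layout(arm_compute::DataLayout::NHWC);
+@endcode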
+
+<table>
+<caption id="multi_row"></caption>
+<tr>
+ <th>Function
+ <th>Description
+ <th>Equivalent Android NNAPI Op
+ <th>Backends
+ <th>Data Layouts
+ <th>Data Types
+<tr>
+ <td rowspan="2">ActivationLayer
+ <td rowspan="2" style="width:200px;"> Function to simulate an activation layer with the specified activation function.
+ <td rowspan="2">
+ <ul>
+ <li>ANEURALNETWORKS_ELU
+ <li>ANEURALNETWORKS_HARD_SWISH
+ <li>ANEURALNETWORKS_LOGISTIC
+ <li>ANEURALNETWORKS_RELU
+ <li>ANEURALNETWORKS_RELU1
+ <li>ANEURALNETWORKS_RELU6
+ <li>ANEURALNETWORKS_TANH
+ </ul>
+ <td>NEActivationLayer
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>QASYMM8<td>QASYMM8
+ <tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED
+ <tr><td>QSYMM16<td>QSYMM16
+ <tr><td>F16<td>F16
+ <tr><td>F32<td>F32
+ </table>
+<tr>
+ <td>CLActivationLayer
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>QASYMM8<td>QASYMM8
+ <tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED
+ <tr><td>QSYMM16<td>QSYMM16
+ <tr><td>F16<td>F16
+ <tr><td>F32<td>F32
+ </table>
+<tr>
+ <td rowspan="2">ConcatenateLayer
+ <td rowspan="2" style="width:200px;"> Function to concatenate tensors along a given axis.
+ <td rowspan="2">
+ <ul>
+ <li>ANEURALNETWORKS_CONCATENATION
+ </ul>
+ <td>NEConcatenateLayer
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>QASYMM8<td>QASYMM8
+ <tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED
+ <tr><td>F16<td>F16
+ <tr><td>F32<td>F32
+ </table>
+<tr>
+ <td>CLConcatenateLayer
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>QASYMM8<td>QASYMM8
+ <tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED
+ <tr><td>F16<td>F16
+ <tr><td>F32<td>F32
+ </table>
+<tr>
+ <td rowspan="2">ConvertFullyConnectedWeights
+  <td rowspan="2" style="width:200px;"> Function to transpose the weights for the fully connected layer.
+ <td rowspan="2">
+ <ul>
+ <li>None
+ </ul>
+ <td>NEConvertFullyConnectedWeights
+ <td>
+ <ul>
+ <li>NHWC
+ <li>NCHW
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>All<td>All
+ </table>
+<tr>
+ <td>CLConvertFullyConnectedWeights
+ <td>
+ <ul>
+ <li>NHWC
+ <li>NCHW
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>All<td>All
+ </table>
+<tr>
+ <td rowspan="2">Copy
+ <td rowspan="2" style="width:200px;"> Function to copy a tensor.
+ <td rowspan="2">
+ <ul>
+ <li>None
+ </ul>
+ <td>NECopy
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>All<td>All
+ </table>
+<tr>
+ <td>CLCopy
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>All<td>All
+ </table>
+<tr>
+ <td rowspan="2">DequantizationLayer
+ <td rowspan="2" style="width:200px;"> Function to dequantize the values in a tensor
+ <td rowspan="2">
+ <ul>
+ <li>ANEURALNETWORKS_DEQUANTIZE
+ </ul>
+ <td>NEDequantizationLayer
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>QASYMM8<td>F16
+ <tr><td>QASYMM8<td>F32
+ <tr><td>QASYMM8_SIGNED<td>F16
+ <tr><td>QASYMM8_SIGNED<td>F32
+ <tr><td>QSYMM8_PER_CHANNEL<td>F16
+ <tr><td>QSYMM8_PER_CHANNEL<td>F32
+ <tr><td>QSYMM8<td>F16
+ <tr><td>QSYMM8<td>F32
+ <tr><td>QSYMM16<td>F16
+ <tr><td>QSYMM16<td>F32
+ </table>
+<tr>
+ <td>CLDequantizationLayer
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>QASYMM8<td>F16
+ <tr><td>QASYMM8<td>F32
+ <tr><td>QASYMM8_SIGNED<td>F16
+ <tr><td>QASYMM8_SIGNED<td>F32
+ <tr><td>QSYMM8_PER_CHANNEL<td>F16
+ <tr><td>QSYMM8_PER_CHANNEL<td>F32
+ <tr><td>QSYMM8<td>F16
+ <tr><td>QSYMM8<td>F32
+ <tr><td>QSYMM16<td>F16
+ <tr><td>QSYMM16<td>F32
+ </table>
+<tr>
+ <td rowspan="2">DirectConvolutionLayer
+  <td rowspan="2" style="width:200px;"> Function to compute a direct convolution, applying the kernels directly to the input tensor.
+ <td rowspan="2">
+ <ul>
+ <li>ANEURALNETWORKS_CONV_2D
+ </ul>
+ <td>NEDirectConvolutionLayer
+ <td>
+ <ul>
+ <li>NHWC
+ <li>NCHW
+ </ul>
+ <td>
+ <table>
+ <tr><th>src0<th>src1<th>src2<th>dst
+ <tr><td>F16<td>F16<td>F16<td>F16
+ <tr><td>F32<td>F32<td>F32<td>F32
+ </table>
+<tr>
+ <td>CLDirectConvolutionLayer
+ <td>
+ <ul>
+ <li>NHWC
+ <li>NCHW
+ </ul>
+ <td>
+ <table>
+ <tr><th>src0<th>src1<th>src2<th>dst
+ <tr><td>F16<td>F16<td>F16<td>F16
+ <tr><td>F32<td>F32<td>F32<td>F32
+ <tr><td>QASYMM8<td>QASYMM8<td>S32<td>QASYMM8
+ <tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED<td>S32<td>QASYMM8_SIGNED
+ </table>
+<tr>
+ <td rowspan="2">FFT1D
+ <td rowspan="2" style="width:200px;"> Fast Fourier Transform 1D
+ <td rowspan="2">
+ <ul>
+ <li>None
+ </ul>
+ <td>NEFFT1D
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>F32<td>F32
+ </table>
+<tr>
+ <td>CLFFT1D
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>F32<td>F32
+ <tr><td>F16<td>F16
+ </table>
+<tr>
+ <td rowspan="2">FFT2D
+ <td rowspan="2" style="width:200px;"> Fast Fourier Transform 2D
+ <td rowspan="2">
+ <ul>
+ <li>None
+ </ul>
+ <td>NEFFT2D
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>F32<td>F32
+ </table>
+<tr>
+ <td>CLFFT2D
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>F32<td>F32
+ <tr><td>F16<td>F16
+ </table>
+<tr>
+ <td rowspan="2">FFTConvolutionLayer
+ <td rowspan="2" style="width:200px;"> Fast Fourier Transform Convolution
+ <td rowspan="2">
+ <ul>
+ <li>ANEURALNETWORKS_CONV_2D
+ </ul>
+ <td>NEFFTConvolutionLayer
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>F32<td>F32
+ </table>
+<tr>
+ <td>CLFFTConvolutionLayer
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>F32<td>F32
+ <tr><td>F16<td>F16
+ </table>
+<tr>
+ <td rowspan="2">Fill
+ <td rowspan="2" style="width:200px;"> Set the values of a tensor with a given value
+ <td rowspan="2">
+ <ul>
+ <li>ANEURALNETWORKS_FILL
+ </ul>
+ <td>NEFill
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>All<td>All
+ </table>
+<tr>
+ <td>CLFill
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>All<td>All
+ </table>
+<tr>
+ <td rowspan="2">Floor
+  <td rowspan="2" style="width:200px;"> Round the values of a tensor down to the nearest integer
+ <td rowspan="2">
+ <ul>
+ <li>ANEURALNETWORKS_FLOOR
+ </ul>
+ <td>NEFloor
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>F32<td>F32
+ <tr><td>F16<td>F16
+ </table>
+<tr>
+ <td>CLFloor
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>F32<td>F32
+ <tr><td>F16<td>F16
+ </table>
+<tr>
+ <td rowspan="2">Permute
+  <td rowspan="2" style="width:200px;"> Function to permute the dimensions of an N-D tensor.
+ <td rowspan="2">
+ <ul>
+ <li>ANEURALNETWORKS_TRANSPOSE
+ </ul>
+ <td>NEPermute
+ <td>
+ <ul>
+ <li>NHWC
+ <li>NCHW
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>All<td>All
+ </table>
+<tr>
+ <td>CLPermute
+ <td>
+ <ul>
+ <li>NHWC
+ <li>NCHW
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>All<td>All
+ </table>
+<tr>
+ <td rowspan="2">PixelWiseMultiplication
+  <td rowspan="2" style="width:200px;"> Function to perform a pixel-wise multiplication.
+ <td rowspan="2">
+ <ul>
+ <li>ANEURALNETWORKS_MUL
+ </ul>
+ <td>NEPixelWiseMultiplication
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src0<th>src1<th>dst
+ <tr><td>QASYMM8<td>QASYMM8<td>QASYMM8
+ <tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED<td>QASYMM8_SIGNED
+ <tr><td>QSYMM16<td>QSYMM16<td>QASYMM16
+ <tr><td>QSYMM16<td>QSYMM16<td>S32
+ <tr><td>U8<td>U8<td>U8
+ <tr><td>U8<td>U8<td>S16
+ <tr><td>U8<td>S16<td>S16
+ <tr><td>S16<td>U8<td>S16
+ <tr><td>S16<td>S16<td>S16
+ <tr><td>F16<td>F16<td>F16
+ <tr><td>F32<td>S32<td>F32
+ </table>
+<tr>
+ <td>CLPixelWiseMultiplication
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src0<th>src1<th>dst
+ <tr><td>QASYMM8<td>QASYMM8<td>QASYMM8
+ <tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED<td>QASYMM8_SIGNED
+ <tr><td>QSYMM16<td>QSYMM16<td>QASYMM16
+ <tr><td>QSYMM16<td>QSYMM16<td>S32
+ <tr><td>U8<td>U8<td>U8
+ <tr><td>U8<td>U8<td>S16
+ <tr><td>U8<td>S16<td>S16
+ <tr><td>S16<td>U8<td>S16
+ <tr><td>S16<td>S16<td>S16
+ <tr><td>F16<td>F16<td>F16
+ <tr><td>F32<td>S32<td>F32
+ </table>
+<tr>
+ <td rowspan="2">PoolingLayer
+  <td rowspan="2" style="width:200px;"> Function to perform pooling with the specified pooling operation.
+ <td rowspan="2">
+ <ul>
+ <li>ANEURALNETWORKS_AVERAGE_POOL_2D
+ <li>ANEURALNETWORKS_L2_POOL_2D
+ <li>ANEURALNETWORKS_MAX_POOL_2D
+ </ul>
+ <td>NEPoolingLayer
+ <td>
+ <ul>
+ <li>NHWC
+ <li>NCHW
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>QASYMM8<td>QASYMM8
+ <tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED
+ <tr><td>F16<td>F16
+ <tr><td>F32<td>F32
+ </table>
+<tr>
+ <td>CLPoolingLayer
+ <td>
+ <ul>
+ <li>NHWC
+ <li>NCHW
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>QASYMM8<td>QASYMM8
+ <tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED
+ <tr><td>F16<td>F16
+ <tr><td>F32<td>F32
+ </table>
+<tr>
+ <td rowspan="2">PReluLayer
+ <td rowspan="2" style="width:200px;"> Function to compute the activation layer with the PRELU activation function.
+ <td rowspan="2">
+ <ul>
+ <li>ANEURALNETWORKS_PRELU
+ </ul>
+ <td>NEPReluLayer
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>QASYMM8<td>QASYMM8
+ <tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED
+ <tr><td>F16<td>F16
+ <tr><td>F32<td>F32
+ </table>
+<tr>
+ <td>CLPReluLayer
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>QASYMM8<td>QASYMM8
+ <tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED
+ <tr><td>F16<td>F16
+ <tr><td>F32<td>F32
+ </table>
+<tr>
+ <td rowspan="2">QuantizationLayer
+  <td rowspan="2" style="width:200px;"> Function to perform a quantization layer
+ <td rowspan="2">
+ <ul>
+ <li>ANEURALNETWORKS_QUANTIZE
+ </ul>
+ <td>NEQuantizationLayer
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>QASYMM8<td>QASYMM8
+ <tr><td>QASYMM8<td>QASYMM8_SIGNED
+ <tr><td>QASYMM8<td>QASYMM16
+ <tr><td>QASYMM8_SIGNED<td>QASYMM8
+ <tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED
+ <tr><td>QASYMM8_SIGNED<td>QASYMM16
+ <tr><td>F16<td>QASYMM8
+ <tr><td>F16<td>QASYMM8_SIGNED
+ <tr><td>F16<td>QASYMM16
+ <tr><td>F32<td>QASYMM8
+ <tr><td>F32<td>QASYMM8_SIGNED
+ <tr><td>F32<td>QASYMM16
+ </table>
+<tr>
+ <td>CLQuantizationLayer
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>QASYMM8<td>QASYMM8
+ <tr><td>QASYMM8<td>QASYMM8_SIGNED
+ <tr><td>QASYMM8<td>QASYMM16
+ <tr><td>QASYMM8_SIGNED<td>QASYMM8
+ <tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED
+ <tr><td>QASYMM8_SIGNED<td>QASYMM16
+ <tr><td>F16<td>QASYMM8
+ <tr><td>F16<td>QASYMM8_SIGNED
+ <tr><td>F16<td>QASYMM16
+ <tr><td>F32<td>QASYMM8
+ <tr><td>F32<td>QASYMM8_SIGNED
+ <tr><td>F32<td>QASYMM16
+ </table>
+<tr>
+ <td rowspan="2">ReshapeLayer
+  <td rowspan="2" style="width:200px;"> Function to reshape a tensor
+ <td rowspan="2">
+ <ul>
+ <li>ANEURALNETWORKS_RESHAPE
+ <li>ANEURALNETWORKS_SQUEEZE
+ </ul>
+ <td>NEReshapeLayer
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>All<td>All
+ </table>
+<tr>
+ <td>CLReshapeLayer
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>All<td>All
+ </table>
+<tr>
+ <td rowspan="2">Scale
+  <td rowspan="2" style="width:200px;"> Function to perform a resize of a tensor using one of the following interpolation methods: Bilinear or Nearest neighbor
+ <td rowspan="2">
+ <ul>
+ <li>ANEURALNETWORKS_RESIZE_BILINEAR
+ <li>ANEURALNETWORKS_RESIZE_NEAREST_NEIGHBOR
+ </ul>
+ <td>NEScale
+ <td>
+ <ul>
+ <li>NHWC
+ <li>NCHW
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>QASYMM8<td>QASYMM8
+ <tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED
+ <tr><td>F16<td>F16
+ <tr><td>F32<td>F32
+ <tr><td>U8<td>U8
+ <tr><td>S16<td>S16
+ </table>
+<tr>
+ <td>CLScale
+ <td>
+ <ul>
+ <li>NHWC
+ <li>NCHW
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>QASYMM8<td>QASYMM8
+ <tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED
+ <tr><td>F16<td>F16
+ <tr><td>F32<td>F32
+ <tr><td>U8<td>U8
+ <tr><td>S16<td>S16
+ </table>
+<tr>
+ <td rowspan="2">Slice
+ <td rowspan="2" style="width:200px;"> Function to perform tensor slicing.
+ <td rowspan="2">
+ <ul>
+ <li>ANEURALNETWORKS_SLICE
+ </ul>
+ <td>NESlice
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>All<td>All
+ </table>
+<tr>
+ <td>CLSlice
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>All<td>All
+ </table>
+<tr>
+ <td rowspan="2">StridedSlice
+ <td rowspan="2" style="width:200px;"> Function to extract a strided slice of a tensor.
+ <td rowspan="2">
+ <ul>
+ <li>ANEURALNETWORKS_STRIDED_SLICE
+ </ul>
+ <td>NEStridedSlice
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>All<td>All
+ </table>
+<tr>
+ <td>CLStridedSlice
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>All<td>All
+ </table>
+<tr>
+ <td rowspan="2">Transpose
+  <td rowspan="2" style="width:200px;"> Function to transpose a 2D tensor.
+ <td rowspan="2">
+ <ul>
+ <li>ANEURALNETWORKS_TRANSPOSE
+ </ul>
+ <td>NETranspose
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>All<td>All
+ </table>
+<tr>
+ <td>CLTranspose
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>All<td>All
+ </table>
+</table>
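+
+The functions listed above share the same configure-then-run usage pattern. The following
+minimal sketch (the tensor shape and the choice of ReLU are illustrative only) shows this
+pattern for NEActivationLayer:
+
+@code{.cpp}
+#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
+#include "arm_compute/runtime/Tensor.h"
+
+using namespace arm_compute;
+
+int main()
+{
+    Tensor src, dst;
+    src.allocator()->init(TensorInfo(TensorShape(32U, 32U, 16U), 1, DataType::F32));
+    dst.allocator()->init(TensorInfo(TensorShape(32U, 32U, 16U), 1, DataType::F32));
+
+    NEActivationLayer act;
+    act.configure(&src, &dst, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
+
+    src.allocator()->allocate();
+    dst.allocator()->allocate();
+
+    act.run(); // corresponds to ANEURALNETWORKS_RELU in the table above
+    return 0;
+}
+@endcode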
+
+*/
+} // namespace \ No newline at end of file
diff --git a/docs/Doxyfile b/docs/Doxyfile
index 9d711486c0..6fb5de7020 100644
--- a/docs/Doxyfile
+++ b/docs/Doxyfile
@@ -777,6 +777,7 @@ INPUT = ./docs/00_introduction.dox \
./docs/06_functions_list.dox \
./docs/07_errata.dox \
./docs/08_api.dox \
+ ./docs/09_operators_list.dox \
./docs/ComputeLibrary.dir \
./arm_compute/ \
./src/ \