-rw-r--r--  arm_compute/runtime/CL/functions/CLActivationLayer.h               |   13
-rw-r--r--  arm_compute/runtime/CL/functions/CLConcatenateLayer.h              |   17
-rw-r--r--  arm_compute/runtime/CL/functions/CLConvertFullyConnectedWeights.h  |    9
-rw-r--r--  arm_compute/runtime/CL/functions/CLCopy.h                          |    8
-rw-r--r--  arm_compute/runtime/CL/functions/CLDequantizationLayer.h           |   17
-rw-r--r--  arm_compute/runtime/CL/functions/CLDirectConvolutionLayer.h        |   12
-rw-r--r--  arm_compute/runtime/CL/functions/CLFFT1D.h                         |   11
-rw-r--r--  arm_compute/runtime/CL/functions/CLFFT2D.h                         |   11
-rw-r--r--  arm_compute/runtime/CL/functions/CLFFTConvolutionLayer.h           |   11
-rw-r--r--  arm_compute/runtime/CL/functions/CLFill.h                          |    8
-rw-r--r--  arm_compute/runtime/CL/functions/CLFloor.h                         |    9
-rw-r--r--  arm_compute/runtime/CL/functions/CLPReluLayer.h                    |   11
-rw-r--r--  arm_compute/runtime/CL/functions/CLPermute.h                       |    9
-rw-r--r--  arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h       |   57
-rw-r--r--  arm_compute/runtime/CL/functions/CLPoolingLayer.h                  |   12
-rw-r--r--  arm_compute/runtime/CL/functions/CLQuantizationLayer.h             |   19
-rw-r--r--  arm_compute/runtime/CL/functions/CLReshapeLayer.h                  |    8
-rw-r--r--  arm_compute/runtime/CL/functions/CLScale.h                         |   14
-rw-r--r--  arm_compute/runtime/CL/functions/CLSlice.h                         |   10
-rw-r--r--  arm_compute/runtime/CL/functions/CLStridedSlice.h                  |   10
-rw-r--r--  arm_compute/runtime/CL/functions/CLTranspose.h                     |    8
-rw-r--r--  arm_compute/runtime/NEON/functions/NEActivationLayer.h             |   12
-rw-r--r--  arm_compute/runtime/NEON/functions/NEArithmeticAddition.h          |   30
-rw-r--r--  arm_compute/runtime/NEON/functions/NEConcatenateLayer.h            |   11
-rw-r--r--  arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h |    9
-rw-r--r--  arm_compute/runtime/NEON/functions/NECopy.h                        |    8
-rw-r--r--  arm_compute/runtime/NEON/functions/NEDequantizationLayer.h         |   17
-rw-r--r--  arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h      |   10
-rw-r--r--  arm_compute/runtime/NEON/functions/NEFFT1D.h                       |    8
-rw-r--r--  arm_compute/runtime/NEON/functions/NEFFT2D.h                       |    8
-rw-r--r--  arm_compute/runtime/NEON/functions/NEFFTConvolutionLayer.h         |    8
-rw-r--r--  arm_compute/runtime/NEON/functions/NEFill.h                        |    8
-rw-r--r--  arm_compute/runtime/NEON/functions/NEFloor.h                       |    9
-rw-r--r--  arm_compute/runtime/NEON/functions/NEPReluLayer.h                  |   11
-rw-r--r--  arm_compute/runtime/NEON/functions/NEPermute.h                     |    9
-rw-r--r--  arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h     |   18
-rw-r--r--  arm_compute/runtime/NEON/functions/NEPoolingLayer.h                |   12
-rw-r--r--  arm_compute/runtime/NEON/functions/NEQuantizationLayer.h           |   19
-rw-r--r--  arm_compute/runtime/NEON/functions/NEReshapeLayer.h                |    8
-rw-r--r--  arm_compute/runtime/NEON/functions/NEScale.h                       |   14
-rw-r--r--  arm_compute/runtime/NEON/functions/NESlice.h                       |   10
-rw-r--r--  arm_compute/runtime/NEON/functions/NEStridedSlice.h                |   10
-rw-r--r--  arm_compute/runtime/NEON/functions/NETranspose.h                   |    8
-rw-r--r--  arm_compute/runtime/OperatorList.h                                 | 1028
-rw-r--r--  docs/09_operators_list.dox                                         |  784
-rw-r--r--  docs/Doxyfile                                                      |    1
 46 files changed, 2291 insertions(+), 63 deletions(-)
diff --git a/arm_compute/runtime/CL/functions/CLActivationLayer.h b/arm_compute/runtime/CL/functions/CLActivationLayer.h
index aa62183909..e6bb192532 100644
--- a/arm_compute/runtime/CL/functions/CLActivationLayer.h
+++ b/arm_compute/runtime/CL/functions/CLActivationLayer.h
@@ -59,6 +59,19 @@ public:
CLActivationLayer &operator=(CLActivationLayer &&);
/** Set the input and output tensor.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |QASYMM8 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |
+ * |QSYMM16 |QSYMM16 |
+ * |F16 |F16 |
+ * |F32 |F32 |
+ *
+ *
* @note If the output tensor is a nullptr or is equal to the input, the activation function will be performed in-place
*
* @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result
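A minimal usage sketch (not part of this patch) exercising the F32 -> F32 configuration listed in the table above; the tensor shape and the choice of RELU are illustrative assumptions:

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"
    #include "arm_compute/runtime/CL/functions/CLActivationLayer.h"

    using namespace arm_compute;

    int main()
    {
        // Set up the default OpenCL context/queue before configuring any CL function.
        CLScheduler::get().default_init();

        // F32 -> F32 is one of the valid data type configurations documented above.
        CLTensor src, dst;
        src.allocator()->init(TensorInfo(TensorShape(16U, 16U, 3U), 1, DataType::F32));
        dst.allocator()->init(TensorInfo(TensorShape(16U, 16U, 3U), 1, DataType::F32));

        CLActivationLayer act;
        act.configure(&src, &dst, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));

        // Allocate the backing CL buffers, then run and wait for completion.
        src.allocator()->allocate();
        dst.allocator()->allocate();
        act.run();
        CLScheduler::get().sync();
        return 0;
    }
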
diff --git a/arm_compute/runtime/CL/functions/CLConcatenateLayer.h b/arm_compute/runtime/CL/functions/CLConcatenateLayer.h
index bfc8a39ac9..71e84e21b5 100644
--- a/arm_compute/runtime/CL/functions/CLConcatenateLayer.h
+++ b/arm_compute/runtime/CL/functions/CLConcatenateLayer.h
@@ -64,11 +64,22 @@ public:
CLConcatenateLayer &operator=(CLConcatenateLayer &&);
/** Initialise the kernel's inputs vector and output.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |QASYMM8 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |
+ * |F16 |F16 |
+ * |F32 |F32 |
+ *
* @note Input and output tensor dimensions preconditions defer depending on the concatenation axis.
* @note Preconditions can be found respectively at @ref opencl::kernels::ClWidthConcatenateKernel,
* @ref opencl::kernels::ClHeightConcatenateKernel and @ref opencl::kernels::ClDepthConcatenateKernel.
*
- * @param[in,out] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: All
+ * @param[in,out] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
* @param[out] output Output tensor. Data types supported: Same as @p input.
* @param[in] axis Concatenation axis. Supported underlying concatenation axis are 0, 1, 2 and 3.
*/
@@ -80,7 +91,7 @@ public:
* @ref opencl::kernels::ClHeightConcatenateKernel and @ref opencl::kernels::ClDepthConcatenateKernel.
*
* @param[in] compile_context The compile context to be used.
- * @param[in,out] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: All
+ * @param[in,out] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
* @param[out] output Output tensor. Data types supported: Same as @p input.
* @param[in] axis Concatenation axis. Supported underlying concatenation axis are 0, 1, 2 and 3.
*/
@@ -91,7 +102,7 @@ public:
* @note Preconditions can be found respectively at @ref opencl::kernels::ClWidthConcatenateKernel,
* @ref opencl::kernels::ClHeightConcatenateKernel and @ref opencl::kernels::ClDepthConcatenateKernel.
*
- * @param[in] inputs_vector The vectors containing all the tensors info to concatenate. Data types supported: All.
+ * @param[in] inputs_vector The vectors containing all the tensors info to concatenate. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
* @param[in] output Output tensor info. Data types supported: Same as @p input.
* @param[in] axis Concatenation axis. Supported underlying concatenation axis are 0, 1, 2 and 3.
*
diff --git a/arm_compute/runtime/CL/functions/CLConvertFullyConnectedWeights.h b/arm_compute/runtime/CL/functions/CLConvertFullyConnectedWeights.h
index 8892dbad6c..6c7d9e52e8 100644
--- a/arm_compute/runtime/CL/functions/CLConvertFullyConnectedWeights.h
+++ b/arm_compute/runtime/CL/functions/CLConvertFullyConnectedWeights.h
@@ -55,6 +55,15 @@ public:
CLConvertFullyConnectedWeights &operator=(CLConvertFullyConnectedWeights &&) = default;
/** Initialize the function.
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |All |All |
+ *
* @param[in] input Source weights tensor to convert. Must be 2 dimensional. Data types supported: All.
* @param[out] output The converted weights tensor. Shape and Data Type: Same as @p input.
* @param[in] original_input_shape Shape of the original input tensor (the one entering fully connected layer).
diff --git a/arm_compute/runtime/CL/functions/CLCopy.h b/arm_compute/runtime/CL/functions/CLCopy.h
index 795a183e1f..4fc4183d3e 100644
--- a/arm_compute/runtime/CL/functions/CLCopy.h
+++ b/arm_compute/runtime/CL/functions/CLCopy.h
@@ -53,6 +53,14 @@ public:
CLCopy &operator=(CLCopy &&);
/** Initialise the function's source and destination.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |All |All |
+ *
* @param[in] input Source tensor. Data types supported: All.
* @param[out] output Output tensor. Data types supported: Same as @p input.
* @param[in] dst_window (Optional) Window to be used in case only copying into part of a tensor. Default is nullptr.
diff --git a/arm_compute/runtime/CL/functions/CLDequantizationLayer.h b/arm_compute/runtime/CL/functions/CLDequantizationLayer.h
index 7ecf094df3..4a5c3a3203 100644
--- a/arm_compute/runtime/CL/functions/CLDequantizationLayer.h
+++ b/arm_compute/runtime/CL/functions/CLDequantizationLayer.h
@@ -54,6 +54,23 @@ public:
CLDequantizationLayer &operator=(CLDequantizationLayer &&) = default;
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:------------------|:--------------|
+ * |QASYMM8 |F16 |
+ * |QASYMM8 |F32 |
+ * |QASYMM8_SIGNED |F16 |
+ * |QASYMM8_SIGNED |F32 |
+ * |QSYMM8_PER_CHANNEL |F16 |
+ * |QSYMM8_PER_CHANNEL |F32 |
+ * |QSYMM8 |F16 |
+ * |QSYMM8 |F32 |
+ * |QSYMM16 |F16 |
+ * |QSYMM16 |F32 |
+ *
* @param[in] input Source tensor with at least 3 dimensions. The dimensions over the third will be interpreted as batches.
* Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16.
* @param[out] output Destination tensor with the same dimensions of input. Data type supported: F16/F32.
diff --git a/arm_compute/runtime/CL/functions/CLDirectConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLDirectConvolutionLayer.h
index 6e9e2161b9..3fd0c63782 100644
--- a/arm_compute/runtime/CL/functions/CLDirectConvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLDirectConvolutionLayer.h
@@ -55,6 +55,18 @@ public:
CLDirectConvolutionLayer &operator=(CLDirectConvolutionLayer &&);
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |src2 |dst |
+ * |:--------------|:--------------|:------|:--------------|
+ * |F16 |F16 |F16 |F16 |
+ * |F32 |F32 |F32 |F32 |
+ * |QASYMM8 |QASYMM8 |S32 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |QASYMM8_SIGNED |
+ *
* @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
* while every optional dimension from 4 and above represent a batch of inputs.
* Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
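A minimal sketch (not part of this patch) of the F32/F32/F32 -> F32 row from the table above; the shapes, padding and strides are illustrative assumptions:

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"
    #include "arm_compute/runtime/CL/functions/CLDirectConvolutionLayer.h"

    using namespace arm_compute;

    int main()
    {
        CLScheduler::get().default_init();

        CLTensor src, weights, biases, dst;
        src.allocator()->init(TensorInfo(TensorShape(32U, 32U, 3U), 1, DataType::F32));       // [W, H, IFM]
        weights.allocator()->init(TensorInfo(TensorShape(3U, 3U, 3U, 8U), 1, DataType::F32)); // [kW, kH, IFM, OFM]
        biases.allocator()->init(TensorInfo(TensorShape(8U), 1, DataType::F32));              // one bias per OFM
        dst.allocator()->init(TensorInfo(TensorShape(32U, 32U, 8U), 1, DataType::F32));

        CLDirectConvolutionLayer conv;
        conv.configure(&src, &weights, &biases, &dst, PadStrideInfo(1, 1, 1, 1)); // stride 1, pad 1 -> same spatial size

        for(auto *t : { &src, &weights, &biases, &dst })
        {
            t->allocator()->allocate();
        }
        conv.run();
        CLScheduler::get().sync();
        return 0;
    }
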
diff --git a/arm_compute/runtime/CL/functions/CLFFT1D.h b/arm_compute/runtime/CL/functions/CLFFT1D.h
index 731bad5c32..c7112dc737 100644
--- a/arm_compute/runtime/CL/functions/CLFFT1D.h
+++ b/arm_compute/runtime/CL/functions/CLFFT1D.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -61,6 +61,15 @@ public:
~CLFFT1D();
/** Initialise the function's source, destinations and border mode.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:------|:------|
+ * |F32 |F32 |
+ * |F16 |F16 |
+ *
* @param[in] input Source tensor. Data types supported: F16/F32.
* @param[out] output Destination tensor. Data types and data layouts supported: Same as @p input.
* @param[in] config FFT related configuration
diff --git a/arm_compute/runtime/CL/functions/CLFFT2D.h b/arm_compute/runtime/CL/functions/CLFFT2D.h
index adc8e46cb2..3d20327bf1 100644
--- a/arm_compute/runtime/CL/functions/CLFFT2D.h
+++ b/arm_compute/runtime/CL/functions/CLFFT2D.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -58,6 +58,15 @@ public:
~CLFFT2D();
/** Initialise the function's source, destinations and border mode.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:------|:------|
+ * |F32 |F32 |
+ * |F16 |F16 |
+ *
* @param[in] input Source tensor. Data types supported: F16/F32.
* @param[out] output Destination tensor. Data types and data layouts supported: Same as @p input.
* @param[in] config FFT related configuration
diff --git a/arm_compute/runtime/CL/functions/CLFFTConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLFFTConvolutionLayer.h
index 5085f5a66c..f873cb0b86 100644
--- a/arm_compute/runtime/CL/functions/CLFFTConvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLFFTConvolutionLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -71,6 +71,15 @@ public:
CLFFTConvolutionLayer &operator=(CLFFTConvolutionLayer &&) = default;
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:------|:------|
+ * |F32 |F32 |
+ * |F16 |F16 |
+ *
* @note: This function only works with any square kernel size and unit strides for both NCHW and NHWC data layout
*
* @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
diff --git a/arm_compute/runtime/CL/functions/CLFill.h b/arm_compute/runtime/CL/functions/CLFill.h
index 9a27d158a6..a01e0c3188 100644
--- a/arm_compute/runtime/CL/functions/CLFill.h
+++ b/arm_compute/runtime/CL/functions/CLFill.h
@@ -52,6 +52,14 @@ public:
CLFill &operator=(CLFill &&);
/** Initialize the kernel's tensor and filling value
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |All |All |
+ *
* @param[in,out] tensor Input tensor to fill. Supported data types: All.
* @param[in] constant_value The value used to fill the planes of the tensor
* @param[in] window Window to be used in case setting only part of a tensor. Default is nullptr.
diff --git a/arm_compute/runtime/CL/functions/CLFloor.h b/arm_compute/runtime/CL/functions/CLFloor.h
index 7c7607002d..87cd5b44c7 100644
--- a/arm_compute/runtime/CL/functions/CLFloor.h
+++ b/arm_compute/runtime/CL/functions/CLFloor.h
@@ -54,6 +54,15 @@ public:
CLFloor &operator=(CLFloor &&);
/** Set the source, destination of the kernel
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:------|:------|
+ * |F32 |F32 |
+ * |F16 |F16 |
+ *
* @param[in] input Source tensor. Data type supported: F16/F32.
* @param[out] output Destination tensor. Same as @p input
*/
diff --git a/arm_compute/runtime/CL/functions/CLPReluLayer.h b/arm_compute/runtime/CL/functions/CLPReluLayer.h
index 7b6667044e..b123d0e46e 100644
--- a/arm_compute/runtime/CL/functions/CLPReluLayer.h
+++ b/arm_compute/runtime/CL/functions/CLPReluLayer.h
@@ -53,6 +53,17 @@ public:
CLPReluLayer &operator=(CLPReluLayer &&);
/** Set the input and output tensor.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |QASYMM8 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |
+ * |F16 |F16 |
+ * |F32 |F32 |
+ *
* @note If the output tensor is a nullptr or is equal to the input, the activation function will be performed in-place
*
* @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
diff --git a/arm_compute/runtime/CL/functions/CLPermute.h b/arm_compute/runtime/CL/functions/CLPermute.h
index e59cca2a67..8e15da2287 100644
--- a/arm_compute/runtime/CL/functions/CLPermute.h
+++ b/arm_compute/runtime/CL/functions/CLPermute.h
@@ -53,6 +53,15 @@ public:
CLPermute &operator=(CLPermute &&) = default;
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |All |All |
+ *
* @note Arbitrary permutation vectors are supported with rank not greater than 4
*
* @param[in] input The input tensor to permute. Data types supported: All.
diff --git a/arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h b/arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h
index 9f70b05a35..14422383ad 100644
--- a/arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h
+++ b/arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h
@@ -52,19 +52,23 @@ public:
CLPixelWiseMultiplication &operator=(CLPixelWiseMultiplication &&);
/** Initialise the kernel's inputs, output and convertion policy.
*
- * Valid configurations (Input1,Input2) -> Output :
+ * Valid data layouts:
+ * - All
*
- * - (U8,U8) -> U8
- * - (U8,U8) -> S16
- * - (U8,S16) -> S16
- * - (S16,U8) -> S16
- * - (S16,S16) -> S16
- * - (F16,F16) -> F16
- * - (F32,F32) -> F32
- * - (QASYMM8,QASYMM8) -> QASYMM8
- * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED
- * - (QSYMM16,QSYMM16) -> QSYMM16
- * - (QSYMM16,QSYMM16) -> S32
+ * Valid data type configurations:
+ * |src0 |src1 |dst |
+ * |:--------------|:--------------|:--------------|
+ * |QASYMM8 |QASYMM8 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED |
+ * |QSYMM16 |QSYMM16 |QSYMM16 |
+ * |QSYMM16 |QSYMM16 |S32 |
+ * |U8 |U8 |U8 |
+ * |U8 |U8 |S16 |
+ * |U8 |S16 |S16 |
+ * |S16 |U8 |S16 |
+ * |S16 |S16 |S16 |
+ * |F16 |F16 |F16 |
+ * |F32 |F32 |F32 |
*
* @param[in, out] input1 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
* The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
@@ -81,20 +85,6 @@ public:
ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info = ActivationLayerInfo());
/** Initialise the kernel's inputs, output and convertion policy.
*
- * Valid configurations (Input1,Input2) -> Output :
- *
- * - (U8,U8) -> U8
- * - (U8,U8) -> S16
- * - (U8,S16) -> S16
- * - (S16,U8) -> S16
- * - (S16,S16) -> S16
- * - (F16,F16) -> F16
- * - (F32,F32) -> F32
- * - (QASYMM8,QASYMM8) -> QASYMM8
- * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED
- * - (QSYMM16,QSYMM16) -> QSYMM16
- * - (QSYMM16,QSYMM16) -> S32
- *
* @param[in] compile_context The compile context to be used.
* @param[in, out] input1 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
* The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
@@ -111,21 +101,6 @@ public:
ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info = ActivationLayerInfo());
/** Static function to check if given info will lead to a valid configuration of @ref CLPixelWiseMultiplication
*
- * Valid configurations (Input1,Input2) -> Output :
- *
- * - (U8,U8) -> U8
- * - (U8,U8) -> S16
- * - (U8,S16) -> S16
- * - (S16,U8) -> S16
- * - (S16,S16) -> S16
- * - (F16,F16) -> F16
- * - (F32,F32) -> F32
- * - (QASYMM8,QASYMM8) -> QASYMM8
- * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED
- * - (QSYMM16,QSYMM16) -> QSYMM16
- * - (QSYMM16,QSYMM16) -> S32
- *
- *
* @param[in] input1 An input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
* @param[in] input2 An input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
* @param[in] output The output tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
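As a sketch (not part of this patch), the static validate() documented in the hunk above can be used to check one of the listed configurations, e.g. F32 * F32 -> F32, before configuring; the shape and scale here are illustrative assumptions:

    #include "arm_compute/core/Error.h"
    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h"

    using namespace arm_compute;

    bool f32_mul_supported(const TensorShape &shape)
    {
        const TensorInfo a(shape, 1, DataType::F32);
        const TensorInfo b(shape, 1, DataType::F32);
        const TensorInfo out(shape, 1, DataType::F32);
        // scale = 1, saturate on overflow, round towards zero
        const Status status = CLPixelWiseMultiplication::validate(&a, &b, &out, 1.f,
                                                                  ConvertPolicy::SATURATE,
                                                                  RoundingPolicy::TO_ZERO);
        return status.error_code() == ErrorCode::OK;
    }
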
diff --git a/arm_compute/runtime/CL/functions/CLPoolingLayer.h b/arm_compute/runtime/CL/functions/CLPoolingLayer.h
index ec1c4eb768..902feca234 100644
--- a/arm_compute/runtime/CL/functions/CLPoolingLayer.h
+++ b/arm_compute/runtime/CL/functions/CLPoolingLayer.h
@@ -54,6 +54,18 @@ public:
CLPoolingLayer &operator=(CLPoolingLayer &&) = default;
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |QASYMM8 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |
+ * |F16 |F16 |
+ * |F32 |F32 |
+ *
* @param[in,out] input Source tensor. (Written to only when padding != 0) Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
* @param[out] output Destination tensor. Data types supported: Same as @p input.
* @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo.
diff --git a/arm_compute/runtime/CL/functions/CLQuantizationLayer.h b/arm_compute/runtime/CL/functions/CLQuantizationLayer.h
index c5dad53513..6a44a226d4 100644
--- a/arm_compute/runtime/CL/functions/CLQuantizationLayer.h
+++ b/arm_compute/runtime/CL/functions/CLQuantizationLayer.h
@@ -59,6 +59,25 @@ public:
CLQuantizationLayer &operator=(CLQuantizationLayer &&) = default;
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:------------------|:--------------|
+ * |QASYMM8 |QASYMM8 |
+ * |QASYMM8 |QASYMM8_SIGNED |
+ * |QASYMM8 |QASYMM16 |
+ * |QASYMM8_SIGNED |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |
+ * |QASYMM8_SIGNED |QASYMM16 |
+ * |F16 |QASYMM8 |
+ * |F16 |QASYMM8_SIGNED |
+ * |F16 |QASYMM16 |
+ * |F32 |QASYMM8 |
+ * |F32 |QASYMM8_SIGNED |
+ * |F32 |QASYMM16 |
+ *
* @param[in] input Source tensor. The dimensions over the third will be interpreted as batches. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/32.
* @param[out] output Destination tensor with the same dimensions of input. Data types supported: QASYMM8/QASYMM8_SIGNED/QASYMM16.
*
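A minimal sketch (not part of this patch) of the F32 -> QASYMM8 configuration listed above; the shape and quantization parameters are illustrative assumptions:

    #include "arm_compute/core/QuantizationInfo.h"
    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"
    #include "arm_compute/runtime/CL/functions/CLQuantizationLayer.h"

    using namespace arm_compute;

    int main()
    {
        CLScheduler::get().default_init();

        CLTensor src, dst;
        src.allocator()->init(TensorInfo(TensorShape(8U, 8U), 1, DataType::F32));
        // The quantization info attached to the output (scale/offset) drives the quantization.
        dst.allocator()->init(TensorInfo(TensorShape(8U, 8U), 1, DataType::QASYMM8, QuantizationInfo(1.f / 255.f, 0)));

        CLQuantizationLayer quant;
        quant.configure(&src, &dst);

        src.allocator()->allocate();
        dst.allocator()->allocate();
        quant.run();
        CLScheduler::get().sync();
        return 0;
    }
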
diff --git a/arm_compute/runtime/CL/functions/CLReshapeLayer.h b/arm_compute/runtime/CL/functions/CLReshapeLayer.h
index 60ed81680e..7346b65e9b 100644
--- a/arm_compute/runtime/CL/functions/CLReshapeLayer.h
+++ b/arm_compute/runtime/CL/functions/CLReshapeLayer.h
@@ -52,6 +52,14 @@ public:
CLReshapeLayer &operator=(CLReshapeLayer &&);
/** Initialise the kernel's inputs and outputs
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |All |All |
+ *
* @param[in] input First tensor input. Data type supported: All
* @param[out] output Output tensor. Data type supported: Same as @p input
*/
diff --git a/arm_compute/runtime/CL/functions/CLScale.h b/arm_compute/runtime/CL/functions/CLScale.h
index 8a67f74bb6..ddb4a23531 100644
--- a/arm_compute/runtime/CL/functions/CLScale.h
+++ b/arm_compute/runtime/CL/functions/CLScale.h
@@ -55,6 +55,20 @@ public:
/** Initialize the function's source, destination, interpolation type and border_mode.
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |QASYMM8 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |
+ * |F16 |F16 |
+ * |F32 |F32 |
+ * |U8 |U8 |
+ * |S16 |S16 |
+ *
* @param[in,out] input Source tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/F16/F32. (Written to only for @p border_mode != UNDEFINED)
* @param[out] output Destination tensor. Data types supported: Same as @p input
* All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane.
diff --git a/arm_compute/runtime/CL/functions/CLSlice.h b/arm_compute/runtime/CL/functions/CLSlice.h
index f17e77236d..7a7689c528 100644
--- a/arm_compute/runtime/CL/functions/CLSlice.h
+++ b/arm_compute/runtime/CL/functions/CLSlice.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -91,6 +91,14 @@ public:
CLSlice &operator=(CLSlice &&);
/** Configure kernel
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |All |All |
+ *
* @note Supported tensor rank: up to 4
* @note Start indices must be non-negative. 0 <= starts[i]
* @note End coordinates can be negative, which represents the number of elements before the end of that dimension.
diff --git a/arm_compute/runtime/CL/functions/CLStridedSlice.h b/arm_compute/runtime/CL/functions/CLStridedSlice.h
index fdbef81f7d..6fab0c0186 100644
--- a/arm_compute/runtime/CL/functions/CLStridedSlice.h
+++ b/arm_compute/runtime/CL/functions/CLStridedSlice.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -54,6 +54,14 @@ public:
CLStridedSlice &operator=(CLStridedSlice &&);
/** Configure kernel
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |All |All |
+ *
* @note Supported tensor rank: up to 4
*
* @param[in] input Source tensor. Data type supported: All.
diff --git a/arm_compute/runtime/CL/functions/CLTranspose.h b/arm_compute/runtime/CL/functions/CLTranspose.h
index 43cebeba90..a866aeabaa 100644
--- a/arm_compute/runtime/CL/functions/CLTranspose.h
+++ b/arm_compute/runtime/CL/functions/CLTranspose.h
@@ -53,6 +53,14 @@ public:
CLTranspose &operator=(CLTranspose &&) = default;
/** Initialise the kernel's inputs and output
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |All |All |
+ *
* @param[in] input Input tensor. Data types supported: All.
* @param[out] output Output tensor. Data type supported: Same as @p input
*/
diff --git a/arm_compute/runtime/NEON/functions/NEActivationLayer.h b/arm_compute/runtime/NEON/functions/NEActivationLayer.h
index ffda8406aa..b39a8d7701 100644
--- a/arm_compute/runtime/NEON/functions/NEActivationLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEActivationLayer.h
@@ -62,6 +62,18 @@ public:
/** [NEActivationLayer snippet] **/
/** Set the input and output tensor.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |QASYMM8 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |
+ * |QSYMM16 |QSYMM16 |
+ * |F16 |F16 |
+ * |F32 |F32 |
+ *
* @note If the output tensor is a nullptr or is equal to the input, the activation function will be performed in-place
*
* @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result
diff --git a/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h b/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h
index 8f9fd27906..734e3502dd 100644
--- a/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h
+++ b/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h
@@ -51,19 +51,25 @@ public:
NEArithmeticAddition &operator=(NEArithmeticAddition &&);
/** Initialise the kernel's inputs, output and conversion policy.
*
- * Valid configurations (Input1,Input2) -> Output :
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
*
- * - (U8,U8) -> U8
- * - (U8,U8) -> S16
- * - (S16,U8) -> S16
- * - (U8,S16) -> S16
- * - (S16,S16) -> S16
- * - (S32,S32) -> S32
- * - (F16,F16) -> F16
- * - (F32,F32) -> F32
- * - (QASYMM8,QASYMM8) -> QASYMM8
- * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED
- * - (QSYMM16,QSYMM16) -> QSYMM16
+ * Valid data type configurations:
+ * |src0 |src1 |dst |
+ * |:--------------|:--------------|:--------------|
+ * |QASYMM8 |QASYMM8 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED |
+ * |QSYMM16 |QSYMM16 |QSYMM16 |
+ * |QSYMM16 |QSYMM16 |S32 |
+ * |U8 |U8 |U8 |
+ * |U8 |U8 |S16 |
+ * |U8 |S16 |S16 |
+ * |S16 |U8 |S16 |
+ * |S16 |S16 |S16 |
+ * |S32 |S32 |S32 |
+ * |F16 |F16 |F16 |
+ * |F32 |F32 |F32 |
*
* @param[in] input1 First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32
* @param[in] input2 Second tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32
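A minimal sketch (not part of this patch) of the QASYMM8 + QASYMM8 -> QASYMM8 row above; the shape and quantization parameters are illustrative assumptions:

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/Tensor.h"
    #include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h"

    using namespace arm_compute;

    int main()
    {
        const TensorShape      shape(32U, 32U);
        const QuantizationInfo qinfo(0.5f, 10);

        Tensor a, b, out;
        a.allocator()->init(TensorInfo(shape, 1, DataType::QASYMM8, qinfo));
        b.allocator()->init(TensorInfo(shape, 1, DataType::QASYMM8, qinfo));
        out.allocator()->init(TensorInfo(shape, 1, DataType::QASYMM8, qinfo));

        NEArithmeticAddition add;
        add.configure(&a, &b, &out, ConvertPolicy::SATURATE);

        a.allocator()->allocate();
        b.allocator()->allocate();
        out.allocator()->allocate();
        add.run();
        return 0;
    }
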
diff --git a/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h b/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h
index 6aa724ab0c..dd1c709d76 100644
--- a/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h
@@ -55,6 +55,17 @@ public:
NEConcatenateLayer &operator=(NEConcatenateLayer &&);
/** Initialise the kernel's inputs vector and output.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |QASYMM8 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |
+ * |F16 |F16 |
+ * |F32 |F32 |
+ *
* @note Input and output tensor dimensions preconditions defer depending on the concatenation axis.
* @note Preconditions can be found respectively at @ref cpu::kernels::CpuConcatenateWidthKernel, @ref cpu::kernels::CpuConcatenateHeightKernel,
* @ref cpu::kernels::CpuConcatenateDepthKernel and @ref cpu::kernels::CpuConcatenateBatchKernel.
diff --git a/arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h b/arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h
index f9ce66db13..218877d421 100644
--- a/arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h
+++ b/arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h
@@ -51,6 +51,15 @@ public:
~NEConvertFullyConnectedWeights();
/** Initialize the function.
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |All |All |
+ *
* @param[in] input Source weights tensor to convert. Must be 2 dimensional. Data types supported: All.
* @param[out] output The converted weights tensor. Shape and Data Type: Same as @p input.
* @param[in] original_input_shape Shape of the original input tensor (the one entering fully connected layer).
diff --git a/arm_compute/runtime/NEON/functions/NECopy.h b/arm_compute/runtime/NEON/functions/NECopy.h
index 56f8bd9803..ee02c259f4 100644
--- a/arm_compute/runtime/NEON/functions/NECopy.h
+++ b/arm_compute/runtime/NEON/functions/NECopy.h
@@ -53,6 +53,14 @@ public:
NECopy &operator=(NECopy &&);
/** Initialise the function's source and destination.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |All |All |
+ *
* @param[in] input Source tensor. Data types supported: All
* @param[out] output Output tensor. Data types supported: Same as @p input.
*
diff --git a/arm_compute/runtime/NEON/functions/NEDequantizationLayer.h b/arm_compute/runtime/NEON/functions/NEDequantizationLayer.h
index a18566e6ca..dfec835f45 100644
--- a/arm_compute/runtime/NEON/functions/NEDequantizationLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEDequantizationLayer.h
@@ -53,6 +53,23 @@ public:
NEDequantizationLayer &operator=(NEDequantizationLayer &&) = default;
/** Configure the kernel.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:------------------|:--------------|
+ * |QASYMM8 |F16 |
+ * |QASYMM8 |F32 |
+ * |QASYMM8_SIGNED |F16 |
+ * |QASYMM8_SIGNED |F32 |
+ * |QSYMM8_PER_CHANNEL |F16 |
+ * |QSYMM8_PER_CHANNEL |F32 |
+ * |QSYMM8 |F16 |
+ * |QSYMM8 |F32 |
+ * |QSYMM16 |F16 |
+ * |QSYMM16 |F32 |
+ *
* @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16.
* @param[out] output Destination tensor with the same dimensions of input. Data type supported: F16/F32.
*/
diff --git a/arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h
index fc4017e635..82cabed6c9 100644
--- a/arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h
@@ -58,6 +58,16 @@ public:
~NEDirectConvolutionLayer();
/** Set the input, weights, biases and output tensors.
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |src2 |dst |
+ * |:------|:------|:------|:------|
+ * |F16 |F16 |F16 |F16 |
+ * |F32 |F32 |F32 |F32 |
+ *
* @note: DirectConvolution only works in the following configurations:
* 1x1 convolution with stride_x = 1/2/3, stride_y = 1/2/3 data type = F16/F32
* 3x3 convolution with stride_x = 1/2/3, stride_y = 1/2/3 data type = F16/F32
diff --git a/arm_compute/runtime/NEON/functions/NEFFT1D.h b/arm_compute/runtime/NEON/functions/NEFFT1D.h
index a533aa7f48..9654b1e604 100644
--- a/arm_compute/runtime/NEON/functions/NEFFT1D.h
+++ b/arm_compute/runtime/NEON/functions/NEFFT1D.h
@@ -63,6 +63,14 @@ public:
~NEFFT1D();
/** Initialise the function's source and destinations.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:------|:------|
+ * |F32 |F32 |
+ *
* @param[in] input Source tensor. Data types supported: F32. Number of channels supported: 1 (real tensor) or 2 (complex tensor).
* @param[out] output Destination tensor. Data types and data layouts supported: Same as @p input.
* Number of channels supported: 1 (real tensor) or 2 (complex tensor).If @p input is real, @p output must be complex.
diff --git a/arm_compute/runtime/NEON/functions/NEFFT2D.h b/arm_compute/runtime/NEON/functions/NEFFT2D.h
index ce84a85105..57f38d1942 100644
--- a/arm_compute/runtime/NEON/functions/NEFFT2D.h
+++ b/arm_compute/runtime/NEON/functions/NEFFT2D.h
@@ -58,6 +58,14 @@ public:
~NEFFT2D();
/** Initialise the function's source and destinations
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:------|:------|
+ * |F32 |F32 |
+ *
* @param[in] input Source tensor. Data types supported: F32.
* @param[out] output Destination tensor. Data types and data layouts supported: Same as @p input.
* @param[in] config FFT related configuration
diff --git a/arm_compute/runtime/NEON/functions/NEFFTConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEFFTConvolutionLayer.h
index 213fa6093b..c5f4d45b6b 100644
--- a/arm_compute/runtime/NEON/functions/NEFFTConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEFFTConvolutionLayer.h
@@ -73,6 +73,14 @@ public:
~NEFFTConvolutionLayer();
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:------|:------|
+ * |F32 |F32 |
+ *
* @note: This function only works with any square kernel size and unit strides for both NCHW and NHWC data layout
*
* @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
diff --git a/arm_compute/runtime/NEON/functions/NEFill.h b/arm_compute/runtime/NEON/functions/NEFill.h
index ba5d020496..e923ce33e1 100644
--- a/arm_compute/runtime/NEON/functions/NEFill.h
+++ b/arm_compute/runtime/NEON/functions/NEFill.h
@@ -53,6 +53,14 @@ public:
NEFill &operator=(NEFill &&);
/** Initialize the function
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:------|:------|
+ * |All |All |
+ *
* @param[in,out] tensor Source tensor. Data types supported: All
* @param[in] constant_value Constant value to use to fill tensor.
*/
diff --git a/arm_compute/runtime/NEON/functions/NEFloor.h b/arm_compute/runtime/NEON/functions/NEFloor.h
index 9560eb9169..4d47b068db 100644
--- a/arm_compute/runtime/NEON/functions/NEFloor.h
+++ b/arm_compute/runtime/NEON/functions/NEFloor.h
@@ -54,6 +54,15 @@ public:
~NEFloor();
/** Set the source, destination of the kernel
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:------|:------|
+ * |F32 |F32 |
+ * |F16 |F16 |
+ *
* @param[in] input Source tensor. Data type supported: F16/F32.
* @param[out] output Destination tensor. Same as @p input
*/
diff --git a/arm_compute/runtime/NEON/functions/NEPReluLayer.h b/arm_compute/runtime/NEON/functions/NEPReluLayer.h
index b07febfe7f..81d5fd162c 100644
--- a/arm_compute/runtime/NEON/functions/NEPReluLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEPReluLayer.h
@@ -55,6 +55,17 @@ public:
NEPReluLayer &operator=(NEPReluLayer &&);
/** Set the input and output tensor.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |QASYMM8 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |
+ * |F16 |F16 |
+ * |F32 |F32 |
+ *
* @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
* @param[in] alpha Source alpha tensor. Data types supported: same of @p input.
* @param[out] output Destination tensor. Data type supported: same as @p input
diff --git a/arm_compute/runtime/NEON/functions/NEPermute.h b/arm_compute/runtime/NEON/functions/NEPermute.h
index 2508458a3d..c863fde0ac 100644
--- a/arm_compute/runtime/NEON/functions/NEPermute.h
+++ b/arm_compute/runtime/NEON/functions/NEPermute.h
@@ -54,6 +54,15 @@ public:
NEPermute &operator=(NEPermute &&) = default;
/** Configure the permute function
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:------|:------|
+ * |All |All |
+ *
* @note Arbitrary permutation vectors are supported with rank not greater than 4
*
* @param[in] input The input tensor to permute. Data types supported: All
diff --git a/arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h b/arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h
index 6f4cce3cde..f8074e791a 100644
--- a/arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h
+++ b/arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h
@@ -52,6 +52,24 @@ public:
NEPixelWiseMultiplication &operator=(NEPixelWiseMultiplication &&) = default;
/** Initialise the kernel's inputs, output and convertion policy.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |dst |
+ * |:--------------|:--------------|:--------------|
+ * |QASYMM8 |QASYMM8 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED |
+ * |QSYMM16 |QSYMM16 |QSYMM16 |
+ * |QSYMM16 |QSYMM16 |S32 |
+ * |U8 |U8 |U8 |
+ * |U8 |U8 |S16 |
+ * |U8 |S16 |S16 |
+ * |S16 |U8 |S16 |
+ * |S16 |S16 |S16 |
+ * |F16 |F16 |F16 |
+ * |F32 |F32 |F32 |
+ *
* @note For @p scale equal to 1/255 only round to nearest even (implemented as round half up) is supported.
* For all other scale values only round to zero (implemented as round towards minus infinity) is supported.
*
diff --git a/arm_compute/runtime/NEON/functions/NEPoolingLayer.h b/arm_compute/runtime/NEON/functions/NEPoolingLayer.h
index cb136ebca9..851dc0ca32 100644
--- a/arm_compute/runtime/NEON/functions/NEPoolingLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEPoolingLayer.h
@@ -59,6 +59,18 @@ public:
~NEPoolingLayer();
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |QASYMM8 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |
+ * |F16 |F16 |
+ * |F32 |F32 |
+ *
* @note F16 is supported for pool sizes 2 and 3 only
*
* @param[in, out] input Source tensor. (Written to only when padding != 0) Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
diff --git a/arm_compute/runtime/NEON/functions/NEQuantizationLayer.h b/arm_compute/runtime/NEON/functions/NEQuantizationLayer.h
index 9e2d9ecf24..a7fadfc7cd 100644
--- a/arm_compute/runtime/NEON/functions/NEQuantizationLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEQuantizationLayer.h
@@ -52,6 +52,25 @@ public:
NEQuantizationLayer &operator=(NEQuantizationLayer &&) = default;
/** Set the input and output tensors.
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:------------------|:--------------|
+ * |QASYMM8 |QASYMM8 |
+ * |QASYMM8 |QASYMM8_SIGNED |
+ * |QASYMM8 |QASYMM16 |
+ * |QASYMM8_SIGNED |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |
+ * |QASYMM8_SIGNED |QASYMM16 |
+ * |F16 |QASYMM8 |
+ * |F16 |QASYMM8_SIGNED |
+ * |F16 |QASYMM16 |
+ * |F32 |QASYMM8 |
+ * |F32 |QASYMM8_SIGNED |
+ * |F32 |QASYMM16 |
+ *
* @param[in] input Source tensor. The dimensions over the third will be interpreted as batches. Data types supported: QASYMM8/QASYMM8_SIGNED/F32/F16.
* @param[out] output Destination tensor with the same dimensions of input. Data types supported: QASYMM8/QASYMM8_SIGNED/QASYMM16
*/
diff --git a/arm_compute/runtime/NEON/functions/NEReshapeLayer.h b/arm_compute/runtime/NEON/functions/NEReshapeLayer.h
index b8c0a841bc..3e6e33f797 100644
--- a/arm_compute/runtime/NEON/functions/NEReshapeLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEReshapeLayer.h
@@ -52,6 +52,14 @@ public:
NEReshapeLayer &operator=(NEReshapeLayer &&);
/** Initialise the kernel's inputs and outputs
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:------|:------|
+ * |All |All |
+ *
* @param[in] input Input tensor. Data type supported: All
* @param[out] output Output tensor. Data type supported: Same as @p input
*/
diff --git a/arm_compute/runtime/NEON/functions/NEScale.h b/arm_compute/runtime/NEON/functions/NEScale.h
index 45658a7cd3..233ee2969e 100644
--- a/arm_compute/runtime/NEON/functions/NEScale.h
+++ b/arm_compute/runtime/NEON/functions/NEScale.h
@@ -48,6 +48,20 @@ public:
ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(NEScale);
/** Initialize the function's source, destination, interpolation type and border_mode.
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |QASYMM8 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |
+ * |F16 |F16 |
+ * |F32 |F32 |
+ * |U8 |U8 |
+ * |S16 |S16 |
+ *
* @param[in, out] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/U8/S16/F16/F32. (Written to only for @p border_mode != UNDEFINED)
* @param[out] output Destination tensor. Data type supported: Same as @p input. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane.
* @param[in] info @ref ScaleKernelInfo to be used for configuration
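A minimal sketch (not part of this patch) of the F32 -> F32 row above, doing a bilinear 2x upscale; the shapes and border mode are illustrative assumptions:

    #include "arm_compute/core/KernelDescriptors.h"
    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/Tensor.h"
    #include "arm_compute/runtime/NEON/functions/NEScale.h"

    using namespace arm_compute;

    int main()
    {
        Tensor src, dst;
        src.allocator()->init(TensorInfo(TensorShape(16U, 16U, 3U), 1, DataType::F32));
        dst.allocator()->init(TensorInfo(TensorShape(32U, 32U, 3U), 1, DataType::F32)); // scaling is performed in the XY-plane only

        NEScale scale;
        scale.configure(&src, &dst, ScaleKernelInfo{ InterpolationPolicy::BILINEAR, BorderMode::REPLICATE });

        src.allocator()->allocate();
        dst.allocator()->allocate();
        scale.run();
        return 0;
    }
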
diff --git a/arm_compute/runtime/NEON/functions/NESlice.h b/arm_compute/runtime/NEON/functions/NESlice.h
index 28628778cb..214ffa512c 100644
--- a/arm_compute/runtime/NEON/functions/NESlice.h
+++ b/arm_compute/runtime/NEON/functions/NESlice.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -89,6 +89,14 @@ public:
/** Configure kernel
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:------|:------|
+ * |All |All |
+ *
* @note Supported tensor rank: up to 4
* @note Start indices must be non-negative. 0 <= starts[i]
* @note End coordinates can be negative, which represents the number of elements before the end of that dimension.
diff --git a/arm_compute/runtime/NEON/functions/NEStridedSlice.h b/arm_compute/runtime/NEON/functions/NEStridedSlice.h
index f9c94f5301..7ba6a52a58 100644
--- a/arm_compute/runtime/NEON/functions/NEStridedSlice.h
+++ b/arm_compute/runtime/NEON/functions/NEStridedSlice.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -95,6 +95,14 @@ public:
/** Configure kernel
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:------|:------|
+ * |All |All |
+ *
* @note Supported tensor rank: up to 4
*
* @param[in] input Source tensor. Data type supported: All
diff --git a/arm_compute/runtime/NEON/functions/NETranspose.h b/arm_compute/runtime/NEON/functions/NETranspose.h
index 78916f67b7..581fe74309 100644
--- a/arm_compute/runtime/NEON/functions/NETranspose.h
+++ b/arm_compute/runtime/NEON/functions/NETranspose.h
@@ -54,6 +54,14 @@ public:
NETranspose &operator=(NETranspose &&) = default;
/** Initialise the kernel's inputs and output
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:------|:------|
+ * |All |All |
+ *
* @param[in] input Input tensor. Data types supported: All
* @param[out] output Output tensor. Data type supported: Same as @p input
*/
diff --git a/arm_compute/runtime/OperatorList.h b/arm_compute/runtime/OperatorList.h
new file mode 100644
index 0000000000..8c43c68b90
--- /dev/null
+++ b/arm_compute/runtime/OperatorList.h
@@ -0,0 +1,1028 @@
+/*
+ * Copyright (c) 2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_OPERATOR_LIST_H
+#define ARM_COMPUTE_OPERATOR_LIST_H
+
+/** ActivationLayer
+ *
+ * Description:
+ * Function to simulate an activation layer with the specified activation function.
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_ELU
+ * ANEURALNETWORKS_HARD_SWISH
+ * ANEURALNETWORKS_LOGISTIC
+ * ANEURALNETWORKS_RELU
+ * ANEURALNETWORKS_RELU1
+ * ANEURALNETWORKS_RELU6
+ * ANEURALNETWORKS_TANH
+ *
+ */
+
+/** ArgMinMaxLayer (not ported)
+ *
+ * Description:
+ * Function to calculate the index of the minimum or maximum values in a tensor based on an axis.
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_ARGMAX
+ * ANEURALNETWORKS_ARGMIN
+ *
+ */
+
+/** ArithmeticAddition (no CL)
+ *
+ * Description:
+ * Function to add 2 tensors.
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_ADD
+ *
+ */
+
+/** ArithmeticSubtraction (no CL)
+ *
+ * Description:
+ * Function to subtract 2 tensors.
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_SUB
+ *
+ */
+
+/** BatchNormalizationLayer (not ported)
+ *
+ * Description:
+ * @f[ out_i = \gamma * (\frac{in_i - \mu_{B}}{\sqrt{\sigma^2_{B} + \epsilon}}) + \beta \equiv BN_{\gamma,\beta}(in_i) @f]
+ *
+ * Equivalent Android NNAPI Op:
+ * None
+ *
+ */
+
+/** BatchToSpaceLayer (not ported)
+ *
+ * Description:
+ * Rearranges (permutes) data from batch into blocks of spatial data, followed by cropping. It is the reverse transformation of SpaceToBatch (from TF website)
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_BATCH_TO_SPACE_ND
+ *
+ */
+
+/** BitwiseAnd (not ported)
+ *
+ * Description:
+ * Function to perform bitwise AND between 2 tensors.
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_LOGICAL_AND
+ *
+ */
+
+/** BitwiseNot (not ported)
+ *
+ * Description:
+ * Function to perform bitwise NOT.
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_LOGICAL_NOT
+ *
+ */
+
+/** BitwiseOr (not ported)
+ *
+ * Description:
+ * Function to perform bitwise OR between 2 tensors.
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_LOGICAL_OR
+ *
+ */
+
+/** BitwiseXor (not ported)
+ *
+ * Description:
+ * Function to perform bitwise XOR between 2 tensors.
+ *
+ * Equivalent Android NNAPI Op:
+ * None
+ *
+ */
+
+/** BoundingBoxTransform (not ported)
+ *
+ * Description:
+ * Function to transform proposal bounding boxes to target bounding boxes using bounding box deltas.
+ *
+ * Equivalent Android NNAPI Op:
+ * ?
+ *
+ */
+
+/** Cast (not ported)
+ *
+ * Description:
+ * Function to cast a tensor.
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_CAST
+ *
+ */
+
+/** ChannelShuffleLayer (not ported)
+ *
+ * Description:
+ * Function to shuffle the channels of the input tensor.
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_CHANNEL_SHUFFLE
+ *
+ */
+
+/** Comparison (not ported) (only CL)
+ *
+ * Description:
+ * Function to compare 2 tensors.
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_EQUAL
+ * ANEURALNETWORKS_GREATER
+ * ANEURALNETWORKS_GREATER_EQUAL
+ * ANEURALNETWORKS_LESS
+ * ANEURALNETWORKS_LESS_EQUAL
+ * ANEURALNETWORKS_NOT_EQUAL
+ *
+ */
+
+/** ConcatenateLayer
+ *
+ * Description:
+ * Function to concatenate tensors along a given axis.
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_CONCATENATION
+ *
+ */
+
+/** ConvertFullyConnectedWeights
+ *
+ * Description:
+ * Function to transpose the weights for the fully connected layer.
+ *
+ * Equivalent Android NNAPI Op:
+ * None
+ *
+ */
+
+/** ConvolutionLayer (not ported)
+ *
+ * Description:
+ * Function to compute a convolution layer.
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_CONV_2D
+ *
+ */
+
+/** Copy
+ *
+ * Description:
+ * Function to copy a tensor.
+ *
+ * Equivalent Android NNAPI Op:
+ * None
+ *
+ */
+
+/** Crop (only CL)
+ *
+ * Description:
+ * Function to crop a given region of a tensor.
+ *
+ * Equivalent Android NNAPI Op:
+ * ?
+ *
+ */
+
+/** CropResize (not ported)
+ *
+ * Description:
+ * Function to perform cropping and resizing of a tensor.
+ *
+ * Equivalent Android NNAPI Op:
+ * ?
+ *
+ */
+
+/** DeconvolutionLayer (not ported)
+ *
+ * Description:
+ * Function to compute a deconvolution (transposed convolution) layer.
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_TRANSPOSE_CONV_2D
+ *
+ */
+
+/** DeconvolutionLayerUpsample (only CL) (not ported)
+ *
+ * Description:
+ * Function to perform the upsampling stage of a deconvolution layer.
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_TRANSPOSE_CONV_2D
+ *
+ */
+
+/** DepthConvertLayer (not ported)
+ *
+ * Description:
+ * Function to convert a tensor from one data type (depth) to another.
+ *
+ * Equivalent Android NNAPI Op:
+ * None
+ *
+ */
+
+/** DepthToSpaceLayer (not ported)
+ *
+ * Description:
+ * Function to rearrange data from depth into blocks of spatial data.
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_DEPTH_TO_SPACE
+ *
+ */
+
+/** DepthwiseConvolutionLayer (not ported)
+ *
+ * Description:
+ * Function to perform depthwise separable convolution
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_DEPTHWISE_CONV_2D
+ *
+ */
+
+/** DequantizationLayer
+ *
+ * Description:
+ * Function to dequantize the values in a tensor
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_DEQUANTIZE
+ *
+ */
+
+/** DetectionPostProcessLayer (not ported) (no CL)
+ *
+ * Description:
+ * Function to generate the detection output based on center size encoded boxes, class prediction and anchors by doing non maximum suppression (NMS)
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_DETECTION_POSTPROCESSING
+ *
+ */
+
+/** DirectConvolutionLayer
+ *
+ * Description:
+ * Function to compute a direct convolution.
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_CONV_2D
+ *
+ */
+
+/** DirectDeconvolutionLayer (only CL)
+ *
+ * Description:
+ * Function to compute a direct deconvolution (transposed convolution) layer.
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_TRANSPOSE_CONV_2D
+ *
+ */
+
+/** ElementWiseOperations (skip)
+ *
+ * Description:
+ * Function to perform in Cpu:
+ * - Div
+ * - Max
+ * - Min
+ * - Pow
+ * - SquaredDiff
+ * - Comparisons (Equal, greater, greater_equal, less, less_equal, not_equal)
+ *
+ * Function to perform in CL:
+ * - Add
+ * - Sub
+ * - Div
+ * - Max
+ * - Min
+ * - Pow
+ * - SquaredDiff
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_MAXIMUM
+ * ANEURALNETWORKS_MINIMUM
+ * ANEURALNETWORKS_POW
+ * ANEURALNETWORKS_DIV
+ * ANEURALNETWORKS_ADD (only CL)
+ * ANEURALNETWORKS_SUB (only CL)
+ * ANEURALNETWORKS_EQUAL (no CL)
+ * ANEURALNETWORKS_GREATER (no CL)
+ * ANEURALNETWORKS_GREATER_EQUAL (no CL)
+ * ANEURALNETWORKS_LESS (no CL)
+ * ANEURALNETWORKS_LESS_EQUAL (no CL)
+ * ANEURALNETWORKS_NOT_EQUAL (no CL)
+ *
+ */
+
+/** ElementWiseOperationUnary (skip)
+ *
+ * Description:
+ * Function to perform:
+ * - Rsqrt
+ * - Exp
+ * - Neg
+ * - Log
+ * - Abs
+ * - Round
+ * - Sin
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_ABS
+ * ANEURALNETWORKS_EXP
+ * ANEURALNETWORKS_LOG
+ * ANEURALNETWORKS_NEG
+ * ANEURALNETWORKS_RSQRT
+ * ANEURALNETWORKS_SIN
+ *
+ */
+
+/** FFT1D
+ *
+ * Description:
+ * Fast Fourier Transform 1D
+ *
+ * Equivalent Android NNAPI Op:
+ * None
+ *
+ */
+
+/** FFT2D
+ *
+ * Description:
+ * Fast Fourier Transform 2D
+ *
+ * Equivalent Android NNAPI Op:
+ * None
+ *
+ */
+
+/** FFTConvolutionLayer
+ *
+ * Description:
+ * Fast Fourier Transform Convolution
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_CONV_2D
+ *
+ */
+
+/** Fill
+ *
+ * Description:
+ * Set the values of a tensor with a given value
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_FILL
+ *
+ */
+
+/** FillBorder (not ported)
+ *
+ * Description:
+ * Function to fill the borders of a tensor.
+ *
+ * Equivalent Android NNAPI Op:
+ * ?
+ *
+ */
+
+/** FlattenLayer (not ported)
+ *
+ * Description:
+ * Reshape a tensor to be 1D
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_RESHAPE
+ *
+ */
+
+/** Floor
+ *
+ * Description:
+ * Function to round the values of a tensor down to the nearest integer.
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_FLOOR
+ *
+ */
+
+/** FullyConnectedLayer (not ported)
+ *
+ * Description:
+ * Function to perform a fully connected / dense layer
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_FULLY_CONNECTED
+ *
+ */
+
+/** FuseBatchNormalization (not ported)
+ *
+ * Description:
+ * Function to fuse a batch normalization layer into the weights of a preceding convolution layer.
+ *
+ * Equivalent Android NNAPI Op:
+ * None
+ *
+ */
+
+/** Gather (not ported)
+ *
+ * Description:
+ * Function to gather elements along an axis from a tensor given an indices tensor.
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_GATHER
+ *
+ */
+
+/** GEMM (not ported)
+ *
+ * Description:
+ * General Matrix Multiplication.
+ *
+ * Equivalent Android NNAPI Op:
+ * None
+ *
+ */
+
+/** GEMMConv2D (not ported) (no CL)
+ *
+ * Description:
+ * Function to compute a convolution layer using GEMM (General Matrix Multiplication).
+ *
+ * Equivalent Android NNAPI Op:
+ * None
+ *
+ */
+
+/** GEMMConvolutionLayer (not ported)
+ *
+ * Description:
+ * Function to compute a convolution layer using GEMM (General Matrix Multiplication).
+ *
+ * Equivalent Android NNAPI Op:
+ * None
+ *
+ */
+
+/** GEMMDeconvolutionLayer (not ported) (only CL)
+ *
+ * Description:
+ * Function to compute a deconvolution layer using GEMM (General Matrix Multiplication).
+ *
+ * Equivalent Android NNAPI Op:
+ * None
+ *
+ */
+
+/** GEMMLowpMatrixMultiplyCore (not ported)
+ *
+ * Description:
+ * General Matrix Multiplication.
+ *
+ * Equivalent Android NNAPI Op:
+ * None
+ *
+ */
+
+/** GEMMLowpOutputStage (not ported)
+ *
+ * Description:
+ * Function to apply the output stage of a low-precision (quantized) GEMM.
+ *
+ * Equivalent Android NNAPI Op:
+ * None
+ *
+ */
+
+/** GenerateProposalsLayer (not ported)
+ *
+ * Description:
+ * Function to generate proposals for a RPN (Region Proposal Network).
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_GENERATE_PROPOSALS
+ *
+ */
+
+/** InstanceNormalizationLayer (not ported)
+ *
+ * Description:
+ * Function to perform an instance normalization on a given axis.
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_INSTANCE_NORMALIZATION
+ *
+ */
+
+/** L2NormalizationLayer (not ported)
+ *
+ * Description:
+ * Function to perform a L2 normalization on a given axis.
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_L2_NORMALIZATION
+ *
+ */
+
+/** Logical (no CL)
+ *
+ * Description:
+ * Function to perform:
+ * - Logical AND
+ * - Logical OR
+ * - Logical NOT
+ *
+ * Equivalent Android NNAPI Op:
+ * None?
+ *
+ */
+
+/** LogicalAnd (only CL)
+ *
+ * Description:
+ * Function to perform Logical AND
+ *
+ * Equivalent Android NNAPI Op:
+ * None?
+ *
+ */
+
+/** LogicalOr (only CL)
+ *
+ * Description:
+ * Function to perform Logical OR
+ *
+ * Equivalent Android NNAPI Op:
+ * None?
+ *
+ */
+
+/** LogicalNot (only CL)
+ *
+ * Description:
+ * Function to perform Logical NOT
+ *
+ * Equivalent Android NNAPI Op:
+ * None?
+ *
+ */
+
+/** LSTMLayer (not ported)
+ *
+ * Description:
+ * Function to perform LSTM
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_LSTM
+ *
+ */
+
+/** LSTMLayerQuantized (not ported)
+ *
+ * Description:
+ * Function to perform LSTM
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_QUANTIZED_LSTM
+ * ANEURALNETWORKS_QUANTIZED_16BIT_LSTM ?
+ *
+ */
+
+/** MaxUnpoolingLayer (not ported)
+ *
+ * Description:
+ * Function to perform MaxUnpooling
+ *
+ * Equivalent Android NNAPI Op:
+ * ?
+ *
+ */
+
+/** MeanStdDevNormalizationLayer (not ported)
+ *
+ * Description:
+ * Function to execute mean and standard deviation normalization.
+ *
+ * Equivalent Android NNAPI Op:
+ * None ?
+ *
+ */
+
+/** NormalizationLayer (not ported)
+ *
+ * Description:
+ * Function to compute normalization layer.
+ *
+ * Equivalent Android NNAPI Op:
+ * None ?
+ *
+ */
+
+/** PadLayer (not ported)
+ *
+ * Description:
+ * Function to pad a tensor.
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_PAD
+ * ANEURALNETWORKS_PAD_V2
+ *
+ */
+
+/** Permute
+ *
+ * Description:
+ * Function to transpose an ND tensor.
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_TRANSPOSE
+ *
+ */
+
+/** PixelWiseMultiplication
+ *
+ * Description:
+ * Function to perform a pixel-wise multiplication of 2 tensors.
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_MUL
+ *
+ */
+
+/** PoolingLayer
+ *
+ * Description:
+ * Function to perform pooling with the specified pooling operation.
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_AVERAGE_POOL_2D
+ * ANEURALNETWORKS_L2_POOL_2D
+ * ANEURALNETWORKS_MAX_POOL_2D
+ *
+ */
+
+/** PReluLayer
+ *
+ * Description:
+ * Function to compute the activation layer with the PRELU activation function.
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_PRELU
+ *
+ */
+
+/** PriorBoxLayer (not ported)
+ *
+ * Description:
+ * Function to compute prior boxes.
+ *
+ * Equivalent Android NNAPI Op:
+ * ?
+ *
+ */
+
+/** QLSTMLayer (not ported)
+ *
+ * Description:
+ * Function to perform quantized LSTM (QLSTM)
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_QUANTIZED_LSTM
+ * ANEURALNETWORKS_QUANTIZED_16BIT_LSTM ?
+ *
+ */
+
+/** QuantizationLayer
+ *
+ * Description:
+ * Function to perform a quantization layer
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_QUANTIZE
+ *
+ */
+
+/** Range (not ported)
+ *
+ * Description:
+ * Function to generate a tensor containing a sequence of numbers.
+ *
+ * Equivalent Android NNAPI Op:
+ * none?
+ *
+ */
+
+/** ReduceMean (not ported)
+ *
+ * Description:
+ * Function to perform a reduce mean operation.
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_MEAN
+ *
+ */
+
+/** ReductionOperation (not ported)
+ *
+ * Description:
+ * Function to perform a reduction with one of the following operations (see the illustrative sketch after this entry):
+ * - ARG_IDX_MAX: Index of the max value
+ * - ARG_IDX_MIN: Index of the min value
+ * - MEAN_SUM: Mean of sum
+ * - PROD: Product
+ * - SUM_SQUARE: Sum of squares
+ * - SUM: Sum
+ * - MIN: Min
+ * - MAX: Max
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_REDUCE_ALL
+ * ANEURALNETWORKS_REDUCE_ANY
+ * ANEURALNETWORKS_REDUCE_MAX
+ * ANEURALNETWORKS_REDUCE_MIN
+ * ANEURALNETWORKS_REDUCE_PROD
+ * ANEURALNETWORKS_REDUCE_SUM
+ *
+ */
+
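+/* Illustrative sketch only, not an operator entry: a hypothetical example of how the reduction
+ * modes listed above could map onto the runtime API, assuming the NEReductionOperation function
+ * and the ReductionOperation enum:
+ *
+ *   #include "arm_compute/core/TensorInfo.h"
+ *   #include "arm_compute/runtime/NEON/functions/NEReductionOperation.h"
+ *   #include "arm_compute/runtime/Tensor.h"
+ *
+ *   void reduce_sum_example()
+ *   {
+ *       using namespace arm_compute;
+ *
+ *       Tensor src, dst;
+ *       src.allocator()->init(TensorInfo(TensorShape(8U, 4U), 1, DataType::F32));
+ *       dst.allocator()->init(TensorInfo(TensorShape(1U, 4U), 1, DataType::F32)); // axis 0 reduced, dims kept
+ *
+ *       NEReductionOperation reduce;
+ *       reduce.configure(&src, &dst, 0, ReductionOperation::SUM); // SUM mode from the list above
+ *
+ *       src.allocator()->allocate();
+ *       dst.allocator()->allocate();
+ *       reduce.run();
+ *   }
+ */
+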
+/** ReorgLayer (not ported)
+ *
+ * Description:
+ * Function to perform a reorg layer.
+ *
+ * Equivalent Android NNAPI Op:
+ * None?
+ *
+ */
+
+/** ReshapeLayer
+ *
+ * Description:
+ * Function to reshape a tensor.
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_RESHAPE
+ * ANEURALNETWORKS_SQUEEZE
+ *
+ */
+
+/** ReverseLayer (not ported)
+ *
+ * Description:
+ * Function to reverse the order of elements of a tensor along given axes.
+ *
+ * Equivalent Android NNAPI Op:
+ * None?
+ *
+ */
+
+/** RNNLayer (not ported)
+ *
+ * Description:
+ * Function to perform a recurrent neural network (RNN) layer.
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_RNN
+ *
+ */
+
+/** ROIAlignLayer (not ported)
+ *
+ * Description:
+ * Function to perform ROI alignment.
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_ROI_ALIGN
+ *
+ */
+
+/** ROIPoolingLayer (not ported)
+ *
+ * Description:
+ * Function to perform ROI pooling.
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_ROI_POOLING
+ *
+ */
+
+/** Scale
+ *
+ * Description:
+ * Function to resize a tensor using one of the following interpolation policies (a usage sketch follows this entry):
+ * - Bilinear
+ * - Nearest neighbor
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_RESIZE_BILINEAR
+ * ANEURALNETWORKS_RESIZE_NEAREST_NEIGHBOR
+ *
+ */
+
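+/* Illustrative sketch only, not an operator entry: a hypothetical example of resizing a tensor
+ * with the bilinear policy listed above, assuming the NEScale function and the ScaleKernelInfo
+ * descriptor:
+ *
+ *   #include "arm_compute/core/KernelDescriptors.h"
+ *   #include "arm_compute/core/TensorInfo.h"
+ *   #include "arm_compute/runtime/NEON/functions/NEScale.h"
+ *   #include "arm_compute/runtime/Tensor.h"
+ *
+ *   void scale_bilinear_example()
+ *   {
+ *       using namespace arm_compute;
+ *
+ *       Tensor src, dst;
+ *       src.allocator()->init(TensorInfo(TensorShape(16U, 16U, 3U), 1, DataType::F32)); // W, H, C (default NCHW)
+ *       dst.allocator()->init(TensorInfo(TensorShape(32U, 32U, 3U), 1, DataType::F32)); // upscaled by 2x
+ *
+ *       NEScale scale;
+ *       scale.configure(&src, &dst, ScaleKernelInfo{ InterpolationPolicy::BILINEAR, BorderMode::REPLICATE });
+ *
+ *       src.allocator()->allocate();
+ *       dst.allocator()->allocate();
+ *       scale.run();
+ *   }
+ */
+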
+/** Select (not ported)
+ *
+ * Description:
+ * Function to select values from 2 tensors depending on an input tensor of booleans.
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_SELECT
+ *
+ */
+
+/** Slice
+ *
+ * Description:
+ * Function to perform tensor slicing.
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_SLICE
+ *
+ */
+
+/** SoftmaxLayer (skip)
+ *
+ * Description:
+ * Function to compute a SoftmaxLayer and a Log SoftmaxLayer.
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_LOG_SOFTMAX
+ * ANEURALNETWORKS_SOFTMAX
+ *
+ */
+
+/** SpaceToBatchLayer (not ported)
+ *
+ * Description:
+ * Function to divide a tensor spatially.
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_SPACE_TO_BATCH_ND
+ *
+ */
+
+/** SpaceToDepthLayer (not ported)
+ *
+ * Description:
+ * Function to rearrange blocks of spatial data into depth.
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_SPACE_TO_DEPTH
+ *
+ */
+
+/** Split (not ported)
+ *
+ * Description:
+ * Function to split a tensor along a given axis.
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_SPLIT
+ *
+ */
+
+/** StackLayer (not ported)
+ *
+ * Description:
+ * Function to stack tensors along an axis.
+ *
+ * Equivalent Android NNAPI Op:
+ * none
+ *
+ */
+
+/** StridedSlice
+ *
+ * Description:
+ * Function to extract a strided slice of a tensor.
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_STRIDED_SLICE
+ *
+ */
+
+/** Tile (not ported)
+ *
+ * Description:
+ * Function to construct a tensor by tiling a given tensor.
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_TILE
+ *
+ */
+
+/** Transpose
+ *
+ * Description:
+ * Function to transpose a 2D tensor.
+ *
+ * Equivalent Android NNAPI Op:
+ * ANEURALNETWORKS_TRANSPOSE
+ *
+ */
+
+/** Unstack (not ported)
+ *
+ * Description:
+ * Function to unpack a rank-R tensor into rank-(R-1) tensors.
+ *
+ * Equivalent Android NNAPI Op:
+ * none
+ *
+ */
+
+/** WinogradConvolutionLayer (not ported)
+ *
+ * Description:
+ * Function to compute a convolution using the Winograd algorithm.
+ *
+ * Equivalent Android NNAPI Op:
+ * None
+ *
+ */
+
+/** WinogradInputTransform (not ported) (only CL)
+ *
+ * Description:
+ * Function to perform the Winograd input transform.
+ *
+ * Equivalent Android NNAPI Op:
+ * None
+ *
+ */
+
+#endif /* ARM_COMPUTE_OPERATOR_LIST_H */ \ No newline at end of file
diff --git a/docs/09_operators_list.dox b/docs/09_operators_list.dox
new file mode 100644
index 0000000000..82a127bbd3
--- /dev/null
+++ b/docs/09_operators_list.dox
@@ -0,0 +1,784 @@
+///
+/// Copyright (c) 2021 Arm Limited.
+///
+/// SPDX-License-Identifier: MIT
+///
+/// Permission is hereby granted, free of charge, to any person obtaining a copy
+/// of this software and associated documentation files (the "Software"), to
+/// deal in the Software without restriction, including without limitation the
+/// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+/// sell copies of the Software, and to permit persons to whom the Software is
+/// furnished to do so, subject to the following conditions:
+///
+/// The above copyright notice and this permission notice shall be included in all
+/// copies or substantial portions of the Software.
+///
+/// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+/// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+/// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+/// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+/// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+/// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+/// SOFTWARE.
+///
+namespace arm_compute
+{
+/**
+@page operators_list Supported Operators
+
+@tableofcontents
+
+@section S9_1_operators_list Supported Operators
+
+Compute Library supports the operators listed in the table below.
+
+Compute Library supports a wide list of data-types; detailed information can be found directly in the documentation of each kernel/function.
+The main data-types that the Machine Learning functions support are the following:
+ <ul>
+ <li>BFLOAT16: 16-bit non-standard brain floating point
+ <li>QASYMM8: 8-bit unsigned asymmetric quantized
+ <li>QASYMM8_SIGNED: 8-bit signed asymmetric quantized
+ <li>QSYMM8_PER_CHANNEL: 8-bit signed symmetric quantized (Used for the weights)
+  <li>QSYMM8: 8-bit signed symmetric quantized
+  <li>QSYMM16: 16-bit signed symmetric quantized
+ <li>F32: 32-bit single precision floating point
+ <li>F16: 16-bit half precision floating point
+ <li>S32: 32-bit signed integer
+ <li>U8: 8-bit unsigned char
+ <li>All: include all above data types
+ </ul>
+
+Compute Library supports the following data layouts (fast changing dimension from right to left):
+ <ul>
+ <li>NHWC: The native layout of Compute Library that delivers the best performance where channels are in the fastest changing dimension
+ <li>NCHW: Legacy layout where width is in the fastest changing dimension
+ <li>All: include all above data layouts
+ </ul>
+where N = batches, C = channels, H = height, W = width
+
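+As an illustration of how the data types and layouts in the table map onto the library's API, the
+following is a minimal, hypothetical sketch (assuming the usual Tensor/TensorInfo runtime interface
+and the NEActivationLayer function shown in the first row of the table):
+
+@code{.cpp}
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
+#include "arm_compute/runtime/Tensor.h"
+
+using namespace arm_compute;
+
+int main()
+{
+    // F32 src/dst pair with NHWC layout, matching one row of the ActivationLayer entry below.
+    TensorInfo info(TensorShape(8U, 16U, 16U, 1U), 1, DataType::F32); // C, W, H, N for NHWC
+    info.set_data_layout(DataLayout::NHWC);
+
+    Tensor src, dst;
+    src.allocator()->init(info);
+    dst.allocator()->init(info);
+
+    NEActivationLayer act;
+    act.configure(&src, &dst, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
+
+    src.allocator()->allocate();
+    dst.allocator()->allocate();
+    act.run();
+
+    return 0;
+}
+@endcode
+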
+<table>
+<caption id="multi_row"></caption>
+<tr>
+ <th>Function
+ <th>Description
+ <th>Equivalent Android NNAPI Op
+ <th>Backends
+ <th>Data Layouts
+ <th>Data Types
+<tr>
+ <td rowspan="2">ActivationLayer
+ <td rowspan="2" style="width:200px;"> Function to simulate an activation layer with the specified activation function.
+ <td rowspan="2">
+ <ul>
+ <li>ANEURALNETWORKS_ELU
+ <li>ANEURALNETWORKS_HARD_SWISH
+ <li>ANEURALNETWORKS_LOGISTIC
+ <li>ANEURALNETWORKS_RELU
+ <li>ANEURALNETWORKS_RELU1
+ <li>ANEURALNETWORKS_RELU6
+ <li>ANEURALNETWORKS_TANH
+ </ul>
+ <td>NEActivationLayer
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>QASYMM8<td>QASYMM8
+ <tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED
+ <tr><td>QSYMM16<td>QSYMM16
+ <tr><td>F16<td>F16
+ <tr><td>F32<td>F32
+ </table>
+<tr>
+ <td>CLActivationLayer
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>QASYMM8<td>QASYMM8
+ <tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED
+ <tr><td>QSYMM16<td>QSYMM16
+ <tr><td>F16<td>F16
+ <tr><td>F32<td>F32
+ </table>
+<tr>
+ <td rowspan="2">ConcatenateLayer
+ <td rowspan="2" style="width:200px;"> Function to concatenate tensors along a given axis.
+ <td rowspan="2">
+ <ul>
+ <li>ANEURALNETWORKS_CONCATENATION
+ </ul>
+ <td>NEConcatenateLayer
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>QASYMM8<td>QASYMM8
+ <tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED
+ <tr><td>F16<td>F16
+ <tr><td>F32<td>F32
+ </table>
+<tr>
+ <td>CLConcatenateLayer
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>QASYMM8<td>QASYMM8
+ <tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED
+ <tr><td>F16<td>F16
+ <tr><td>F32<td>F32
+ </table>
+<tr>
+ <td rowspan="2">ConvertFullyConnectedWeights
+  <td rowspan="2" style="width:200px;"> Function to transpose the weights for the fully connected layer.
+ <td rowspan="2">
+ <ul>
+ <li>None
+ </ul>
+ <td>NEConvertFullyConnectedWeights
+ <td>
+ <ul>
+ <li>NHWC
+ <li>NCHW
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>All<td>All
+ </table>
+<tr>
+ <td>CLConvertFullyConnectedWeights
+ <td>
+ <ul>
+ <li>NHWC
+ <li>NCHW
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>All<td>All
+ </table>
+<tr>
+ <td rowspan="2">Copy
+ <td rowspan="2" style="width:200px;"> Function to copy a tensor.
+ <td rowspan="2">
+ <ul>
+ <li>None
+ </ul>
+ <td>NECopy
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>All<td>All
+ </table>
+<tr>
+ <td>CLCopy
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>All<td>All
+ </table>
+<tr>
+ <td rowspan="2">DequantizationLayer
+ <td rowspan="2" style="width:200px;"> Function to dequantize the values in a tensor
+ <td rowspan="2">
+ <ul>
+ <li>ANEURALNETWORKS_DEQUANTIZE
+ </ul>
+ <td>NEDequantizationLayer
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>QASYMM8<td>F16
+ <tr><td>QASYMM8<td>F32
+ <tr><td>QASYMM8_SIGNED<td>F16
+ <tr><td>QASYMM8_SIGNED<td>F32
+ <tr><td>QSYMM8_PER_CHANNEL<td>F16
+ <tr><td>QSYMM8_PER_CHANNEL<td>F32
+ <tr><td>QSYMM8<td>F16
+ <tr><td>QSYMM8<td>F32
+ <tr><td>QSYMM16<td>F16
+ <tr><td>QSYMM16<td>F32
+ </table>
+<tr>
+ <td>CLDequantizationLayer
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>QASYMM8<td>F16
+ <tr><td>QASYMM8<td>F32
+ <tr><td>QASYMM8_SIGNED<td>F16
+ <tr><td>QASYMM8_SIGNED<td>F32
+ <tr><td>QSYMM8_PER_CHANNEL<td>F16
+ <tr><td>QSYMM8_PER_CHANNEL<td>F32
+ <tr><td>QSYMM8<td>F16
+ <tr><td>QSYMM8<td>F32
+ <tr><td>QSYMM16<td>F16
+ <tr><td>QSYMM16<td>F32
+ </table>
+<tr>
+ <td rowspan="2">DirectConvolutionLayer
+  <td rowspan="2" style="width:200px;"> Function to compute a direct convolution.
+ <td rowspan="2">
+ <ul>
+ <li>ANEURALNETWORKS_CONV_2D
+ </ul>
+ <td>NEDirectConvolutionLayer
+ <td>
+ <ul>
+ <li>NHWC
+ <li>NCHW
+ </ul>
+ <td>
+ <table>
+ <tr><th>src0<th>src1<th>src2<th>dst
+ <tr><td>F16<td>F16<td>F16<td>F16
+ <tr><td>F32<td>F32<td>F32<td>F32
+ </table>
+<tr>
+ <td>CLDirectConvolutionLayer
+ <td>
+ <ul>
+ <li>NHWC
+ <li>NCHW
+ </ul>
+ <td>
+ <table>
+ <tr><th>src0<th>src1<th>src2<th>dst
+ <tr><td>F16<td>F16<td>F16<td>F16
+ <tr><td>F32<td>F32<td>F32<td>F32
+ <tr><td>QASYMM8<td>QASYMM8<td>S32<td>QASYMM8
+ <tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED<td>S32<td>QASYMM8_SIGNED
+ </table>
+<tr>
+ <td rowspan="2">FFT1D
+ <td rowspan="2" style="width:200px;"> Fast Fourier Transform 1D
+ <td rowspan="2">
+ <ul>
+ <li>None
+ </ul>
+ <td>NEFFT1D
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>F32<td>F32
+ </table>
+<tr>
+ <td>CLFFT1D
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>F32<td>F32
+ <tr><td>F16<td>F16
+ </table>
+<tr>
+ <td rowspan="2">FFT2D
+ <td rowspan="2" style="width:200px;"> Fast Fourier Transform 2D
+ <td rowspan="2">
+ <ul>
+ <li>None
+ </ul>
+ <td>NEFFT2D
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>F32<td>F32
+ </table>
+<tr>
+ <td>CLFFT2D
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>F32<td>F32
+ <tr><td>F16<td>F16
+ </table>
+<tr>
+ <td rowspan="2">FFTConvolutionLayer
+ <td rowspan="2" style="width:200px;"> Fast Fourier Transform Convolution
+ <td rowspan="2">
+ <ul>
+ <li>ANEURALNETWORKS_CONV_2D
+ </ul>
+ <td>NEFFTConvolutionLayer
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>F32<td>F32
+ </table>
+<tr>
+ <td>CLFFTConvolutionLayer
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>F32<td>F32
+ <tr><td>F16<td>F16
+ </table>
+<tr>
+ <td rowspan="2">Fill
+ <td rowspan="2" style="width:200px;"> Set the values of a tensor with a given value
+ <td rowspan="2">
+ <ul>
+ <li>ANEURALNETWORKS_FILL
+ </ul>
+ <td>NEFill
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>All<td>All
+ </table>
+<tr>
+ <td>CLFill
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>All<td>All
+ </table>
+<tr>
+ <td rowspan="2">Floor
+  <td rowspan="2" style="width:200px;"> Function to round each value down to the nearest integer (floor).
+ <td rowspan="2">
+ <ul>
+ <li>ANEURALNETWORKS_FLOOR
+ </ul>
+ <td>NEFloor
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>F32<td>F32
+ <tr><td>F16<td>F16
+ </table>
+<tr>
+ <td>CLFloor
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>F32<td>F32
+ <tr><td>F16<td>F16
+ </table>
+<tr>
+ <td rowspan="2">Permute
+ <td rowspan="2" style="width:200px;"> Function to transpose an ND tensor.
+ <td rowspan="2">
+ <ul>
+ <li>ANEURALNETWORKS_TRANSPOSE
+ </ul>
+ <td>NEPermute
+ <td>
+ <ul>
+ <li>NHWC
+ <li>NCHW
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>All<td>All
+ </table>
+<tr>
+ <td>CLPermute
+ <td>
+ <ul>
+ <li>NHWC
+ <li>NCHW
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>All<td>All
+ </table>
+<tr>
+ <td rowspan="2">PixelWiseMultiplication
+  <td rowspan="2" style="width:200px;"> Function to perform a multiplication.
+ <td rowspan="2">
+ <ul>
+ <li>ANEURALNETWORKS_MUL
+ </ul>
+ <td>NEPixelWiseMultiplication
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src0<th>src1<th>dst
+ <tr><td>QASYMM8<td>QASYMM8<td>QASYMM8
+ <tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED<td>QASYMM8_SIGNED
+        <tr><td>QSYMM16<td>QSYMM16<td>QSYMM16
+ <tr><td>QSYMM16<td>QSYMM16<td>S32
+ <tr><td>U8<td>U8<td>U8
+ <tr><td>U8<td>U8<td>S16
+ <tr><td>U8<td>S16<td>S16
+ <tr><td>S16<td>U8<td>S16
+ <tr><td>S16<td>S16<td>S16
+ <tr><td>F16<td>F16<td>F16
+ <tr><td>F32<td>S32<td>F32
+ </table>
+<tr>
+ <td>CLPixelWiseMultiplication
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src0<th>src1<th>dst
+ <tr><td>QASYMM8<td>QASYMM8<td>QASYMM8
+ <tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED<td>QASYMM8_SIGNED
+        <tr><td>QSYMM16<td>QSYMM16<td>QSYMM16
+ <tr><td>QSYMM16<td>QSYMM16<td>S32
+ <tr><td>U8<td>U8<td>U8
+ <tr><td>U8<td>U8<td>S16
+ <tr><td>U8<td>S16<td>S16
+ <tr><td>S16<td>U8<td>S16
+ <tr><td>S16<td>S16<td>S16
+ <tr><td>F16<td>F16<td>F16
+ <tr><td>F32<td>S32<td>F32
+ </table>
+<tr>
+ <td rowspan="2">PoolingLayer
+  <td rowspan="2" style="width:200px;"> Function to perform pooling with the specified pooling operation.
+ <td rowspan="2">
+ <ul>
+ <li>ANEURALNETWORKS_AVERAGE_POOL_2D
+ <li>ANEURALNETWORKS_L2_POOL_2D
+ <li>ANEURALNETWORKS_MAX_POOL_2D
+ </ul>
+ <td>NEPoolingLayer
+ <td>
+ <ul>
+ <li>NHWC
+ <li>NCHW
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>QASYMM8<td>QASYMM8
+ <tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED
+ <tr><td>F16<td>F16
+ <tr><td>F32<td>F32
+ </table>
+<tr>
+ <td>CLPoolingLayer
+ <td>
+ <ul>
+ <li>NHWC
+ <li>NCHW
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>QASYMM8<td>QASYMM8
+ <tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED
+ <tr><td>F16<td>F16
+ <tr><td>F32<td>F32
+ </table>
+<tr>
+ <td rowspan="2">PReluLayer
+ <td rowspan="2" style="width:200px;"> Function to compute the activation layer with the PRELU activation function.
+ <td rowspan="2">
+ <ul>
+ <li>ANEURALNETWORKS_PRELU
+ </ul>
+ <td>NEPReluLayer
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>QASYMM8<td>QASYMM8
+ <tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED
+ <tr><td>F16<td>F16
+ <tr><td>F32<td>F32
+ </table>
+<tr>
+ <td>CLPReluLayer
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>QASYMM8<td>QASYMM8
+ <tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED
+ <tr><td>F16<td>F16
+ <tr><td>F32<td>F32
+ </table>
+<tr>
+ <td rowspan="2">QuantizationLayer
+  <td rowspan="2" style="width:200px;"> Function to perform a quantization layer.
+ <td rowspan="2">
+ <ul>
+ <li>ANEURALNETWORKS_QUANTIZE
+ </ul>
+ <td>NEQuantizationLayer
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>QASYMM8<td>QASYMM8
+ <tr><td>QASYMM8<td>QASYMM8_SIGNED
+ <tr><td>QASYMM8<td>QASYMM16
+ <tr><td>QASYMM8_SIGNED<td>QASYMM8
+ <tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED
+ <tr><td>QASYMM8_SIGNED<td>QASYMM16
+ <tr><td>F16<td>QASYMM8
+ <tr><td>F16<td>QASYMM8_SIGNED
+ <tr><td>F16<td>QASYMM16
+ <tr><td>F32<td>QASYMM8
+ <tr><td>F32<td>QASYMM8_SIGNED
+ <tr><td>F32<td>QASYMM16
+ </table>
+<tr>
+ <td>CLQuantizationLayer
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>QASYMM8<td>QASYMM8
+ <tr><td>QASYMM8<td>QASYMM8_SIGNED
+ <tr><td>QASYMM8<td>QASYMM16
+ <tr><td>QASYMM8_SIGNED<td>QASYMM8
+ <tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED
+ <tr><td>QASYMM8_SIGNED<td>QASYMM16
+ <tr><td>F16<td>QASYMM8
+ <tr><td>F16<td>QASYMM8_SIGNED
+ <tr><td>F16<td>QASYMM16
+ <tr><td>F32<td>QASYMM8
+ <tr><td>F32<td>QASYMM8_SIGNED
+ <tr><td>F32<td>QASYMM16
+ </table>
+<tr>
+ <td rowspan="2">ReshapeLayer
+  <td rowspan="2" style="width:200px;"> Function to reshape a tensor.
+ <td rowspan="2">
+ <ul>
+ <li>ANEURALNETWORKS_RESHAPE
+ <li>ANEURALNETWORKS_SQUEEZE
+ </ul>
+ <td>NEReshapeLayer
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>All<td>All
+ </table>
+<tr>
+ <td>CLReshapeLayer
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>All<td>All
+ </table>
+<tr>
+ <td rowspan="2">Scale
+  <td rowspan="2" style="width:200px;"> Function to resize a tensor using one of the following interpolation policies: Bilinear, Nearest neighbor.
+ <td rowspan="2">
+ <ul>
+ <li>ANEURALNETWORKS_RESIZE_BILINEAR
+ <li>ANEURALNETWORKS_RESIZE_NEAREST_NEIGHBOR
+ </ul>
+ <td>NEScale
+ <td>
+ <ul>
+ <li>NHWC
+ <li>NCHW
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>QASYMM8<td>QASYMM8
+ <tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED
+ <tr><td>F16<td>F16
+ <tr><td>F32<td>F32
+ <tr><td>U8<td>U8
+ <tr><td>S16<td>S16
+ </table>
+<tr>
+ <td>CLScale
+ <td>
+ <ul>
+ <li>NHWC
+ <li>NCHW
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>QASYMM8<td>QASYMM8
+ <tr><td>QASYMM8_SIGNED<td>QASYMM8_SIGNED
+ <tr><td>F16<td>F16
+ <tr><td>F32<td>F32
+ <tr><td>U8<td>U8
+ <tr><td>S16<td>S16
+ </table>
+<tr>
+ <td rowspan="2">Slice
+ <td rowspan="2" style="width:200px;"> Function to perform tensor slicing.
+ <td rowspan="2">
+ <ul>
+ <li>ANEURALNETWORKS_SLICE
+ </ul>
+ <td>NESlice
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>All<td>All
+ </table>
+<tr>
+ <td>CLSlice
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>All<td>All
+ </table>
+<tr>
+ <td rowspan="2">StridedSlice
+ <td rowspan="2" style="width:200px;"> Function to extract a strided slice of a tensor.
+ <td rowspan="2">
+ <ul>
+ <li>ANEURALNETWORKS_STRIDED_SLICE
+ </ul>
+ <td>NEStridedSlice
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>All<td>All
+ </table>
+<tr>
+ <td>CLStridedSlice
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>All<td>All
+ </table>
+<tr>
+ <td rowspan="2">Transpose
+  <td rowspan="2" style="width:200px;"> Function to transpose a 2D tensor.
+ <td rowspan="2">
+ <ul>
+ <li>ANEURALNETWORKS_TRANSPOSE
+ </ul>
+ <td>NETranspose
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>All<td>All
+ </table>
+<tr>
+ <td>CLTranspose
+ <td>
+ <ul>
+ <li>All
+ </ul>
+ <td>
+ <table>
+ <tr><th>src<th>dst
+ <tr><td>All<td>All
+ </table>
+</table>
+
+*/
+} // namespace \ No newline at end of file
diff --git a/docs/Doxyfile b/docs/Doxyfile
index 9d711486c0..6fb5de7020 100644
--- a/docs/Doxyfile
+++ b/docs/Doxyfile
@@ -777,6 +777,7 @@ INPUT = ./docs/00_introduction.dox \
./docs/06_functions_list.dox \
./docs/07_errata.dox \
./docs/08_api.dox \
+ ./docs/09_operators_list.dox \
./docs/ComputeLibrary.dir \
./arm_compute/ \
./src/ \