COMPMID-3532: Align data type support between doxygen and implementation - CL

Also removes some unused code. Change-Id: I85687c40999c3cdf9e6fccfcd020b0901a9515fe Signed-off-by: Michele Di Giorgio <michele.digiorgio@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3581 Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com>
author: Michele Di Giorgio <michele.digiorgio@arm.com> 2020-07-06 11:27:21 +0100
committer: Michele Di Giorgio <michele.digiorgio@arm.com> 2020-07-20 12:39:29 +0000
commit: f6f7876e9ee8b58a8a6b335b032d554412fa3983 (patch)
tree: 669c86bfc60ec99965151022a4112c53116b06c0 /arm_compute
parent: 954051f449788f22eb605b126f71af923950ca29 (diff)
download: ComputeLibrary-f6f7876e9ee8b58a8a6b335b032d554412fa3983.tar.gz
29 files changed, 345 insertions, 386 deletions
diff --git a/arm_compute/core/CL/CLHelpers.h b/arm_compute/core/CL/CLHelpers.h
index f3d64155a6..cf18e16e34 100644
--- a/arm_compute/core/CL/CLHelpers.h
+++ b/arm_compute/core/CL/CLHelpers.h
@@ -97,14 +97,6 @@ std::string get_cl_dot8_acc_type_from_data_type(const DataType &dt);
  */
 std::string get_data_size_from_data_type(const DataType &dt);
 
-/** Translates fixed point tensor data type to the underlying OpenCL type.
- *
- * @param[in] dt @ref DataType to be translated to OpenCL type.
- *
- * @return The string specifying the underlying OpenCL type to be used.
- */
-std::string get_underlying_cl_type_from_data_type(const DataType &dt);
-
 /** Helper function to get the GPU target from CL device
  *
  * @param[in] device A CL device
diff --git a/arm_compute/core/CL/CLKernels.h b/arm_compute/core/CL/CLKernels.h
index ce3b325546..253df597e0 100644
--- a/arm_compute/core/CL/CLKernels.h
+++ b/arm_compute/core/CL/CLKernels.h
@@ -84,7 +84,6 @@
 #include "arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h"
 #include "arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h"
 #include "arm_compute/core/CL/kernels/CLGEMMLowpReductionKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h"
 #include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h"
 #include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.h"
 #include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h"
diff --git a/arm_compute/core/CL/kernels/CLCannyEdgeKernel.h b/arm_compute/core/CL/kernels/CLCannyEdgeKernel.h
index 5aec25259f..c4d0297aef 100644
--- a/arm_compute/core/CL/kernels/CLCannyEdgeKernel.h
+++ b/arm_compute/core/CL/kernels/CLCannyEdgeKernel.h
@@ -26,8 +26,6 @@
 
 #include "arm_compute/core/CL/ICLKernel.h"
 
-#include <cstdint>
-
 namespace arm_compute
 {
 class ICLTensor;
@@ -136,7 +134,7 @@ public:
     CLEdgeTraceKernel &operator=(const CLEdgeTraceKernel &) = delete;
     /** Initialise the kernel's source, destination and border mode.
      *
-     * @param[in]     input            Source tensor. Data types supported: U8.
+     * @param[in]     input            Source tensor. Data types supported: U16/U32.
      * @param[out]    output           Destination tensor. Data types supported: U8.
      * @param[in]     upper_thr        Upper threshold used for the hysteresis
      * @param[in]     lower_thr        Lower threshold used for the hysteresis
@@ -154,7 +152,7 @@ public:
     /** Initialise the kernel's source, destination and border mode.
      *
      * @param[in]     compile_context  The compile context to be used.
-     * @param[in]     input            Source tensor. Data types supported: U8.
+     * @param[in]     input            Source tensor. Data types supported: U16/U32.
      * @param[out]    output           Destination tensor. Data types supported: U8.
      * @param[in]     upper_thr        Upper threshold used for the hysteresis
      * @param[in]     lower_thr        Lower threshold used for the hysteresis
diff --git a/arm_compute/core/CL/kernels/CLChannelCombineKernel.h b/arm_compute/core/CL/kernels/CLChannelCombineKernel.h
index 3ba426cbfa..f9c33df7c1 100644
--- a/arm_compute/core/CL/kernels/CLChannelCombineKernel.h
+++ b/arm_compute/core/CL/kernels/CLChannelCombineKernel.h
@@ -57,7 +57,7 @@ public:
      * @param[in]  plane1 The 2D plane that forms channel 1. Must be of U8 format.
      * @param[in]  plane2 The 2D plane that forms channel 2. Must be of U8 format.
      * @param[in]  plane3 The 2D plane that forms channel 3. Must be of U8 format.
-     * @param[out] output The single planar output tensor.
+     * @param[out] output The single planar output tensor. Supported formats: RGB888/RGBA8888/YUYV422/UYVY422.
      */
     void configure(const ICLTensor *plane0, const ICLTensor *plane1, const ICLTensor *plane2, const ICLTensor *plane3, ICLTensor *output);
     /** Configure function's inputs and outputs.
@@ -75,7 +75,7 @@ public:
      * @param[in]  plane0 The 2D plane that forms channel 0. Must be of U8 format.
      * @param[in]  plane1 The 2D plane that forms channel 1. Must be of U8 format.
      * @param[in]  plane2 The 2D plane that forms channel 2. Must be of U8 format.
-     * @param[out] output The multi planar output tensor.
+     * @param[out] output The multi planar output tensor. Supported formats: RGB888/RGBA8888/YUYV422/UYVY422.
      */
     void configure(const ICLImage *plane0, const ICLImage *plane1, const ICLImage *plane2, ICLMultiImage *output);
     /** Configure function's inputs and outputs.
@@ -84,7 +84,7 @@ public:
      * @param[in]  plane0          The 2D plane that forms channel 0. Must be of U8 format.
      * @param[in]  plane1          The 2D plane that forms channel 1. Must be of U8 format.
      * @param[in]  plane2          The 2D plane that forms channel 2. Must be of U8 format.
-     * @param[out] output          The multi planar output tensor.
+     * @param[out] output          The multi planar output tensor. Supported formats: RGB888/RGBA8888/YUYV422/UYVY422.
      */
     void configure(const CLCompileContext &compile_context, const ICLImage *plane0, const ICLImage *plane1, const ICLImage *plane2, ICLMultiImage *output);
 
diff --git a/arm_compute/core/CL/kernels/CLConvolutionKernel.h b/arm_compute/core/CL/kernels/CLConvolutionKernel.h
index 4bf7c754b0..0f500fb63a 100644
--- a/arm_compute/core/CL/kernels/CLConvolutionKernel.h
+++ b/arm_compute/core/CL/kernels/CLConvolutionKernel.h
@@ -108,7 +108,7 @@ public:
      *
      * @param[in]  compile_context  The compile context to be used.
      * @param[in]  input            Source tensor. Data types supported: U8.
-     * @param[out] output           Destination tensor, Data types supported: S16.
+     * @param[out] output           Destination tensor, Data types supported: U16/S16/S32.
      * @param[in]  conv             Convolution matrix to apply to the input tensor.
      * @param[in]  border_undefined True if the border mode is undefined. False if it's replicate or constant.
      */
@@ -135,7 +135,7 @@ class CLSeparableConvolutionVertKernel : public ICLSimple2DKernel
 public:
     /** Initialise the kernel's input, output and border mode.
      *
-     * @param[in]  input            Source tensor. Data types supported: S16.
+     * @param[in]  input            Source tensor. Data types supported: U16/S16/S32.
      * @param[out] output           Destination tensor, Data types supported: U8, S16.
      * @param[in]  conv             Convolution matrix to apply to the input tensor.
      * @param[in]  scale            Scale of the convolution matrix.
@@ -146,7 +146,7 @@ public:
     /** Initialise the kernel's input, output and border mode.
      *
      * @param[in]  compile_context  The compile context to be used.
-     * @param[in]  input            Source tensor. Data types supported: S16.
+     * @param[in]  input            Source tensor. Data types supported: U16/S16/S32.
      * @param[out] output           Destination tensor, Data types supported: U8, S16.
      * @param[in]  conv             Convolution matrix to apply to the input tensor.
      * @param[in]  scale            Scale of the convolution matrix.
diff --git a/arm_compute/core/CL/kernels/CLCopyKernel.h b/arm_compute/core/CL/kernels/CLCopyKernel.h
index 1f0b5a40e4..11a6d54e90 100644
--- a/arm_compute/core/CL/kernels/CLCopyKernel.h
+++ b/arm_compute/core/CL/kernels/CLCopyKernel.h
@@ -47,7 +47,7 @@ public:
     CLCopyKernel &operator=(CLCopyKernel &&) = default;
     /** Initialize the kernel's input, output.
      *
-     * @param[in]  input         Source tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
+     * @param[in]  input         Source tensor. Data types supported: All.
      * @param[out] output        Destination tensor. Data types supported: same as @p input.
      * @param[in]  padding       (Optional) Padding to be applied to the input tensor
      * @param[in]  output_window (Optional) Window to be used in case only copying into part of a tensor. Default is nullptr.
@@ -56,7 +56,7 @@ public:
     /** Initialize the kernel's input, output.
      *
      * @param[in]  compile_context The compile context to be used.
-     * @param[in]  input           Source tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
+     * @param[in]  input           Source tensor. Data types supported: All.
      * @param[out] output          Destination tensor. Data types supported: same as @p input.
      * @param[in]  padding         (Optional) Padding to be applied to the input tensor
      * @param[in]  output_window   (Optional) Window to be used in case only copying into part of a tensor. Default is nullptr.
@@ -64,7 +64,7 @@ public:
     void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const PaddingList &padding = PaddingList(), Window *output_window = nullptr);
     /** Static function to check if given info will lead to a valid configuration of @ref CLCopyKernel
      *
-     * @param[in] input         Source tensor info. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
+     * @param[in] input         Source tensor info. Data types supported: All.
      * @param[in] output        Destination tensor info. Data types supported: same as @p input.
      * @param[in] padding       (Optional) Padding to be applied to the input tensor
      * @param[in] output_window (Optional) Window to be used in case only copying into part of a tensor. Default is nullptr.
diff --git a/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h b/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h
index d933a7eea5..4ca6c0bf4a 100644
--- a/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h
+++ b/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h
@@ -40,7 +40,7 @@ public:
     /** Default move assignment operator. */
     /** Initialize the function's source, destination, conv and border_size.
      *
-     * @param[in]  input              Source tensor. DataType supported: QASYMM8/QASYMM8_SIGNED.
+     * @param[in]  input              Source tensor. DataType supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
      * @param[in]  weights            Weights tensor. A 3D tensor with dimensions [IFM, 3, 3].
      *                                Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED.
      * @param[in]  biases             Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
@@ -61,7 +61,7 @@ public:
     /** Initialize the function's source, destination, conv and border_size.
      *
      * @param[in]  compile_context    The compile context to be used.
-     * @param[in]  input              Source tensor. DataType supported: QASYMM8/QASYMM8_SIGNED.
+     * @param[in]  input              Source tensor. DataType supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
      * @param[in]  weights            Weights tensor. A 3D tensor with dimensions [IFM, 3, 3].
      *                                Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED.
      * @param[in]  biases             Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
@@ -81,7 +81,7 @@ public:
                    const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr) override;
     /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayer3x3NHWCKernel
      *
-     * @param[in] input              Source tensor info. DataType supported: QASYMM8/QASYMM8_SIGNED.
+     * @param[in] input              Source tensor info. DataType supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
      * @param[in] weights            Weights tensor info. A 3D tensor with dimensions [IFM, 3, 3].
      *                               Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED.
      * @param[in] biases             Biases tensor info. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
diff --git a/arm_compute/core/CL/kernels/CLElementwiseOperationKernel.h b/arm_compute/core/CL/kernels/CLElementwiseOperationKernel.h
index 699ff5d7a9..1995aed7b6 100644
--- a/arm_compute/core/CL/kernels/CLElementwiseOperationKernel.h
+++ b/arm_compute/core/CL/kernels/CLElementwiseOperationKernel.h
@@ -113,7 +113,7 @@ public:
     /** Static function to check if given info will lead to a valid configuration of @ref CLSaturatedArithmeticOperationKernel
      *
      * @param[in] op       Arithmetic operation to be executed.
-     * @param[in] input1   First tensor input. Data types supported: U8/S8/QASYMM8/QASYMM8_SIGNED/U16/S16/QSYMM16/F16/U32/S32/F32.
+     * @param[in] input1   First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32.
      * @param[in] input2   Second tensor input. Data types supported: Same as @p input1.
      * @param[in] output   Output tensor. Data types supported: Same as @p input1.
      * @param[in] policy   Policy to use to handle overflow.
@@ -124,7 +124,7 @@ public:
      *
      * @param[in] compile_context The compile context to be used.
      * @param[in] op              Arithmetic operation to be executed.
-     * @param[in] input1          First tensor input. Data types supported: U8/S8/QASYMM8/QASYMM8_SIGNED/U16/S16/QSYMM16/F16/U32/S32/F32.
+     * @param[in] input1          First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32.
      * @param[in] input2          Second tensor input. Data types supported: Same as @p input1.
      * @param[in] output          Output tensor. Data types supported: Same as @p input1.
      * @param[in] policy          Policy to use to handle overflow.
@@ -136,7 +136,7 @@ public:
     /** Static function to check if given info will lead to a valid configuration of @ref CLSaturatedArithmeticOperationKernel
      *
      * @param[in] op       Arithmetic operation to be executed.
-     * @param[in] input1   First tensor input info. Data types supported: U8/S8/QASYMM8/QASYMM8_SIGNED/U16/S16/QSYMM16/F16/U32/S32/F32.
+     * @param[in] input1   First tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32.
      * @param[in] input2   Second tensor input info. Data types supported: Same as @p input1.
      * @param[in] output   Output tensor info. Data types supported: Same as @p input1.
      * @param[in] policy   Policy to use to handle overflow.
@@ -170,7 +170,7 @@ public:
     /** Static function to check if given info will lead to a valid configuration of @ref CLArithmeticOperationKernel
      *
      * @param[in] op       Arithmetic operation to be executed.
-     * @param[in] input1   First tensor input. Data types supported: U8/S8/QASYMM8/QASYMM8_SIGNED/U16/S16/QSYMM16/F16/U32/S32/F32.
+     * @param[in] input1   First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32.
      * @param[in] input2   Second tensor input. Data types supported: Same as @p input1.
      * @param[in] output   Output tensor. Data types supported: Same as @p input1.
      * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
@@ -180,7 +180,7 @@ public:
      *
      * @param[in] compile_context The compile context to be used.
      * @param[in] op              Arithmetic operation to be executed.
-     * @param[in] input1          First tensor input. Data types supported: U8/S8/QASYMM8/QASYMM8_SIGNED/U16/S16/QSYMM16/F16/U32/S32/F32.
+     * @param[in] input1          First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32.
      * @param[in] input2          Second tensor input. Data types supported: Same as @p input1.
      * @param[in] output          Output tensor. Data types supported: Same as @p input1.
      * @param[in] act_info        (Optional) Activation layer information in case of a fused activation.
@@ -191,7 +191,7 @@ public:
     /** Static function to check if given info will lead to a valid configuration of @ref CLArithmeticOperationKernel
      *
      * @param[in] op       Arithmetic operation to be executed.
-     * @param[in] input1   First tensor input info. Data types supported: U8/S8/QASYMM8/QASYMM8_SIGNED/U16/S16/QSYMM16/F16/U32/S32/F32.
+     * @param[in] input1   First tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32.
      * @param[in] input2   Second tensor input info. Data types supported: Same as @p input1.
      * @param[in] output   Output tensor info. Data types supported: Same as @p input1.
      * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
diff --git a/arm_compute/core/CL/kernels/CLGEMMLowpReductionKernel.h b/arm_compute/core/CL/kernels/CLGEMMLowpReductionKernel.h
index bb7461ca44..6066e2a815 100644
--- a/arm_compute/core/CL/kernels/CLGEMMLowpReductionKernel.h
+++ b/arm_compute/core/CL/kernels/CLGEMMLowpReductionKernel.h
@@ -48,7 +48,7 @@ public:
 
     /** Initialise the kernel's input and output.
      *
-     * @param[in]  input  Input tensor. Data type supported: S8
+     * @param[in]  input  Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8.
      * @param[out] output Output row-vector of sums of all the entries in each row/col of input tensor. Data type supported: S32
      * @param[in]  info   Kernel metadata:
      *                    - k            Number of matrix columns/rows depending on the type of reduction.
@@ -60,7 +60,7 @@ public:
     /** Initialise the kernel's input and output.
      *
      * @param[in]  compile_context The compile context to be used.
-     * @param[in]  input           Input tensor. Data type supported: S8
+     * @param[in]  input           Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8.
      * @param[out] output          Output row-vector of sums of all the entries in each row/col of input tensor. Data type supported: S32
      * @param[in]  info            Kernel metadata:
      *                             - k            Number of matrix columns/rows depending on the type of reduction.
@@ -85,7 +85,7 @@ class CLGEMMLowpMatrixAReductionKernel : public ICLGEMMLowpReductionKernel
 public:
     /** Initialise the kernel's input and output.
      *
-     * @param[in]  mtx_a          Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED
+     * @param[in]  mtx_a          Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8.
      * @param[out] vector_sum_row Output row-vector of sums of all the entries in each row of mtx_a. Data type supported: S32
      * @param[in]  info           Kernel metadata:
      *                            - k            Number of matrix columns/rows depending on the type of reduction.
@@ -97,7 +97,7 @@ public:
     /** Initialise the kernel's input and output.
      *
      * @param[in]  compile_context The compile context to be used.
-     * @param[in]  mtx_a           Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED
+     * @param[in]  mtx_a           Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8.
      * @param[out] vector_sum_row  Output row-vector of sums of all the entries in each row of mtx_a. Data type supported: S32
      * @param[in]  info            Kernel metadata:
      *                             - k            Number of matrix columns/rows depending on the type of reduction.
@@ -108,7 +108,7 @@ public:
     void configure(const CLCompileContext &compile_context, const ICLTensor *mtx_a, ICLTensor *vector_sum_row, const GEMMLowpReductionKernelInfo &info) override;
     /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpMatrixAReductionKernel
      *
-     * @param[in] mtx_a          Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED
+     * @param[in] mtx_a          Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8.
      * @param[in] vector_sum_row Output row-vector of sums of all the entries in each row of mtx_a. Data type supported: S32
      * @param[in] info           Kernel metadata:
      *                           - k            Number of matrix columns/rows depending on the type of reduction.
@@ -134,7 +134,7 @@ class CLGEMMLowpMatrixBReductionKernel : public ICLGEMMLowpReductionKernel
 public:
     /** Initialise the kernel's input and output.
      *
-     * @param[in]  mtx_b          Input tensor. Data type supported: Data type supported: QASYMM8/QASYMM8_SIGNED
+     * @param[in]  mtx_b          Input tensor. Data type supported: Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL.
      * @param[out] vector_sum_col Output row-vector of sums of all the entries in each column of mtx_b. Data type supported: S32
      * @param[in]  info           Kernel metadata:
      *                            - k            Number of matrix columns/rows depending on the type of reduction.
@@ -146,7 +146,7 @@ public:
     /** Initialise the kernel's input and output.
      *
      * @param[in]  compile_context The compile context to be used.
-     * @param[in]  mtx_b           Input tensor. Data type supported: Data type supported: QASYMM8/QASYMM8_SIGNED
+     * @param[in]  mtx_b           Input tensor. Data type supported: Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL.
      * @param[out] vector_sum_col  Output row-vector of sums of all the entries in each column of mtx_b. Data type supported: S32
      * @param[in]  info            Kernel metadata:
      *                             - k            Number of matrix columns/rows depending on the type of reduction.
@@ -157,7 +157,7 @@ public:
     void configure(const CLCompileContext &compile_context, const ICLTensor *mtx_b, ICLTensor *vector_sum_col, const GEMMLowpReductionKernelInfo &info) override;
     /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpMatrixBReductionKernel
      *
-     * @param[in] mtx_b          Input tensor. Data type supported: Data type supported: QASYMM8/QASYMM8_SIGNED
+     * @param[in] mtx_b          Input tensor. Data type supported: Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL.
      * @param[in] vector_sum_col Output row-vector of sums of all the entries in each column of mtx_b. Data type supported: S32
      * @param[in] info           Kernel metadata:
      *                           - k            Number of matrix columns/rows depending on the type of reduction.
diff --git a/arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h b/arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h
deleted file mode 100644
index 3b4a0be0e8..0000000000
--- a/arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLGEMMMATRIXACCUMULATEBIASESKERNEL_H
-#define ARM_COMPUTE_CLGEMMMATRIXACCUMULATEBIASESKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-/** Interface to add a bias to each row of the input tensor
- *
- */
-class CLGEMMMatrixAccumulateBiasesKernel : public ICLKernel
-{
-public:
-    /** Default constructor */
-    CLGEMMMatrixAccumulateBiasesKernel();
-    /** Prevent instances of this class from being copied (As this class contains pointers) */
-    CLGEMMMatrixAccumulateBiasesKernel(const CLGEMMMatrixAccumulateBiasesKernel &) = delete;
-    /** Prevent instances of this class from being copied (As this class contains pointers) */
-    CLGEMMMatrixAccumulateBiasesKernel &operator=(const CLGEMMMatrixAccumulateBiasesKernel &) = delete;
-    /** Allow instances of this class to be moved */
-    CLGEMMMatrixAccumulateBiasesKernel(CLGEMMMatrixAccumulateBiasesKernel &&) = default;
-    /** Allow instances of this class to be moved */
-    CLGEMMMatrixAccumulateBiasesKernel &operator=(CLGEMMMatrixAccumulateBiasesKernel &&) = default;
-    /** Set the accumulate buffer and the biases of the kernel.
-     *
-     * @param[in, out] accum  The accumulate tensor to convert. Data types supported: F16/F32
-     * @param[in]      biases The shared biases tensor to append. It must be 1D tensor. Data types supported: Same as @p input
-     */
-    void configure(ICLTensor *accum, const ICLTensor *biases);
-    /** Set the accumulate buffer and the biases of the kernel.
-     *
-     * @param[in]      compile_context The compile context to be used.
-     * @param[in, out] accum           The accumulate tensor to convert. Data types supported: F16/F32
-     * @param[in]      biases          The shared biases tensor to append. It must be 1D tensor. Data types supported: Same as @p input
-     */
-    void configure(const CLCompileContext &compile_context, ICLTensor *accum, const ICLTensor *biases);
-    /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMMatrixAccumulateBiasesKernel
-     *
-     * @param[in] accum      The accumulate tensor to convert. Data types supported: F16/F32
-     * @param[in] biases     The shared biases tensor to append. It must be 1D tensor. Data types supported: Same as @p input
-     * @param[in] gpu_target GPU target
-     *
-     * @return a status
-     */
-    static Status validate(const ITensorInfo *accum, const ITensorInfo *biases, GPUTarget gpu_target);
-
-    // Inherited methods overridden:
-    void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
-    ICLTensor       *_accum;
-    const ICLTensor *_biases;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLGEMMMATRIXACCUMULATEBIASESKERNEL_H */
diff --git a/arm_compute/core/CL/kernels/CLMagnitudePhaseKernel.h b/arm_compute/core/CL/kernels/CLMagnitudePhaseKernel.h
index 5f1d82ded1..a741b1745a 100644
--- a/arm_compute/core/CL/kernels/CLMagnitudePhaseKernel.h
+++ b/arm_compute/core/CL/kernels/CLMagnitudePhaseKernel.h
@@ -51,9 +51,9 @@ public:
      *
      * @note At least one of output1 or output2 must be set.
      *
-     * @param[in]  gx         The input gradient X tensor. Data types supported: S16.
-     * @param[in]  gy         The input gradient Y tensor. Data types supported: S16.
-     * @param[out] magnitude  (Optional) The output tensor - Magnitude. Data types supported: S16.
+     * @param[in]  gx         The input gradient X tensor. Data types supported: S16/S32.
+     * @param[in]  gy         The input gradient Y tensor. Data types supported: S16/S32.
+     * @param[out] magnitude  (Optional) The output tensor - Magnitude. Data types supported: S16/S32.
      * @param[out] phase      (Optional) The output tensor - Phase. Data types supported: U8.
      * @param[in]  mag_type   (Optional) Magnitude calculation type. Default: L2NORM.
      * @param[in]  phase_type (Optional) Phase calculation type. Default: SIGNED.
@@ -65,9 +65,9 @@ public:
      * @note At least one of output1 or output2 must be set.
      *
      * @param[in]  compile_context The compile context to be used.
-     * @param[in]  gx              The input gradient X tensor. Data types supported: S16.
-     * @param[in]  gy              The input gradient Y tensor. Data types supported: S16.
-     * @param[out] magnitude       (Optional) The output tensor - Magnitude. Data types supported: S16.
+     * @param[in]  gx              The input gradient X tensor. Data types supported: S16/S32.
+     * @param[in]  gy              The input gradient Y tensor. Data types supported: S16/S32.
+     * @param[out] magnitude       (Optional) The output tensor - Magnitude. Data types supported: S16/S32.
      * @param[out] phase           (Optional) The output tensor - Phase. Data types supported: U8.
      * @param[in]  mag_type        (Optional) Magnitude calculation type. Default: L2NORM.
      * @param[in]  phase_type      (Optional) Phase calculation type. Default: SIGNED.
diff --git a/arm_compute/core/CL/kernels/CLPadLayerKernel.h b/arm_compute/core/CL/kernels/CLPadLayerKernel.h
index 3b78bb9834..5bf5841803 100644
--- a/arm_compute/core/CL/kernels/CLPadLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLPadLayerKernel.h
@@ -25,7 +25,6 @@
 #define ARM_COMPUTE_CLPADLAYERKERNEL_H
 
 #include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/CL/ICLTensor.h"
 
 namespace arm_compute
 {
@@ -49,7 +48,7 @@ public:
     ~CLPadLayerKernel() = default;
     /** Set the input and output tensor.
      *
-     * @param[in]  input          Source tensor. Data types supported: U8, S8, QASYMM8, QASYMM8_SIGNED, U16, S16, U32, S32, F16, F32.
+     * @param[in]  input          Source tensor. Data types supported: All.
      * @param[out] output         Output tensor. Data type supported: same as @p input
      * @param[in]  padding        The padding for each spatial dimension of the input tensor. The pair padding[i]
      *                            specifies the front and the end padding in the i-th dimension.
@@ -73,7 +72,7 @@ public:
                    PaddingMode mode = PaddingMode::CONSTANT);
     /** Static function to check if given info will lead to a valid configuration of @ref CLPadLayerKernel
      *
-     * @param[in] input          Source tensor info. Data types supported: U8, S8, QASYMM8, QASYMM8_SIGNED, U16, S16, U32, S32, F16, F32.
+     * @param[in] input          Source tensor info. Data types supported: All.
      * @param[in] output         Output tensor info. Data type supported: same as @p input
      * @param[in] padding        The padding for each spatial dimension of the input tensor. The pair padding[i]
      *                           specifies the front and the end padding in the i-th dimension.
diff --git a/arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h b/arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h
index bb64060a12..bb98eb83bf 100644
--- a/arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h
+++ b/arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h
@@ -48,17 +48,23 @@ public:
     CLPixelWiseMultiplicationKernel &operator=(CLPixelWiseMultiplicationKernel &&) = default;
     /** Initialise the kernel's input, output and border mode.
      *
+     * Valid configurations (Input1,Input2) -> Output :
+     *
+     *   - (U8,U8)                         -> U8
+     *   - (U8,U8)                         -> S16
+     *   - (U8,S16)                        -> S16
+     *   - (S16,U8)                        -> S16
+     *   - (S16,S16)                       -> S16
+     *   - (F16,F16)                       -> F16
+     *   - (F32,F32)                       -> F32
+     *   - (QASYMM8,QASYMM8)               -> QASYMM8
+     *   - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED
+     *   - (QSYMM16,QSYMM16)               -> QSYMM16
+     *   - (QSYMM16,QSYMM16)               -> S32
+     *
      * @param[in]  input1          An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
-     * @param[in]  input2          An input tensor. Data types supported: same as @p input1.
-     * @param[out] output          The output tensor, Data types supported:
-     *                             - U8, only if both input are U8
-     *                             - QASYMM8, only if both inputs are QASYMM8
-     *                             - QASYMM8_SIGNED, only if both inputs are QASYMM8_SIGNED
-     *                             - S16
-     *                             - QSYMM16, only if both inputs are QSYMM16
-     *                             - S32, only if both inputs are QSYMM16
-     *                             - F16
-     *                             - F32
+     * @param[in]  input2          An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
+     * @param[out] output          The output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
      * @param[in]  scale           Scale to apply after multiplication.
      *                             Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15.
      * @param[in]  overflow_policy Overflow policy. Supported overflow policies: Wrap, Saturate
@@ -69,10 +75,24 @@ public:
                    ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info = ActivationLayerInfo());
     /** Initialise the kernel's input, output and border mode.
      *
+     * Valid configurations (Input1,Input2) -> Output :
+     *
+     *   - (U8,U8)                         -> U8
+     *   - (U8,U8)                         -> S16
+     *   - (U8,S16)                        -> S16
+     *   - (S16,U8)                        -> S16
+     *   - (S16,S16)                       -> S16
+     *   - (F16,F16)                       -> F16
+     *   - (F32,F32)                       -> F32
+     *   - (QASYMM8,QASYMM8)               -> QASYMM8
+     *   - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED
+     *   - (QSYMM16,QSYMM16)               -> QSYMM16
+     *   - (QSYMM16,QSYMM16)               -> S32
+     *
      * @param[in]  compile_context The compile context to be used.
      * @param[in]  input1          An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
-     * @param[in]  input2          An input tensor. Data types supported: same as @p input1.
-     * @param[out] output          The output tensor, Data types supported: same as @p input1. Note: U8 requires both inputs to be U8.
+     * @param[in]  input2          An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
+     * @param[out] output          The output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
      * @param[in]  scale           Scale to apply after multiplication.
      *                             Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15.
      * @param[in]  overflow_policy Overflow policy. Supported overflow policies: Wrap, Saturate
@@ -83,17 +103,23 @@ public:
                    ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info = ActivationLayerInfo());
     /** Static function to check if given info will lead to a valid configuration of @ref CLPixelWiseMultiplicationKernel
      *
+     * Valid configurations (Input1,Input2) -> Output :
+     *
+     *   - (U8,U8)                         -> U8
+     *   - (U8,U8)                         -> S16
+     *   - (U8,S16)                        -> S16
+     *   - (S16,U8)                        -> S16
+     *   - (S16,S16)                       -> S16
+     *   - (F16,F16)                       -> F16
+     *   - (F32,F32)                       -> F32
+     *   - (QASYMM8,QASYMM8)               -> QASYMM8
+     *   - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED
+     *   - (QSYMM16,QSYMM16)               -> QSYMM16
+     *   - (QSYMM16,QSYMM16)               -> S32
+     *
      * @param[in] input1          An input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
-     * @param[in] input2          An input tensor info. Data types supported: same as @p input1.
-     * @param[in] output          The output tensor info, Data types supported:
-     *                            - U8, only if both input are U8
-     *                            - QASYMM8, only if both inputs are QASYMM8
-     *                            - QASYMM8_SIGNED, only if both inputs are QASYMM8_SIGNED
-     *                            - S16
-     *                            - QSYMM16, only if both inputs are QSYMM16
-     *                            - S32, only if both inputs are QSYMM16
-     *                            - F16
-     *                            - F32
+     * @param[in] input2          An input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
+     * @param[in] output          The output tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
      * @param[in] scale           Scale to apply after multiplication.
      *                            Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15.
      * @param[in] overflow_policy Overflow policy. Supported overflow policies: Wrap, Saturate
diff --git a/arm_compute/core/CL/kernels/CLReorgLayerKernel.h b/arm_compute/core/CL/kernels/CLReorgLayerKernel.h
index 279d891497..e3edc9f724 100644
--- a/arm_compute/core/CL/kernels/CLReorgLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLReorgLayerKernel.h
@@ -47,7 +47,7 @@ public:
     CLReorgLayerKernel &operator=(CLReorgLayerKernel &&) = default;
     /** Initialize the kernel's input, output.
      *
-     * @param[in]  input  Source tensor. Data types supported: U8/S8/QASYMM8/QASYMM8_SIGNED/U16/S16/F16/U32/S32/F32.
+     * @param[in]  input  Source tensor. Data types supported: All.
      * @param[out] output Destination tensor with tensor shape:
      *                    [width_input / stride, height_input / stride, channels_input * stride * stride, batch_size]. This means the output has
      *                    the same number of input elements. Data types supported: same as @p input.
@@ -58,7 +58,7 @@ public:
     /** Initialize the kernel's input, output.
      *
      * @param[in]  compile_context The compile context to be used.
-     * @param[in]  input           Source tensor. Data types supported: U8/S8/QASYMM8/QASYMM8_SIGNED/U16/S16/F16/U32/S32/F32.
+     * @param[in]  input           Source tensor. Data types supported: All.
      * @param[out] output          Destination tensor with tensor shape:
      *                             [width_input / stride, height_input / stride, channels_input * stride * stride, batch_size]. This means the output has
      *                             the same number of input elements. Data types supported: same as @p input.
diff --git a/arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h b/arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h
index 5297af2c1c..f8c1019d53 100644
--- a/arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h
@@ -27,93 +27,10 @@
 #include "arm_compute/core/CL/ICLSimple3DKernel.h"
 #include "arm_compute/core/KernelDescriptors.h"
 
-#include <tuple>
-
 namespace arm_compute
 {
 class ICLTensor;
 
-/** Interface for the identifying the max value of 1D Logits */
-class CLLogits1DMaxKernel : public ICLSimple3DKernel
-{
-public:
-    /** Set the input and output tensors.
-     *
-     * @param[in]  input  Source tensor. Data types supported: QASYMM8/F16/F32
-     * @param[out] output Destination tensor. Data types supported: same as @p input
-     */
-    void configure(const ICLTensor *input, ICLTensor *output);
-    /** Set the input and output tensors.
-     *
-     * @param[in]  compile_context The compile context to be used.
-     * @param[in]  input           Source tensor. Data types supported: QASYMM8/F16/F32
-     * @param[out] output          Destination tensor. Data types supported: same as @p input
-     */
-    void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
-    /** Static function to check if given info will lead to a valid configuration of @ref CLLogits1DMaxKernel
-     *
-     * @param[in] input  Source tensor. Data types supported: QASYMM8/F16/F32
-     * @param[in] output Destination tensor. Data types supported: same as @p input
-     *
-     * @return a status
-     */
-    static Status validate(const ITensorInfo *input, const ITensorInfo *output);
-};
-
-/** Interface for shifting, exponentiating and summing the logits */
-class CLLogits1DShiftExpSumKernel : public ICLKernel
-{
-public:
-    /** Default constructor */
-    CLLogits1DShiftExpSumKernel();
-    /** Prevent instances of this class from being copied (As this class contains pointers) */
-    CLLogits1DShiftExpSumKernel(const CLLogits1DShiftExpSumKernel &) = delete;
-    /** Prevent instances of this class from being copied (As this class contains pointers) */
-    CLLogits1DShiftExpSumKernel &operator=(const CLLogits1DShiftExpSumKernel &) = delete;
-    /** Allow instances of this class to be moved */
-    CLLogits1DShiftExpSumKernel(CLLogits1DShiftExpSumKernel &&) = default;
-    /** Allow instances of this class to be moved */
-    CLLogits1DShiftExpSumKernel &operator=(CLLogits1DShiftExpSumKernel &&) = default;
-    /** Set the input and output tensors.
-     *
-     * @param[in]  input  Source tensor. Data types supported: QASYMM8/F16/F32
-     * @param[in]  max    Max values tensor. Data types supported: same as @p input
-     * @param[out] output Destination tensor. Data types supported: S32 for QASYMM8 @p input, or same as @p input
-     * @param[out] sum    Sum of 1D logits tensor. Data types supported: S32 for QASYMM8 @p input, or same as @p input
-     * @param[in]  beta   (Optional) A scaling factor for the exponent. Defaults to 1.0
-     */
-    void configure(const ICLTensor *input, const ICLTensor *max, ICLTensor *output, ICLTensor *sum, float beta = 1.0f);
-    /** Set the input and output tensors.
-     *
-     * @param[in]  compile_context The compile context to be used.
-     * @param[in]  input           Source tensor. Data types supported: QASYMM8/F16/F32
-     * @param[in]  max             Max values tensor. Data types supported: same as @p input
-     * @param[out] output          Destination tensor. Data types supported: S32 for QASYMM8 @p input, or same as @p input
-     * @param[out] sum             Sum of 1D logits tensor. Data types supported: S32 for QASYMM8 @p input, or same as @p input
-     * @param[in]  beta            (Optional) A scaling factor for the exponent. Defaults to 1.0
-     */
-    void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *max, ICLTensor *output, ICLTensor *sum, float beta = 1.0f);
-    /** Static function to check if given info will lead to a valid configuration of @ref CLLogits1DShiftExpSumKernel
-     *
-     * @param[in] input  Source tensor. Data types supported: QASYMM8/F16/F32
-     * @param[in] max    Max values tensor. Data types supported: same as @p input
-     * @param[in] output Destination tensor. Data types supported: S32 for QASYMM8 @p input, or same as @p input
-     * @param[in] sum    Sum of 1D logits tensor. Data types supported: S32 for QASYMM8 @p input, or same as @p input
-     *
-     * @return a status
-     */
-    static Status validate(const ITensorInfo *input, const ITensorInfo *max, const ITensorInfo *output, const ITensorInfo *sum);
-
-    // Inherited methods overridden:
-    void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
-    const ICLTensor *_input;
-    const ICLTensor *_max;
-    ICLTensor       *_output;
-    ICLTensor       *_sum;
-};
-
 /** Interface for max, shifting, exponentiating and summing the logits */
 class CLLogits1DMaxShiftExpSumKernel : public ICLKernel
 {
@@ -134,7 +51,7 @@ public:
     CLLogits1DMaxShiftExpSumKernel &operator=(CLLogits1DMaxShiftExpSumKernel &&) = default;
     /** Set the input and output tensors.
      *
-     * @param[in]     input  Source tensor. Data types supported: F16/F32
+     * @param[in]     input  Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32
      * @param[in,out] max    Max values tensor. Data types supported: same as @p input
      * @param[out]    output Destination tensor. Data types supported: same as @p input
      * @param[out]    sum    Sum of 1D logits tensor. Data types supported: same as @p input
@@ -144,7 +61,7 @@ public:
     /** Set the input and output tensors.
      *
      * @param[in]     compile_context The compile context to be used.
-     * @param[in]     input           Source tensor. Data types supported: F16/F32
+     * @param[in]     input           Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32
      * @param[in,out] max             Max values tensor. Data types supported: same as @p input
      * @param[out]    output          Destination tensor. Data types supported: same as @p input
      * @param[out]    sum             Sum of 1D logits tensor. Data types supported: same as @p input
@@ -153,7 +70,7 @@ public:
     void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *max, ICLTensor *output, ICLTensor *sum, const SoftmaxKernelInfo &info);
     /** Static function to check if given info will lead to a valid configuration of @ref CLLogits1DMaxShiftExpSumKernel
      *
-     * @param[in] input  Source tensor. Data types supported: F16/F32
+     * @param[in] input  Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32
      * @param[in] max    Max values tensor. Data types supported: same as @p input
      * @param[in] output Destination tensor. Data types supported: same as @p input
      * @param[in] sum    Sum of 1D logits tensor. Data types supported: same as @p input
@@ -205,7 +122,7 @@ public:
      *
      * @param[in]  input  Source tensor. Data types supported: S32/F16/F32. If this kernel is used for log softmax, only F32/F16 is supported.
      * @param[in]  sum    Sum tensor. Dimensions should be dim(input)-1. Data types supported: same as @p input
-     * @param[out] output Destination tensor. Data types supported: QASYMM8 for S32 @p input, or same as @p input
+     * @param[out] output Destination tensor. Data types supported: QASYMM8/QASYMM8_SIGNED for S32 @p input, or same as @p input
      * @param[in]  info   Contains information consumed by kernels for softmax described in @ref SoftmaxKernelInfo.
      */
     void configure(const ICLTensor *input, const ICLTensor *sum, ICLTensor *output, const SoftmaxKernelInfo &info);
@@ -214,7 +131,7 @@ public:
      * @param[in]  compile_context The compile context to be used.
      * @param[in]  input           Source tensor. Data types supported: S32/F16/F32. If this kernel is used for log softmax, only F32/F16 is supported.
      * @param[in]  sum             Sum tensor. Dimensions should be dim(input)-1. Data types supported: same as @p input
-     * @param[out] output          Destination tensor. Data types supported: QASYMM8 for S32 @p input, or same as @p input
+     * @param[out] output          Destination tensor. Data types supported: QASYMM8/QASYMM8_SIGNED for S32 @p input, or same as @p input
      * @param[in]  info            Contains information consumed by kernels for softmax described in @ref SoftmaxKernelInfo.
      */
     void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *sum, ICLTensor *output, const SoftmaxKernelInfo &info);
diff --git a/arm_compute/core/CL/kernels/CLUpsampleLayerKernel.h b/arm_compute/core/CL/kernels/CLUpsampleLayerKernel.h
index d8e1ae113d..b523b97233 100644
--- a/arm_compute/core/CL/kernels/CLUpsampleLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLUpsampleLayerKernel.h
@@ -49,7 +49,7 @@ public:
 
     /** Initialise the kernel's input and output.
      *
-     * @param[in]  input             Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+     * @param[in]  input             Source tensor. Data types supported: All.
      * @param[out] output            Destination tensor. Data types supported: same as @p input.
      * @param[in]  info              Contains stride information described in @ref Size2D.
      * @param[in]  upsampling_policy Defines the policy to fill the intermediate pixels.
@@ -58,7 +58,7 @@ public:
     /** Initialise the kernel's input and output.
      *
      * @param[in]  compile_context   The compile context to be used.
-     * @param[in]  input             Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+     * @param[in]  input             Source tensor. Data types supported: All.
      * @param[out] output            Destination tensor. Data types supported: same as @p input.
      * @param[in]  info              Contains stride information described in @ref Size2D.
      * @param[in]  upsampling_policy Defines the policy to fill the intermediate pixels.
@@ -66,7 +66,7 @@ public:
     void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const Size2D &info, const InterpolationPolicy upsampling_policy);
     /** Static function to check if given info will lead to a valid configuration of @ref CLUpsampleLayerKernel
      *
-     * @param[in] input             Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+     * @param[in] input             Source tensor info. Data types supported: All.
      * @param[in] output            Destination tensor info. Data types supported: same as @p input.
      * @param[in] info              Contains  stride information described in @ref Size2D.
      * @param[in] upsampling_policy Defines the policy to fill the intermediate pixels.
diff --git a/arm_compute/runtime/CL/functions/CLComparison.h b/arm_compute/runtime/CL/functions/CLComparison.h
index ac6a41ef2e..c6d61e45f2 100644
--- a/arm_compute/runtime/CL/functions/CLComparison.h
+++ b/arm_compute/runtime/CL/functions/CLComparison.h
@@ -79,7 +79,7 @@ public:
 public:
     /** Initialise the kernel's inputs and outputs.
      *
-     * @param[in]  input1 Source tensor. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32.
+     * @param[in]  input1 Source tensor. Data types supported: All.
      *                    The input1 tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
      * @param[in]  input2 Source tensor. Data types supported: Same as @p input1.
      *                    The input2 tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
@@ -92,7 +92,7 @@ public:
     /** Initialise the kernel's inputs and outputs.
      *
      * @param[in]  compile_context The compile context to be used.
-     * @param[in]  input1          Source tensor. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32.
+     * @param[in]  input1          Source tensor. Data types supported: All.
      *                             The input1 tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
      * @param[in]  input2          Source tensor. Data types supported: Same as @p input1.
      *                             The input2 tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
@@ -101,7 +101,7 @@ public:
     void configure(const CLCompileContext &compile_context, ICLTensor *input1, ICLTensor *input2, ICLTensor *output);
     /** Static function to check if given info will lead to a valid configuration of @ref CLComparison
      *
-     * @param[in] input1 Source tensor. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32.
+     * @param[in] input1 Source tensor. Data types supported: All.
      * @param[in] input2 Source tensor. Data types supported: Same as @p input1.
      * @param[in] output Destination tensor. Data types supported: U8.
      *
diff --git a/arm_compute/runtime/CL/functions/CLCopy.h b/arm_compute/runtime/CL/functions/CLCopy.h
index ee5f31bc4c..c20d75eea8 100644
--- a/arm_compute/runtime/CL/functions/CLCopy.h
+++ b/arm_compute/runtime/CL/functions/CLCopy.h
@@ -38,7 +38,7 @@ class CLCopy : public ICLSimpleFunction
 public:
     /** Initialise the function's source and destination.
      *
-     * @param[in]  input  Source tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
+     * @param[in]  input  Source tensor. Data types supported: All.
      * @param[out] output Output tensor. Data types supported: Same as @p input.
      *
      */
@@ -46,14 +46,14 @@ public:
     /** Initialise the function's source and destination.
      *
      * @param[in]  compile_context The compile context to be used.
-     * @param[in]  input           Source tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
+     * @param[in]  input           Source tensor. Data types supported: All.
      * @param[out] output          Output tensor. Data types supported: Same as @p input.
      *
      */
     void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output);
     /** Static function to check if given info will lead to a valid configuration of @ref CLCopy
      *
-     * @param[in] input  Source tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
+     * @param[in] input  Source tensor. Data types supported: All.
      * @param[in] output Output tensor. Data types supported: Same as @p input.
      *
      * @return a status
diff --git a/arm_compute/runtime/CL/functions/CLElementwiseOperations.h b/arm_compute/runtime/CL/functions/CLElementwiseOperations.h
index 8cf1ca83c5..9cd3c150cc 100644
--- a/arm_compute/runtime/CL/functions/CLElementwiseOperations.h
+++ b/arm_compute/runtime/CL/functions/CLElementwiseOperations.h
@@ -33,7 +33,7 @@ class ICLTensor;
 
 /** Basic function to run @ref CLSaturatedArithmeticOperationKernel for addition
  *
- * @note The tensor data type for the inputs must be U8/QASYMM8/S16/QSYMM16/S32/U32/F16/F32.
+ * @note The tensor data type for the inputs must be U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32.
  * @note The function performs an arithmetic addition between two tensors.
  */
 class CLArithmeticAddition : public ICLSimpleFunction
@@ -41,32 +41,74 @@ class CLArithmeticAddition : public ICLSimpleFunction
 public:
     /** Initialise the kernel's inputs, output and conversion policy.
      *
-     * @param[in, out] input1   First tensor input. Data types supported: U8/QASYMM8/S16/QSYMM16/S32/U32/F16/F32.
+     * Valid configurations (Input1,Input2) -> Output :
+     *
+     *   - (U8,U8)           -> U8
+     *   - (U8,U8)           -> S16
+     *   - (S16,U8)          -> S16
+     *   - (U8,S16)          -> S16
+     *   - (S16,S16)         -> S16
+     *   - (S32,S32)         -> S32
+     *   - (F16,F16)         -> F16
+     *   - (F32,F32)         -> F32
+     *   - (QASYMM8,QASYMM8) -> QASYMM8
+     *   - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED
+     *   - (QSYMM16,QSYMM16) -> QSYMM16
+     *
+     * @param[in, out] input1   First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32.
      *                          The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
-     * @param[in, out] input2   Second tensor input. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), QSYMM16 (only if @p input1 is QSYMM16), S16/F16/F32.
+     * @param[in, out] input2   Second tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32.
      *                          The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
-     * @param[out]     output   Output tensor. Data types supported: U8 (Only if both inputs are U8), QASYMM8 (only if both inputs are QASYMM8), QSYMM16 (only if both inputs is QSYMM16), S16/F16/F32.
+     * @param[out]     output   Output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32.
      * @param[in]      policy   Policy to use to handle overflow.
      * @param[in]      act_info (Optional) Activation layer information in case of a fused activation.
      */
     void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, ConvertPolicy policy, const ActivationLayerInfo &act_info = ActivationLayerInfo());
     /** Initialise the kernel's inputs, output and conversion policy.
      *
+     * Valid configurations (Input1,Input2) -> Output :
+     *
+     *   - (U8,U8)           -> U8
+     *   - (U8,U8)           -> S16
+     *   - (S16,U8)          -> S16
+     *   - (U8,S16)          -> S16
+     *   - (S16,S16)         -> S16
+     *   - (S32,S32)         -> S32
+     *   - (F16,F16)         -> F16
+     *   - (F32,F32)         -> F32
+     *   - (QASYMM8,QASYMM8) -> QASYMM8
+     *   - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED
+     *   - (QSYMM16,QSYMM16) -> QSYMM16
+     *
      * @param[in]      compile_context The compile context to be used.
-     * @param[in, out] input1          First tensor input. Data types supported: U8/QASYMM8/S16/QSYMM16/S32/U32/F16/F32.
+     * @param[in, out] input1          First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32.
      *                                 The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
-     * @param[in, out] input2          Second tensor input. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), QSYMM16 (only if @p input1 is QSYMM16), S16/F16/F32.
+     * @param[in, out] input2          Second tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32.
      *                                 The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
-     * @param[out]     output          Output tensor. Data types supported: U8 (Only if both inputs are U8), QASYMM8 (only if both inputs are QASYMM8), QSYMM16 (only if both inputs is QSYMM16), S16/F16/F32.
+     * @param[out]     output          Output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32.
      * @param[in]      policy          Policy to use to handle overflow.
      * @param[in]      act_info        (Optional) Activation layer information in case of a fused activation.
      */
     void configure(const CLCompileContext &compile_context, ICLTensor *input1, ICLTensor *input2, ICLTensor *output, ConvertPolicy policy, const ActivationLayerInfo &act_info = ActivationLayerInfo());
     /** Static function to check if given info will lead to a valid configuration of @ref CLSaturatedArithmeticOperationKernel for addition
      *
-     * @param[in] input1   First tensor input info. Data types supported: U8/QASYMM8/S16/QSYMM16/S32/U32/F16/F32.
-     * @param[in] input2   Second tensor input info. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), QSYMM16 (only if @p input1 is QSYMM16), S16/F16/F32.
-     * @param[in] output   Output tensor info. Data types supported: U8 (Only if both inputs are U8), QASYMM8 (only if both inputs are QASYMM8), QSYMM16 (only if both inputs is QSYMM16), S16/F16/F32.
+     * Valid configurations (Input1,Input2) -> Output :
+     *
+     *   - (U8,U8)           -> U8
+     *   - (U8,U8)           -> S16
+     *   - (S16,U8)          -> S16
+     *   - (U8,S16)          -> S16
+     *   - (S16,S16)         -> S16
+     *   - (S32,S32)         -> S32
+     *   - (F16,F16)         -> F16
+     *   - (F32,F32)         -> F32
+     *   - (QASYMM8,QASYMM8) -> QASYMM8
+     *   - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED
+     *   - (QSYMM16,QSYMM16) -> QSYMM16
+     *
+     * @param[in] input1   First tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32.
+     * @param[in] input2   Second tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32.
+     * @param[in] output   Output tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32.
      * @param[in] policy   Policy to use to handle overflow.
      * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
      *
@@ -77,7 +119,7 @@ public:
 
 /** Basic function to run @ref CLSaturatedArithmeticOperationKernel for subtraction
  *
- * @note The tensor data type for the inputs must be U8/QASYMM8/S16/S32/U32/F16/F32.
+ * @note The tensor data type for the inputs must be U8/QASYMM8/QASYMM8_SIGNED/S16/S32/F16/F32.
  * @note The function performs an arithmetic subtraction between two tensors.
  */
 class CLArithmeticSubtraction : public ICLSimpleFunction
@@ -85,32 +127,74 @@ class CLArithmeticSubtraction : public ICLSimpleFunction
 public:
     /** Initialise the kernel's inputs, output and conversion policy.
      *
-     * @param[in, out] input1   First tensor input. Data types supported: U8/QASYMM8/S16/S32/U32/F16/F32.
+     * Valid configurations (Input1,Input2) -> Output :
+     *
+     *   - (U8,U8)           -> U8
+     *   - (U8,U8)           -> S16
+     *   - (S16,U8)          -> S16
+     *   - (U8,S16)          -> S16
+     *   - (S16,S16)         -> S16
+     *   - (S32,S32)         -> S32
+     *   - (F16,F16)         -> F16
+     *   - (F32,F32)         -> F32
+     *   - (QASYMM8,QASYMM8) -> QASYMM8
+     *   - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED
+     *   - (QSYMM16,QSYMM16) -> QSYMM16
+     *
+     * @param[in, out] input1   First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32.
      *                          The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
-     * @param[in, out] input2   Second tensor input. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), S16/F16/F32.
+     * @param[in, out] input2   Second tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32.
      *                          The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
-     * @param[out]     output   Output tensor. Data types supported: U8 (Only if both inputs are U8), QASYMM8 (only if both inputs are QASYMM8), S16/F16/F32.
+     * @param[out]     output   Output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32.
      * @param[in]      policy   Policy to use to handle overflow.
      * @param[in]      act_info (Optional) Activation layer information in case of a fused activation.
      */
     void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, ConvertPolicy policy, const ActivationLayerInfo &act_info = ActivationLayerInfo());
     /** Initialise the kernel's inputs, output and conversion policy.
      *
+     * Valid configurations (Input1,Input2) -> Output :
+     *
+     *   - (U8,U8)           -> U8
+     *   - (U8,U8)           -> S16
+     *   - (S16,U8)          -> S16
+     *   - (U8,S16)          -> S16
+     *   - (S16,S16)         -> S16
+     *   - (S32,S32)         -> S32
+     *   - (F16,F16)         -> F16
+     *   - (F32,F32)         -> F32
+     *   - (QASYMM8,QASYMM8) -> QASYMM8
+     *   - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED
+     *   - (QSYMM16,QSYMM16) -> QSYMM16
+     *
      * @param[in]      compile_context The compile context to be used.
-     * @param[in, out] input1          First tensor input. Data types supported: U8/QASYMM8/S16/S32/U32/F16/F32.
+     * @param[in, out] input1          First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32.
      *                                 The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
-     * @param[in, out] input2          Second tensor input. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), S16/F16/F32.
+     * @param[in, out] input2          Second tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32.
      *                                 The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
-     * @param[out]     output          Output tensor. Data types supported: U8 (Only if both inputs are U8), QASYMM8 (only if both inputs are QASYMM8), S16/F16/F32.
+     * @param[out]     output          Output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32.
      * @param[in]      policy          Policy to use to handle overflow.
      * @param[in]      act_info        (Optional) Activation layer information in case of a fused activation.
      */
     void configure(const CLCompileContext &compile_context, ICLTensor *input1, ICLTensor *input2, ICLTensor *output, ConvertPolicy policy, const ActivationLayerInfo &act_info = ActivationLayerInfo());
     /** Static function to check if given info will lead to a valid configuration of @ref CLSaturatedArithmeticOperationKernel for subtraction
      *
-     * @param[in] input1   First tensor input info. Data types supported: U8/QASYMM8/S16/S32/U32/F16/F32.
-     * @param[in] input2   Second tensor input info. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), S16/F16/F32.
-     * @param[in] output   Output tensor info. Data types supported: U8 (Only if both inputs are U8), QASYMM8 ( only if both inputs are QASYMM8), S16/F16/F32.
+     * Valid configurations (Input1,Input2) -> Output :
+     *
+     *   - (U8,U8)           -> U8
+     *   - (U8,U8)           -> S16
+     *   - (S16,U8)          -> S16
+     *   - (U8,S16)          -> S16
+     *   - (S16,S16)         -> S16
+     *   - (S32,S32)         -> S32
+     *   - (F16,F16)         -> F16
+     *   - (F32,F32)         -> F32
+     *   - (QASYMM8,QASYMM8) -> QASYMM8
+     *   - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED
+     *   - (QSYMM16,QSYMM16) -> QSYMM16
+     *
+     * @param[in] input1   First tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32.
+     * @param[in] input2   Second tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32.
+     * @param[in] output   Output tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32.
      * @param[in] policy   Policy to use to handle overflow.
      * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
      *
@@ -170,30 +254,30 @@ class CLElementwiseMax : public ICLSimpleFunction
 public:
     /** Initialise the kernel's inputs, output and conversion policy.
      *
-     * @param[in, out] input1   First tensor input. Data types supported: U8/QASYMM8/S16/QSYMM16/S32/U32/F16/F32.
+     * @param[in, out] input1   First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/U32/F16/F32.
      *                          The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
-     * @param[in, out] input2   Second tensor input. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), S16, QSYMM16 (only if @p input1 is QSYMM16), F16/F32.
+     * @param[in, out] input2   Second tensor input. Data types supported: same as @p input1.
      *                          The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
-     * @param[out]     output   Output tensor. Data types supported: U8 (Only if both inputs are U8), QASYMM8 (only if both inputs are QASYMM8), S16, QSYMM16 (only if both inputs are QSYMM16), F16/F32.
+     * @param[out]     output   Output tensor. Data types supported: same as @p input1.
      * @param[in]      act_info (Optional) Activation layer information in case of a fused activation.
      */
     void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
     /** Initialise the kernel's inputs, output and conversion policy.
      *
      * @param[in]      compile_context The compile context to be used.
-     * @param[in, out] input1          First tensor input. Data types supported: U8/QASYMM8/S16/QSYMM16/S32/U32/F16/F32.
+     * @param[in, out] input1          First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/U32/F16/F32.
      *                                 The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
-     * @param[in, out] input2          Second tensor input. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), S16, QSYMM16 (only if @p input1 is QSYMM16), F16/F32.
+     * @param[in, out] input2          Second tensor input. Data types supported: same as @p input1.
      *                                 The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
-     * @param[out]     output          Output tensor. Data types supported: U8 (Only if both inputs are U8), QASYMM8 (only if both inputs are QASYMM8), S16, QSYMM16 (only if both inputs are QSYMM16), F16/F32.
+     * @param[out]     output          Output tensor. Data types supported: same as @p input1.
      * @param[in]      act_info        (Optional) Activation layer information in case of a fused activation.
      */
     void configure(const CLCompileContext &compile_context, ICLTensor *input1, ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
     /** Static function to check if given info will lead to a valid configuration of @ref CLArithmeticOperationKernel for max
      *
-     * @param[in] input1   First tensor input info. Data types supported: U8/QASYMM8/S16/QSYMM16/S32/U32/F16/F32.
-     * @param[in] input2   Second tensor input info. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), S16, QSYMM16 (only if @p input1 is QSYMM16), F16/F32.
-     * @param[in] output   Output tensor info. Data types supported: U8 (Only if both inputs are U8), QASYMM8 ( only if both inputs are QASYMM8), S16, QSYMM16 (only if both inputs are QSYMM16), F16/F32.
+     * @param[in] input1   First tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/U32/F16/F32.
+     * @param[in] input2   Second tensor input info. Data types supported: same as @p input1.
+     * @param[in] output   Output tensor info. Data types supported: same as @p input1.
      * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
      *
      * @return a status
@@ -211,30 +295,30 @@ class CLElementwiseMin : public ICLSimpleFunction
 public:
     /** Initialise the kernel's inputs, output and conversion policy.
      *
-     * @param[in, out] input1   First tensor input. Data types supported: U8/QASYMM8/S16/QSYMM16/S32/U32/F16/F32.
+     * @param[in, out] input1   First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/U32/F16/F32.
      *                          The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
-     * @param[in, out] input2   Second tensor input. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), S16, QSYMM16 (only if @p input1 is QSYMM16), F16/F32.
+     * @param[in, out] input2   Second tensor input. Data types supported: same as @p input1.
      *                          The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
-     * @param[out]     output   Output tensor. Data types supported: U8 (Only if both inputs are U8), QASYMM8 (only if both inputs are QASYMM8), S16, QSYMM16 (only if both inputs are QSYMM16), F16/F32.
+     * @param[out]     output   Output tensor. Data types supported: same as @p input1.
      * @param[in]      act_info (Optional) Activation layer information in case of a fused activation.
      */
     void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
     /** Initialise the kernel's inputs, output and conversion policy.
      *
      * @param[in]      compile_context The compile context to be used.
-     * @param[in, out] input1          First tensor input. Data types supported: U8/QASYMM8/S16/QSYMM16/S32/U32/F16/F32.
+     * @param[in, out] input1          First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/U32/F16/F32.
      *                                 The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
-     * @param[in, out] input2          Second tensor input. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), S16, QSYMM16 (only if @p input1 is QSYMM16), F16/F32.
+     * @param[in, out] input2          Second tensor input. Data types supported: same as @p input1.
      *                                 The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
-     * @param[out]     output          Output tensor. Data types supported: U8 (Only if both inputs are U8), QASYMM8 (only if both inputs are QASYMM8), S16, QSYMM16 (only if both inputs are QSYMM16), F16/F32.
+     * @param[out]     output          Output tensor. Data types supported: same as @p input1.
      * @param[in]      act_info        (Optional) Activation layer information in case of a fused activation.
      */
     void configure(const CLCompileContext &compile_context, ICLTensor *input1, ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
     /** Static function to check if given info will lead to a valid configuration of @ref CLArithmeticOperationKernel for min
      *
-     * @param[in] input1   First tensor input info. Data types supported: U8/QASYMM8/S16/QSYMM16/S32/U32/F16/F32.
-     * @param[in] input2   Second tensor input info. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), S16, QSYMM16 (only if @p input1 is QSYMM16), F16/F32.
-     * @param[in] output   Output tensor info. Data types supported: U8 (Only if both inputs are U8), QASYMM8 ( only if both inputs are QASYMM8), S16, QSYMM16 (only if both inputs are QSYMM16), F16/F32.
+     * @param[in] input1   First tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/U32/F16/F32.
+     * @param[in] input2   Second tensor input info. Data types supported: same as @p input1.
+     * @param[in] output   Output tensor info. Data types supported: same as @p input1.
      * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
      *
      * @return a status
@@ -252,30 +336,30 @@ class CLElementwiseSquaredDiff : public ICLSimpleFunction
 public:
     /** Initialise the kernel's inputs, output and conversion policy.
      *
-     * @param[in, out] input1   First tensor input. Data types supported: U8/QASYMM8/S16/QSYMM16/F16/F32.
+     * @param[in, out] input1   First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
      *                          The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
-     * @param[in, out] input2   Second tensor input. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), S16, QSYMM16 (only if @p input1 is QSYMM16), F16/F32.
+     * @param[in, out] input2   Second tensor input. Data types supported: same as @p input1.
      *                          The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
-     * @param[out]     output   Output tensor. Data types supported: U8 (Only if both inputs are U8), QASYMM8 (only if both inputs are QASYMM8), S16, QSYMM16 (only if both inputs are QSYMM16), F16/F32.
+     * @param[out]     output   Output tensor. Data types supported: same as @p input1.
      * @param[in]      act_info (Optional) Activation layer information in case of a fused activation.
      */
     void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
     /** Initialise the kernel's inputs, output and conversion policy.
      *
      * @param[in]      compile_context The compile context to be used.
-     * @param[in, out] input1          First tensor input. Data types supported: U8/QASYMM8/S16/QSYMM16/F16/F32.
+     * @param[in, out] input1          First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
      *                                 The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
-     * @param[in, out] input2          Second tensor input. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), S16, QSYMM16 (only if @p input1 is QSYMM16), F16/F32.
+     * @param[in, out] input2          Second tensor input. Data types supported: same as @p input1.
      *                                 The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
-     * @param[out]     output          Output tensor. Data types supported: U8 (Only if both inputs are U8), QASYMM8 (only if both inputs are QASYMM8), S16, QSYMM16 (only if both inputs are QSYMM16), F16/F32.
+     * @param[out]     output          Output tensor. Data types supported: same as @p input1.
      * @param[in]      act_info        (Optional) Activation layer information in case of a fused activation.
      */
     void configure(const CLCompileContext &compile_context, ICLTensor *input1, ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
     /** Static function to check if given info will lead to a valid configuration of @ref CLArithmeticOperationKernel for squared difference
      *
-     * @param[in] input1   First tensor input info. Data types supported: U8/QASYMM8/S16/QSYMM16/F16/F32.
-     * @param[in] input2   Second tensor input info. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), S16, QSYMM16 (only if @p input1 is QSYMM16), F16/F32.
-     * @param[in] output   Output tensor info. Data types supported: U8 (Only if both inputs are U8), QASYMM8 ( only if both inputs are QASYMM8), S16, QSYMM16 (only if both inputs are QSYMM16), F16/F32.
+     * @param[in] input1   First tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
+     * @param[in] input2   Second tensor input info. Data types supported: same as @p input1.
+     * @param[in] output   Output tensor info. Data types supported: same as @p input1.
      * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
      *
      * @return a status
diff --git a/arm_compute/runtime/CL/functions/CLFill.h b/arm_compute/runtime/CL/functions/CLFill.h
index 99cae077fe..b79b234158 100644
--- a/arm_compute/runtime/CL/functions/CLFill.h
+++ b/arm_compute/runtime/CL/functions/CLFill.h
@@ -38,14 +38,14 @@ class CLFill : public ICLSimpleFunction
 public:
     /** Initialize the function
      *
-     * @param[in,out] tensor         Source tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
+     * @param[in,out] tensor         Source tensor. Data types supported: All.
      * @param[in]     constant_value Constant value to use to fill tensor.
      */
     void configure(ICLTensor *tensor, PixelValue constant_value);
     /** Initialize the function
      *
      * @param[in]     compile_context The compile context to be used.
-     * @param[in,out] tensor          Source tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
+     * @param[in,out] tensor          Source tensor. Data types supported: All.
      * @param[in]     constant_value  Constant value to use to fill tensor.
      */
     void configure(const CLCompileContext &compile_context, ICLTensor *tensor, PixelValue constant_value);
diff --git a/arm_compute/runtime/CL/functions/CLFillBorder.h b/arm_compute/runtime/CL/functions/CLFillBorder.h
index e552c353db..18bc20e654 100644
--- a/arm_compute/runtime/CL/functions/CLFillBorder.h
+++ b/arm_compute/runtime/CL/functions/CLFillBorder.h
@@ -38,7 +38,7 @@ class CLFillBorder : public ICLSimpleFunction
 public:
     /** Initialize the function
      *
-     * @param[in,out] tensor                Source tensor. Data types supported: U8/S16
+     * @param[in,out] tensor                Source tensor. Data types supported: U8/QASYMM8/S8/QASYMM8_SIGNED/U16/S16/U32/S32/F16/F32.
      * @param[in]     border_width          The border width
      * @param[in]     border_mode           Strategy to use for borders.
      * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
@@ -47,7 +47,7 @@ public:
     /** Initialize the function
      *
      * @param[in]     compile_context       The compile context to be used.
-     * @param[in,out] tensor                Source tensor. Data types supported: U8/S16
+     * @param[in,out] tensor                Source tensor. Data types supported: U8/QASYMM8/S8/QASYMM8_SIGNED/U16/S16/U32/S32/F16/F32.
      * @param[in]     border_width          The border width
      * @param[in]     border_mode           Strategy to use for borders.
      * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
diff --git a/arm_compute/runtime/CL/functions/CLGEMM.h b/arm_compute/runtime/CL/functions/CLGEMM.h
index 018d88d366..8e4d3906d1 100644
--- a/arm_compute/runtime/CL/functions/CLGEMM.h
+++ b/arm_compute/runtime/CL/functions/CLGEMM.h
@@ -74,7 +74,7 @@ public:
 
     /** Configures the @ref CLGEMMReshapeRHSMatrixKernel kernel
      *
-     * @param[in] input Input tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
+     * @param[in] input Input tensor. Data types supported: All
      * @param[in] info  RHS matrix information to be used for reshaping.
      */
     void configure(const ICLTensor *input, GEMMRHSMatrixInfo info)
@@ -85,7 +85,7 @@ public:
     /** Configures the @ref CLGEMMReshapeRHSMatrixKernel kernel
      *
      * @param[in] compile_context The compile context to be used.
-     * @param[in] input           Input tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
+     * @param[in] input           Input tensor. Data types supported: All
      * @param[in] info            RHS matrix information to be used for reshaping.
      */
     void configure(const CLCompileContext &compile_context, const ICLTensor *input, GEMMRHSMatrixInfo info)
diff --git a/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h
index be9283b6ec..277b27f690 100644
--- a/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h
@@ -98,8 +98,8 @@ class CLConvolutionLayerReshapeWeightsTransform : public ITransformWeights
 public:
     /** Configures the @ref CLConvolutionLayerReshapeWeights function
      *
-     * @param[in] input      Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
-     * @param[in] biases     Biases tensor. Data type supported: Same as @p input.
+     * @param[in] input      Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/F16/F32.
+     * @param[in] biases     Biases tensor. Data type supported: same as @p input, S32 if @p input is quantized.
      * @param[in] num_groups Number of groups when performing a grouped convolution.
      */
     void configure(const ICLTensor *input, const ICLTensor *biases, unsigned int num_groups)
@@ -109,8 +109,8 @@ public:
     /** Configures the @ref CLConvolutionLayerReshapeWeights function
      *
      * @param[in] compile_context The compile context to be used.
-     * @param[in] input           Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
-     * @param[in] biases          Biases tensor. Data type supported: Same as @p input.
+     * @param[in] input           Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/F16/F32.
+     * @param[in] biases          Biases tensor. Data type supported: same as @p input, S32 if @p input is quantized.
      * @param[in] num_groups      Number of groups when performing a grouped convolution.
      */
     void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *biases, unsigned int num_groups)
@@ -183,11 +183,11 @@ public:
      *
      * @param[in]  input        Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
      *                          while every optional dimension from 4 and above represent a batch of inputs.
-     *                          Data types supported: QASYMM8/F16/F32.
+     *                          Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
      * @param[in]  weights      Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
-     *                          Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
+     *                          Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8 or QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8_SIGNED.
      * @param[in]  biases       Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
-     *                          Data type supported: Should match @p input data type, except for input of QASYMM8 type where biases should be of S32 type.
+     *                          Data type supported: Should match @p input data type, except for input of quantized type where biases should be of S32 type.
      * @param[out] output       Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
      *                          Data types supported: Same as @p input.
      * @param[in]  conv_info    Contains padding and stride information described in @ref PadStrideInfo.
@@ -204,11 +204,11 @@ public:
      * @param[in]  compile_context The compile context to be used.
      * @param[in]  input           Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
      *                             while every optional dimension from 4 and above represent a batch of inputs.
-     *                             Data types supported: QASYMM8/F16/F32.
+     *                             Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
      * @param[in]  weights         Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
-     *                             Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
+     *                             Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8 or QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8_SIGNED.
      * @param[in]  biases          Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
-     *                             Data type supported: Should match @p input data type, except for input of QASYMM8 type where biases should be of S32 type.
+     *                             Data type supported: Should match @p input data type, except for input of quantized type where biases should be of S32 type.
      * @param[out] output          Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
      *                             Data types supported: Same as @p input.
      * @param[in]  conv_info       Contains padding and stride information described in @ref PadStrideInfo.
@@ -225,11 +225,11 @@ public:
      *
      * @param[in]  input        Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
      *                          while every optional dimension from 4 and above represent a batch of inputs.
-     *                          Data types supported: QASYMM8/F16/F32.
+     *                          Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
      * @param[in]  weights      Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
-     *                          Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
+     *                          Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8 or QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8_SIGNED.
      * @param[in]  biases       Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
-     *                          Data type supported: Should match @p input data type, except for input of QASYMM8 type where biases should be of S32 type.
+     *                          Data type supported: Should match @p input data type, except for input of quantized type where biases should be of S32 type.
      * @param[out] output       Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
      *                          Data types supported: Same as @p input.
      * @param[in]  conv_info    Contains padding and stride information described in @ref PadStrideInfo.
@@ -252,12 +252,12 @@ private:
     /** Configures the appropriate matrix multiply routine
      *
      * @param[in]      compile_context       The compile context to be used.
-     * @param[in]      input                 Input tensor. Data types supported: QASYMM8/F16/F32.
-     * @param[in]      weights               Weights tensor. Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
+     * @param[in]      input                 Input tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+     * @param[in]      weights               Weights tensor. Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8 or
+     *                                       QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8_SIGNED.
      * @param[in]      biases                Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
-     *                                       Data type supported: Should match @p input data type, except for input of QASYMM8 type where biases should be of S32 type.
-     * @param[in, out] output                Output tensor. Data types supported: Same as @p input,
-     *                                       except for input of QASYMM8 type where output should be of S32 type.
+     *                                       Data type supported: Should match @p input data type, except for input of quantized type where biases should be of S32 type.
+     * @param[in, out] output                Output tensor. Data types supported: same as @p input.
      * @param[in]      gemmlowp_output_stage GEMMLowp output stage info
      * @param[in]      gemm_3d_depth         Depth of GEMM 3D
      * @param[in]      act_info              Activation to apply after the matrix multiplication
@@ -267,12 +267,12 @@ private:
                       int gemm_3d_depth, const ActivationLayerInfo &act_info);
     /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMConvolutionLayer matrix multiply routines
      *
-     * @param[in] input                 Input tensor info. Data types supported: QASYMM8/F16/F32.
-     * @param[in] weights               Weights tensor info. Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
+     * @param[in] input                 Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+     * @param[in] weights               Weights tensor info. Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8 or
+     *                                  QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8_SIGNED.
      * @param[in] biases                Biases tensor info. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
-     *                                  Data type supported: Should match @p input data type, except for input of QASYMM8 type where biases should be of S32 type.
-     * @param[in] output                Output tensor info. Data types supported: Same as @p input,
-     *                                  except for input of QASYMM8 type where output should be of S32 type.
+     *                                  Data type supported: Should match @p input data type, except for input of quantized type where biases should be of S32 type.
+     * @param[in] output                Output tensor info. Data types supported: same as @p input.
      * @param[in] gemmlowp_output_stage GEMMLowp output stage info
      * @param[in] gemm_3d_depth         Depth of GEMM 3D
      * @param[in] skip_im2col           Flag which specifies if im2col has to be skipped. i.e. 1x1 convolution with NHWC data layout.
diff --git a/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h b/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h
index 0e822bc5a2..57b1e30df5 100644
--- a/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h
+++ b/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h
@@ -59,8 +59,8 @@ public:
      * @note GEMMLowp:  low precision GEMM kernel. [A * B + C]
      *  This kernel performs the following computations:
      *
-     *  -# Convert a values from QASYMM8 to int32 and add a_offset to each of them.
-     *  -# Convert b values from QASYMM8 to int32 and add b_offset to each of them.
+     *  -# Convert a values from 8-bit quantized to int32 and add a_offset to each of them.
+     *  -# Convert b values from 8-bit quantized to int32 and add b_offset to each of them.
      *  -# Compute the matrix product of the resulting a * b in int32.
      *  -# Quantize to uint8 if gemm_info.gemmlowp_output_stage != NONE
      *
@@ -77,8 +77,8 @@ public:
      * @note GEMMLowp:  low precision GEMM kernel. [A * B + C]
      *  This kernel performs the following computations:
      *
-     *  -# Convert a values from QASYMM8 to int32 and add a_offset to each of them.
-     *  -# Convert b values from QASYMM8 to int32 and add b_offset to each of them.
+     *  -# Convert a values from 8-bit quantized to int32 and add a_offset to each of them.
+     *  -# Convert b values from 8-bit quantized to int32 and add b_offset to each of them.
      *  -# Compute the matrix product of the resulting a * b in int32.
      *  -# Quantize to uint8 if gemm_info.gemmlowp_output_stage != NONE
      *
diff --git a/arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h b/arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h
index a647281e29..b87daba1f8 100644
--- a/arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h
+++ b/arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h
@@ -38,19 +38,25 @@ class CLPixelWiseMultiplication : public ICLSimpleFunction
 public:
     /** Initialise the kernel's inputs, output and convertion policy.
      *
+     * Valid configurations (Input1,Input2) -> Output :
+     *
+     *   - (U8,U8)                         -> U8
+     *   - (U8,U8)                         -> S16
+     *   - (U8,S16)                        -> S16
+     *   - (S16,U8)                        -> S16
+     *   - (S16,S16)                       -> S16
+     *   - (F16,F16)                       -> F16
+     *   - (F32,F32)                       -> F32
+     *   - (QASYMM8,QASYMM8)               -> QASYMM8
+     *   - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED
+     *   - (QSYMM16,QSYMM16)               -> QSYMM16
+     *   - (QSYMM16,QSYMM16)               -> S32
+     *
      * @param[in, out] input1          An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
      *                                 The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
-     * @param[in, out] input2          An input tensor. Data types supported: same as @p input1.
+     * @param[in, out] input2          An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
      *                                 The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
-     * @param[out]     output          The output tensor. Data types supported:
-     *                                 - U8, only if both input are U8
-     *                                 - QASYMM8, only if both inputs are QASYMM8
-     *                                 - QASYMM8_SIGNED, only if both inputs are QASYMM8_SIGNED
-     *                                 - S16
-     *                                 - QSYMM16, only if both inputs are QSYMM16
-     *                                 - S32, only if both inputs are QSYMM16
-     *                                 - F16
-     *                                 - F32
+     * @param[out]     output          The output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
      * @param[in]      scale           Scale to apply after multiplication.
      *                                 Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15.
      * @param[in]      overflow_policy Overflow policy. Supported overflow policies: Wrap, Saturate
@@ -61,12 +67,26 @@ public:
                    ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info = ActivationLayerInfo());
     /** Initialise the kernel's inputs, output and convertion policy.
      *
+     * Valid configurations (Input1,Input2) -> Output :
+     *
+     *   - (U8,U8)                         -> U8
+     *   - (U8,U8)                         -> S16
+     *   - (U8,S16)                        -> S16
+     *   - (S16,U8)                        -> S16
+     *   - (S16,S16)                       -> S16
+     *   - (F16,F16)                       -> F16
+     *   - (F32,F32)                       -> F32
+     *   - (QASYMM8,QASYMM8)               -> QASYMM8
+     *   - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED
+     *   - (QSYMM16,QSYMM16)               -> QSYMM16
+     *   - (QSYMM16,QSYMM16)               -> S32
+     *
      * @param[in]      compile_context The compile context to be used.
      * @param[in, out] input1          An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
      *                                 The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
-     * @param[in, out] input2          An input tensor. Data types supported: same as @p input1.
+     * @param[in, out] input2          An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
      *                                 The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
-     * @param[out]     output          The output tensor, Data types supported: same as @p input1. Note: U8 requires both inputs to be U8.
+     * @param[out]     output          The output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
      * @param[in]      scale           Scale to apply after multiplication.
      *                                 Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15.
      * @param[in]      overflow_policy Overflow policy. Supported overflow policies: Wrap, Saturate
@@ -77,17 +97,24 @@ public:
                    ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info = ActivationLayerInfo());
     /** Static function to check if given info will lead to a valid configuration of @ref CLPixelWiseMultiplication
      *
+     * Valid configurations (Input1,Input2) -> Output :
+     *
+     *   - (U8,U8)                         -> U8
+     *   - (U8,U8)                         -> S16
+     *   - (U8,S16)                        -> S16
+     *   - (S16,U8)                        -> S16
+     *   - (S16,S16)                       -> S16
+     *   - (F16,F16)                       -> F16
+     *   - (F32,F32)                       -> F32
+     *   - (QASYMM8,QASYMM8)               -> QASYMM8
+     *   - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED
+     *   - (QSYMM16,QSYMM16)               -> QSYMM16
+     *   - (QSYMM16,QSYMM16)               -> S32
+     *
+     *
      * @param[in] input1          An input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
-     * @param[in] input2          An input tensor info. Data types supported: same as @p input1.
-     * @param[in] output          The output tensor info. Data types supported:
-     *                            - U8, only if both input are U8
-     *                            - QASYMM8, only if both inputs are QASYMM8
-     *                            - QASYMM8_SIGNED, only if both inputs are QASYMM8_SIGNED
-     *                            - S16
-     *                            - QSYMM16, only if both inputs are QSYMM16
-     *                            - S32, only if both inputs are QSYMM16
-     *                            - F16
-     *                            - F32
+     * @param[in] input2          An input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
+     * @param[in] output          The output tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
      * @param[in] scale           Scale to apply after multiplication.
      *                            Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15.
      * @param[in] overflow_policy Overflow policy. Supported overflow policies: Wrap, Saturate
diff --git a/arm_compute/runtime/CL/functions/CLReductionOperation.h b/arm_compute/runtime/CL/functions/CLReductionOperation.h
index 013ba647b1..5d050d71d6 100644
--- a/arm_compute/runtime/CL/functions/CLReductionOperation.h
+++ b/arm_compute/runtime/CL/functions/CLReductionOperation.h
@@ -26,19 +26,17 @@
 
 #include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
 #include "arm_compute/core/CL/kernels/CLReductionOperationKernel.h"
-#include "arm_compute/core/Types.h"
 #include "arm_compute/runtime/CL/CLTensor.h"
 #include "arm_compute/runtime/CL/functions/CLReshapeLayer.h"
 #include "arm_compute/runtime/IFunction.h"
 #include "arm_compute/runtime/IMemoryManager.h"
 #include "arm_compute/runtime/MemoryGroup.h"
 
-#include <cstdint>
 #include <memory>
-#include <vector>
 
 namespace arm_compute
 {
+// Forward declarations
 class ICLTensor;
 
 /** Perform reduction operation.
@@ -54,7 +52,7 @@ public:
 
     /** Set the input and output tensors.
      *
-     * @param[in]  input     Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+     * @param[in]  input     Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32/S32.
      * @param[out] output    Destination tensor. Data types and data layouts supported: Same as @p input.
      * @param[in]  axis      Axis along which to reduce. Supported reduction axis : 0, 1, 2, 3
      * @param[in]  op        Reduction operation to perform. Operations supported: MEAN_SUM, PROD, SUM_SQUARE, SUM, MIN, MAX
@@ -64,7 +62,7 @@ public:
     /** Set the input and output tensors.
      *
      * @param[in]  compile_context The compile context to be used.
-     * @param[in]  input           Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+     * @param[in]  input           Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32/S32.
      * @param[out] output          Destination tensor. Data types and data layouts supported: Same as @p input.
      * @param[in]  axis            Axis along which to reduce. Supported reduction axis : 0, 1, 2, 3
      * @param[in]  op              Reduction operation to perform. Operations supported: MEAN_SUM, PROD, SUM_SQUARE, SUM, MIN, MAX
@@ -74,7 +72,7 @@ public:
 
     /** Static function to check if given info will lead to a valid configuration of @ref CLReductionOperation.
      *
-     * @param[in] input     Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+     * @param[in] input     Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32/S32.
      * @param[in] output    Destination tensor info. Data types and data layouts supported: Same as @p input.
      * @param[in] axis      Axis along which to reduce. Supported reduction axis : 0, 1, 2, 3
      * @param[in] op        Reduction operation to perform. Operations supported: MEAN_SUM, PROD, SUM_SQUARE, SUM, MIN, MAX
@@ -95,7 +93,6 @@ private:
     std::vector<CLReductionOperationKernel> _reduction_kernels_vector;
     std::vector<CLFillBorderKernel>         _border_handlers_vector;
     CLReshapeLayer                          _reshape;
-    ReductionOperation                      _op;
     unsigned int                            _num_of_stages;
     unsigned int                            _reduction_axis;
     bool                                    _is_serial;
diff --git a/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h b/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h
index 93ad24e893..ec57bacf07 100644
--- a/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h
+++ b/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h
@@ -47,8 +47,6 @@ class ICLTensor;
  * @f[ out = (x - max(x) * beta) - log(\sum{e^{x - max(x) * beta}}) @f]
  *
  * This function runs the following kernels:
- * -# @ref CLLogits1DMaxKernel
- * -# @ref CLLogits1DShiftExpSumKernel
  * -# @ref CLLogits1DNormKernel
  * And if the reduce_end_axis is not 0, the function will use one of the the following kernels to reshape the input and
  * perform softmax on the reshaped input:
@@ -63,36 +61,36 @@ public:
     CLSoftmaxLayerGeneric(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
     /** Set the input and output tensors.
      *
-     * @param[in]  input           Source tensor. Data types supported: QASYMM8/F16/F32 for Softmax and F16/F32 for Log Softmax
+     * @param[in]  input           Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 for Softmax and F16/F32 for Log Softmax
      * @param[out] output          Destination tensor. Data types supported: same as @p input
      * @param[in]  beta            (Optional) A scaling factor for the exponent. Defaults to 1.f
      * @param[in]  reduce_end_axis (Optional) The last axis of the first n dimensions (inclusive)to reduce. Defaults to 0.
-     *                   It has the purpose of squashing together the first n dimensions till (including) the @p reduce_end_axis. For instance, given a [2x3x4x5] image,
-     *                   when @p reduce_end_axis is 1, the reduction will be applied to axes 0 and 1, and the Softmax op will be applied on each of the [2x3] planes of the input image.
-     *                   Must be in range [0, input_num_dimensions).
+     *                             It has the purpose of squashing together the first n dimensions till (including) the @p reduce_end_axis. For instance, given a [2x3x4x5] image,
+     *                             when @p reduce_end_axis is 1, the reduction will be applied to axes 0 and 1, and the Softmax op will be applied on each of the [2x3] planes of the input image.
+     *                             Must be in range [0, input_num_dimensions).
      */
     void configure(const ICLTensor *input, ICLTensor *output, float beta = 1.0f, size_t reduce_end_axis = 0);
     /** Set the input and output tensors.
      *
      * @param[in]  compile_context The compile context to be used.
-     * @param[in]  input           Source tensor. Data types supported: QASYMM8/F16/F32 for Softmax and F16/F32 for Log Softmax
+     * @param[in]  input           Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 for Softmax and F16/F32 for Log Softmax
      * @param[out] output          Destination tensor. Data types supported: same as @p input
      * @param[in]  beta            (Optional) A scaling factor for the exponent. Defaults to 1.f
      * @param[in]  reduce_end_axis (Optional) The last axis of the first n dimensions (inclusive)to reduce. Defaults to 0.
-     *                   It has the purpose of squashing together the first n dimensions till (including) the @p reduce_end_axis. For instance, given a [2x3x4x5] image,
-     *                   when @p reduce_end_axis is 1, the reduction will be applied to axes 0 and 1, and the Softmax op will be applied on each of the [2x3] planes of the input image.
-     *                   Must be in range [0, input_num_dimensions).
+     *                             It has the purpose of squashing together the first n dimensions till (including) the @p reduce_end_axis. For instance, given a [2x3x4x5] image,
+     *                             when @p reduce_end_axis is 1, the reduction will be applied to axes 0 and 1, and the Softmax op will be applied on each of the [2x3] planes of the input image.
+     *                             Must be in range [0, input_num_dimensions).
      */
     void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, float beta = 1.0f, size_t reduce_end_axis = 0);
     /** Static function to check if given info will lead to a valid configuration of @ref CLSoftmaxLayer
      *
-     * @param[in] input           Source tensor. Data types supported: QASYMM8/F16/F32 for Softmax and F16/F32 for Log Softmax
+     * @param[in] input           Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 for Softmax and F16/F32 for Log Softmax
      * @param[in] output          Destination tensor. Data types supported: same as @p input
      * @param[in] beta            (Optional) A scaling factor for the exponent. Defaults to 1.f
      * @param[in] reduce_end_axis (Optional) The last axis of the first n dimensions (inclusive)to reduce. Defaults to 0.
-     *                   It has the purpose of squashing together the first n dimensions till (including) the @p reduce_end_axis. For instance, given a [2x3x4x5] image,
-     *                   when @p reduce_end_axis is 1, the reduction will be applied to axes 0 and 1, and the Softmax op will be applied on each of the [2x3] planes of the input image.
-     *                   Must be in range [0, input_num_dimensions).
+     *                            It has the purpose of squashing together the first n dimensions till (including) the @p reduce_end_axis. For instance, given a [2x3x4x5] image,
+     *                            when @p reduce_end_axis is 1, the reduction will be applied to axes 0 and 1, and the Softmax op will be applied on each of the [2x3] planes of the input image.
+     *                            Must be in range [0, input_num_dimensions).
      * @return a status
      */
     static Status validate(const ITensorInfo *input, const ITensorInfo *output, float beta = 1.0f, size_t reduce_end_axis = 0);
@@ -111,9 +109,9 @@ private:
      * @param[in] input           Original source tensor.
      * @param[in] output          Original destination tensor.
      * @param[in] reduce_end_axis (Optional) The last axis of the first n dimensions (inclusive)to reduce. Defaults to 0.
-     *                   It has the purpose of squashing together the first n dimensions till (including) the @p reduce_end_axis. For instance, given a [2x3x4x5] image,
-     *                   when @p reduce_end_axis is 1, the reduction will be applied to axes 0 and 1, and the Softmax op will be applied on each of the [2x3] planes of the input image.
-     *                   Must be in range [0, input_num_dimensions).
+     *                            It has the purpose of squashing together the first n dimensions till (including) the @p reduce_end_axis. For instance, given a [2x3x4x5] image,
+     *                            when @p reduce_end_axis is 1, the reduction will be applied to axes 0 and 1, and the Softmax op will be applied on each of the [2x3] planes of the input image.
+     *                            Must be in range [0, input_num_dimensions).
      */
     void configure_reshape_input_kernel(const ICLTensor *input, const ICLTensor *output, size_t reduce_end_axis);
     /** Utility method to configure the kernels needed to flatten the input
@@ -127,9 +125,9 @@ private:
      * @param[in] input           Original source tensor.
      * @param[in] output          Original destination tensor.
      * @param[in] reduce_end_axis (Optional) The last axis of the first n dimensions (inclusive)to reduce. Defaults to 0.
-     *                   It has the purpose of squashing together the first n dimensions till (including) the @p reduce_end_axis. For instance, given a [2x3x4x5] image,
-     *                   when @p reduce_end_axis is 1, the reduction will be applied to axes 0 and 1, and the Softmax op will be applied on each of the [2x3] planes of the input image.
-     *                   Must be in range [0, input_num_dimensions).
+     *                            It has the purpose of squashing together the first n dimensions till (including) the @p reduce_end_axis. For instance, given a [2x3x4x5] image,
+     *                            when @p reduce_end_axis is 1, the reduction will be applied to axes 0 and 1, and the Softmax op will be applied on each of the [2x3] planes of the input image.
+     *                            Must be in range [0, input_num_dimensions).
      */
     void configure_reshape_input_kernel(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *output, size_t reduce_end_axis);
 
diff --git a/arm_compute/runtime/CL/functions/CLUnstack.h b/arm_compute/runtime/CL/functions/CLUnstack.h
index bdef44fb0d..5d4d5710ab 100644
--- a/arm_compute/runtime/CL/functions/CLUnstack.h
+++ b/arm_compute/runtime/CL/functions/CLUnstack.h
@@ -48,8 +48,8 @@ public:
     CLUnstack();
     /** Set the input, output and unstacking axis.
      *
-     * @param[in]     input         A tensor to be unstacked. Data type supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32.
-     * @param[in,out] output_vector A vector of tensors. Data types supported: Same as @p input.
+     * @param[in]     input         A tensor to be unstacked. Data type supported: All.
+     * @param[in,out] output_vector A vector of tensors. Data types supported: same as @p input.
      *                              Note: The number of elements of the vector will be used as the number of slices to be taken from the axis.
      * @param[in]     axis          The axis to unstack along. Valid values are [-R,R) where R is the input's rank. Negative values wrap around.
      *
@@ -58,8 +58,8 @@ public:
     /** Set the input, output and unstacking axis.
      *
      * @param[in]     compile_context The compile context to be used.
-     * @param[in]     input           A tensor to be unstacked. Data type supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32.
-     * @param[in,out] output_vector   A vector of tensors. Data types supported: Same as @p input.
+     * @param[in]     input           A tensor to be unstacked. Data type supported: All.
+     * @param[in,out] output_vector   A vector of tensors. Data types supported: same as @p input.
      *                                Note: The number of elements of the vector will be used as the number of slices to be taken from the axis.
      * @param[in]     axis            The axis to unstack along. Valid values are [-R,R) where R is the input's rank. Negative values wrap around.
      *
@@ -67,8 +67,8 @@ public:
     void configure(const CLCompileContext &compile_context, const ICLTensor *input, const std::vector<ICLTensor *> &output_vector, int axis);
     /** Static function to check if given info will lead to a valid configuration of @ref CLUnstack
      *
-     * @param[in] input         Input tensor info. Data type supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32
-     * @param[in] output_vector Vector of output tensors' info. Data types supported: Same as @p input.
+     * @param[in] input         Input tensor info. Data type supported: All.
+     * @param[in] output_vector Vector of output tensors' info. Data types supported: same as @p input.
      * @param[in] axis          The axis to unstack along. Valid values are [-R,R) where R is the input's rank. Negative values wrap around.
      *
      * @return a status
diff --git a/arm_compute/runtime/CL/functions/CLUpsampleLayer.h b/arm_compute/runtime/CL/functions/CLUpsampleLayer.h
index dcaa13471f..07b4c8aecb 100644
--- a/arm_compute/runtime/CL/functions/CLUpsampleLayer.h
+++ b/arm_compute/runtime/CL/functions/CLUpsampleLayer.h
@@ -53,7 +53,7 @@ public:
 
     /** Initialize the function's source, destination, interpolation type and border_mode.
      *
-     * @param[in]  input             Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+     * @param[in]  input             Source tensor. Data type supported: All.
      * @param[out] output            Destination tensor. Data types supported: same as @p input.
      * @param[in]  info              Contains stride information described in @ref Size2D.
      * @param[in]  upsampling_policy Defines the policy to fill the intermediate pixels.
@@ -63,16 +63,16 @@ public:
     /** Initialize the function's source, destination, interpolation type and border_mode.
      *
      * @param[in]  compile_context   The compile context to be used.
-     * @param[in]  input             Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+     * @param[in]  input             Source tensor. Data type supported: All.
      * @param[out] output            Destination tensor. Data types supported: same as @p input.
      * @param[in]  info              Contains stride information described in @ref Size2D.
      * @param[in]  upsampling_policy Defines the policy to fill the intermediate pixels.
      */
     void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output,
                    const Size2D &info, const InterpolationPolicy upsampling_policy);
-    /** Static function to check if given info will lead to a valid configuration of @ref CLDeconvolutionLayerUpsample
+    /** Static function to check if given info will lead to a valid configuration of @ref CLUpsampleLayerKernel
      *
-     * @param[in] input             Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+     * @param[in] input             Source tensor info. Data types supported: All.
      * @param[in] output            Destination tensor info. Data types supported: same as @p input.
      * @param[in] info              Contains  stride information described in @ref Size2D.
      * @param[in] upsampling_policy Defines the policy to fill the intermediate pixels.
author	Michele Di Giorgio <michele.digiorgio@arm.com>	2020-07-06 11:27:21 +0100
committer	Michele Di Giorgio <michele.digiorgio@arm.com>	2020-07-20 12:39:29 +0000
commit	f6f7876e9ee8b58a8a6b335b032d554412fa3983 (patch)
tree	669c86bfc60ec99965151022a4112c53116b06c0 /arm_compute
parent	954051f449788f22eb605b126f71af923950ca29 (diff)
download	ComputeLibrary-f6f7876e9ee8b58a8a6b335b032d554412fa3983.tar.gz