From 338435607fc5291ff991f38aa15d4df5097d1a2d Mon Sep 17 00:00:00 2001
From: Georgios Pinitas
Date: Tue, 10 Dec 2019 13:33:18 +0000
Subject: COMPMID-2754: Add support for QASYMM8_SIGNED in NE kernels/functions.

Kernels/Functions extended support:
- NEBatchToSpaceLayerKernel/NEBatchToSpaceLayer
- NEChannelShuffleLayerKernel/NEChannelShuffleLayer
- NECol2ImKernel/NECol2Im
- NEConvertFullyConnectedWeightsKernel/NEConvertFullyConnectedWeights
- NECopyKernel/NECopy
- NEConvolutionLayerReshapeWeights
- NEDepthToSpaceLayerKernel/NEDepthToSpaceLayer
- NEFlattenLayerKernel/NEFlattenLayer
- NEFillBorderKernel
- NEFullyConnectedLayerReshapeWeights
- NEGatherKernel/NEGather
- NEGEMMInterleave4x4Kernel
- NEGEMMTranspose1xWKernel
- NEIm2ColKernel/NEIm2Col
- NEMemsetKernel
- NEPadLayerKernel/NEPadLayer
- NEPermuteKernel/NEPermute
- NEReverseKernel/NEReverse
- NEReorgLayerKernel/NEReorgLayer
- NEReshapeLayerKernel/NEReshapeLayer
- NESplit
- NESlice
- NEStridedSliceKernel/NEStridedSlice
- NESpaceToBatchLayerKernel/NESpaceToBatchLayer
- NESpaceToDepthLayerKernel/NESpaceToDepthLayer
- NEStackLayerKernel/NEStackLayer
- NETileKernel/NETile
- NETransposeKernel/NETranspose
- NEWidthConcatenateLayerKernel/NEWidthConcatenateLayer
- NEHeightConcatenateLayerKernel/NEHeightConcatenateLayer
- NEDepthConcatenateLayerKernel/NEDepthConcatenateLayer
- NEBatchConcatenateLayerKernel/NEBatchConcatenateLayer

Signed-off-by: Georgios Pinitas
Change-Id: Ia070332ad4c4dbced2541dc46f7f2f3a86833b65
Reviewed-on: https://review.mlplatform.org/c/2442
Tested-by: Arm Jenkins
Reviewed-by: Michele Di Giorgio
Comments-Addressed: Arm Jenkins
---
 .../NEON/kernels/NEBatchConcatenateLayerKernel.h   |  5 +-
 .../core/NEON/kernels/NEBatchToSpaceLayerKernel.h  |  8 +--
 .../NEON/kernels/NEChannelShuffleLayerKernel.h     |  4 +-
 arm_compute/core/NEON/kernels/NECol2ImKernel.h     |  4 +-
 .../kernels/NEConvertFullyConnectedWeightsKernel.h |  5 +-
 arm_compute/core/NEON/kernels/NECopyKernel.h       |  4 +-
 .../NEON/kernels/NEDepthConcatenateLayerKernel.h   |  5 +-
 .../core/NEON/kernels/NEDepthToSpaceLayerKernel.h  |  5 +-
 arm_compute/core/NEON/kernels/NEFillBorderKernel.h |  3 +-
 .../core/NEON/kernels/NEFlattenLayerKernel.h       |  4 +-
 .../core/NEON/kernels/NEGEMMTranspose1xWKernel.h   |  5 +-
 arm_compute/core/NEON/kernels/NEGatherKernel.h     |  5 +-
 .../NEON/kernels/NEHeightConcatenateLayerKernel.h  |  5 +-
 arm_compute/core/NEON/kernels/NEIm2ColKernel.h     |  4 +-
 arm_compute/core/NEON/kernels/NEMemsetKernel.h     |  2 +-
 arm_compute/core/NEON/kernels/NEPadLayerKernel.h   |  4 +-
 arm_compute/core/NEON/kernels/NEPermuteKernel.h    |  5 +-
 arm_compute/core/NEON/kernels/NEReorgLayerKernel.h |  4 +-
 .../core/NEON/kernels/NEReshapeLayerKernel.h       |  5 +-
 arm_compute/core/NEON/kernels/NEReverseKernel.h    |  4 +-
 arm_compute/core/NEON/kernels/NESelectKernel.h     |  7 +--
 .../core/NEON/kernels/NESpaceToBatchLayerKernel.h  |  9 ++--
 .../core/NEON/kernels/NESpaceToDepthLayerKernel.h  |  4 +-
 arm_compute/core/NEON/kernels/NEStackLayerKernel.h |  4 +-
 .../core/NEON/kernels/NEStridedSliceKernel.h       |  6 +--
 arm_compute/core/NEON/kernels/NETileKernel.h       |  4 +-
 arm_compute/core/NEON/kernels/NETransposeKernel.h  |  6 +--
 .../NEON/kernels/NEWidthConcatenateLayerKernel.h   |  5 +-
 arm_compute/core/PixelValue.h                      |  5 +-
 .../runtime/NEON/functions/NEBatchToSpaceLayer.h   |  8 +--
 .../runtime/NEON/functions/NEChannelShuffleLayer.h |  4 +-
 arm_compute/runtime/NEON/functions/NECol2Im.h      |  4 +-
 .../runtime/NEON/functions/NEConcatenateLayer.h    |  4 +-
 .../functions/NEConvertFullyConnectedWeights.h     |  5 +-
 arm_compute/runtime/NEON/functions/NECopy.h        |  4 +-
 .../runtime/NEON/functions/NEDepthToSpaceLayer.h   |  5 +-
 arm_compute/runtime/NEON/functions/NEFill.h        |  2 +-
 arm_compute/runtime/NEON/functions/NEFillBorder.h  |  5 +-
 .../runtime/NEON/functions/NEFlattenLayer.h        |  4 +-
 .../runtime/NEON/functions/NEFullyConnectedLayer.h |  4 +-
 .../runtime/NEON/functions/NEGEMMTranspose1xW.h    |  5 +-
 arm_compute/runtime/NEON/functions/NEGather.h      |  6 +--
 arm_compute/runtime/NEON/functions/NEIm2Col.h      |  5 +-
 arm_compute/runtime/NEON/functions/NEPadLayer.h    |  8 +--
 arm_compute/runtime/NEON/functions/NEPermute.h     |  5 +-
 arm_compute/runtime/NEON/functions/NEReorgLayer.h  |  4 +-
 .../runtime/NEON/functions/NEReshapeLayer.h        |  5 +-
 arm_compute/runtime/NEON/functions/NEReverse.h     |  4 +-
 arm_compute/runtime/NEON/functions/NESelect.h      |  4 +-
 arm_compute/runtime/NEON/functions/NESlice.h       |  4 +-
 .../runtime/NEON/functions/NESpaceToBatchLayer.h   |  8 +--
 .../runtime/NEON/functions/NESpaceToDepthLayer.h   |  4 +-
 arm_compute/runtime/NEON/functions/NESplit.h       |  4 +-
 arm_compute/runtime/NEON/functions/NEStackLayer.h  |  4 +-
 .../runtime/NEON/functions/NEStridedSlice.h        |  4 +-
 arm_compute/runtime/NEON/functions/NETile.h        |  4 +-
 arm_compute/runtime/NEON/functions/NETranspose.h   |  4 +-
 .../NEON/kernels/NEBatchConcatenateLayerKernel.cpp | 16 ++++--
 .../NEON/kernels/NEBatchToSpaceLayerKernel.cpp     |  1 +
 .../NEON/kernels/NEChannelShuffleLayerKernel.cpp   |  7 +--
 src/core/NEON/kernels/NECol2ImKernel.cpp           |  1 +
 .../NEConvertFullyConnectedWeightsKernel.cpp       | 14 +++---
 src/core/NEON/kernels/NECopyKernel.cpp             |  1 +
 .../NEON/kernels/NEDepthConcatenateLayerKernel.cpp | 15 +++++-
 .../NEON/kernels/NEDepthToSpaceLayerKernel.cpp     |  1 +
 src/core/NEON/kernels/NEFillBorderKernel.cpp       |  6 +--
 src/core/NEON/kernels/NEFlattenLayerKernel.cpp     |  6 +--
 .../NEON/kernels/NEGEMMInterleave4x4Kernel.cpp     |  1 +
 src/core/NEON/kernels/NEGEMMTranspose1xWKernel.cpp |  5 +-
 src/core/NEON/kernels/NEGatherKernel.cpp           | 58 +++++++++++-----------
 .../kernels/NEHeightConcatenateLayerKernel.cpp     | 19 ++++---
 src/core/NEON/kernels/NEPadLayerKernel.cpp         |  2 +
 src/core/NEON/kernels/NEPermuteKernel.cpp          |  1 +
 src/core/NEON/kernels/NEReorgLayerKernel.cpp       |  6 +--
 src/core/NEON/kernels/NEReshapeLayerKernel.cpp     |  4 +-
 src/core/NEON/kernels/NEReverseKernel.cpp          |  6 +--
 .../NEON/kernels/NESpaceToBatchLayerKernel.cpp     |  2 +
 .../NEON/kernels/NESpaceToDepthLayerKernel.cpp     |  1 +
 src/core/NEON/kernels/NEStackLayerKernel.cpp       |  4 +-
 src/core/NEON/kernels/NEStridedSliceKernel.cpp     |  6 +--
 src/core/NEON/kernels/NETileKernel.cpp             |  3 +-
 src/core/NEON/kernels/NETransposeKernel.cpp        |  5 +-
 .../NEON/kernels/NEWidthConcatenateLayerKernel.cpp | 19 ++++---
 src/runtime/NEON/functions/NECol2Im.cpp            |  7 +--
 src/runtime/NEON/functions/NECopy.cpp              |  7 +--
 src/runtime/NEON/functions/NEFillBorder.cpp        |  7 +--
 src/runtime/NEON/functions/NEFlattenLayer.cpp      |  9 ++--
 src/runtime/NEON/functions/NEGEMMTranspose1xW.cpp  |  7 +--
 src/runtime/NEON/functions/NEGather.cpp            |  1 -
 src/runtime/NEON/functions/NEIm2Col.cpp            |  7 +--
 src/runtime/NEON/functions/NEPermute.cpp           |  7 +--
 src/runtime/NEON/functions/NEReshapeLayer.cpp      |  7 +--
 src/runtime/NEON/functions/NESelect.cpp            |  4 +-
 src/runtime/NEON/functions/NETranspose.cpp         |  9 ++--
 tests/validation/NEON/BatchConcatenateLayer.cpp    | 10 ++--
 tests/validation/NEON/DepthConcatenateLayer.cpp    | 10 ++--
 tests/validation/NEON/HeightConcatenateLayer.cpp   | 13 +++--
 tests/validation/NEON/WidthConcatenateLayer.cpp    | 10 ++--
 tests/validation/reference/ConcatenateLayer.cpp    | 23 +++++++-
 99 files changed, 347 insertions(+), 272 deletions(-)

diff --git
a/arm_compute/core/NEON/kernels/NEBatchConcatenateLayerKernel.h b/arm_compute/core/NEON/kernels/NEBatchConcatenateLayerKernel.h index 69223eea6a..e3a7847025 100644 --- a/arm_compute/core/NEON/kernels/NEBatchConcatenateLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEBatchConcatenateLayerKernel.h @@ -29,6 +29,7 @@ namespace arm_compute { +// Forward declarations class ITensor; /** Interface for the batch concatenate kernel. @@ -55,7 +56,7 @@ public: ~NEBatchConcatenateLayerKernel() = default; /** Initialise the kernel's inputs and output * - * @param[in] input Input tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[in] input Input tensor. Data types supported: All. * @param[in] batch_offset The offset on axis # 3. * @param[in,out] output Output tensor. Data types supported: Same as @p input. * @@ -66,7 +67,7 @@ public: void configure(const ITensor *input, unsigned int batch_offset, ITensor *output); /** Static function to check if given info will lead to a valid configuration of @ref NEBatchConcatenateLayerKernel * - * @param[in] input Input tensor info. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[in] input Input tensor info. Data types supported: All. * @param[in] batch_offset The offset on axis # 3. * @param[in] output Output tensor info. Data types supported: Same as @p input. * diff --git a/arm_compute/core/NEON/kernels/NEBatchToSpaceLayerKernel.h b/arm_compute/core/NEON/kernels/NEBatchToSpaceLayerKernel.h index cd1fa7e443..e9bbf4eb03 100644 --- a/arm_compute/core/NEON/kernels/NEBatchToSpaceLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEBatchToSpaceLayerKernel.h @@ -52,14 +52,14 @@ public: ~NEBatchToSpaceLayerKernel() = default; /** Initialise the kernel's inputs and output. * - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. * @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32 * @param[out] output Tensor output. Data types supported: same as @p input */ void configure(const ITensor *input, const ITensor *block_shape, ITensor *output); /** Initialise the kernel's inputs and output (Static block shape). * - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. * @param[in] block_shape_x Block shape x value. * @param[in] block_shape_y Block shape y value. * @param[out] output Tensor output. Data types supported: same as @p input @@ -67,7 +67,7 @@ public: void configure(const ITensor *input, const int32_t block_shape_x, const int32_t block_shape_y, ITensor *output); /** Static function to check if given info will lead to a valid configuration of @ref NEBatchToSpaceLayerKernel * - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. * @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32 * @param[in] output Tensor output. 
Data types supported: same as @p input * @@ -76,7 +76,7 @@ public: static Status validate(const ITensorInfo *input, const ITensorInfo *block_shape, const ITensorInfo *output); /** Static function to check if given info will lead to a valid configuration of @ref NEBatchToSpaceLayerKernel (Static block shape). * - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. * @param[in] block_shape_x Block shape x value. * @param[in] block_shape_y Block shape y value. * @param[in] output Tensor output. Data types supported: same as @p input diff --git a/arm_compute/core/NEON/kernels/NEChannelShuffleLayerKernel.h b/arm_compute/core/NEON/kernels/NEChannelShuffleLayerKernel.h index b6ec60030f..71659c4fcb 100644 --- a/arm_compute/core/NEON/kernels/NEChannelShuffleLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEChannelShuffleLayerKernel.h @@ -53,14 +53,14 @@ public: ~NEChannelShuffleLayerKernel() = default; /** Configure function's inputs and outputs. * - * @param[in] input Input tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 + * @param[in] input Input tensor. Data types supported: All * @param[out] output Output tensor. Data type supported: Same as @p input * @param[in] num_groups Number of groups. Must be greater than 1 and the number of channels of the tensors must be a multiple of the number of groups. */ void configure(const ITensor *input, ITensor *output, unsigned int num_groups); /** Static function to check if given info will lead to a valid configuration of @ref NEChannelShuffleLayerKernel * - * @param[in] input Input tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 + * @param[in] input Input tensor. Data types supported: All * @param[out] output Output tensor. Data type supported: Same as @p input * @param[in] num_groups Number of groups. Must be greater than 1 and the number of channels of the tensors must be a multiple of the number of groups. * diff --git a/arm_compute/core/NEON/kernels/NECol2ImKernel.h b/arm_compute/core/NEON/kernels/NECol2ImKernel.h index 7c19f08b3b..9aa1062622 100644 --- a/arm_compute/core/NEON/kernels/NECol2ImKernel.h +++ b/arm_compute/core/NEON/kernels/NECol2ImKernel.h @@ -72,7 +72,7 @@ public: /** Set the input and output of the kernel. * - * @param[in] input The input tensor to convert. Data types supported: Any + * @param[in] input The input tensor to convert. Data types supported: All * @param[out] output The output tensor. 3 lower dimensions represent a single output [width, height, OFM], * while the rest represent batch of outputs. Data types supported: Same as @p input * @param[in] convolved_dims Output convolved dimensions. @@ -80,7 +80,7 @@ public: void configure(const ITensor *input, ITensor *output, const Size2D &convolved_dims); /** Static function to check if given info will lead to a valid configuration of @ref NECol2ImKernel * - * @param[in] input The input tensor to convert. Data types supported: Any + * @param[in] input The input tensor to convert. Data types supported: All * @param[in] output The output tensor. 3 lower dimensions represent a single output [width, height, OFM], * while the rest represent batch of outputs. Data types supported: Same as @p input * @param[in] convolved_dims Output convolved dimensions. 
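
Not part of the patch itself: a minimal usage sketch of how one of the data-movement functions touched here can now be run on a QASYMM8_SIGNED tensor. Shapes and quantization parameters are illustrative only; NECopy is used because its configure(ITensor *, ITensor *) signature appears unchanged in this diff.

```cpp
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NECopy.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    // Illustrative shape and quantization; any QASYMM8_SIGNED tensor should be accepted.
    const TensorShape      shape(8U, 4U);
    const QuantizationInfo qinfo(0.25f, -10);

    Tensor src;
    Tensor dst;
    src.allocator()->init(TensorInfo(shape, 1, DataType::QASYMM8_SIGNED, qinfo));
    dst.allocator()->init(TensorInfo(shape, 1, DataType::QASYMM8_SIGNED, qinfo));

    // Configure before allocating backing memory, as with other NEON functions.
    NECopy copy;
    copy.configure(&src, &dst);

    src.allocator()->allocate();
    dst.allocator()->allocate();

    copy.run();
    return 0;
}
```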
diff --git a/arm_compute/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.h b/arm_compute/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.h index c54339da72..d45191949a 100644 --- a/arm_compute/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.h +++ b/arm_compute/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.h @@ -28,6 +28,7 @@ namespace arm_compute { +// Forward declarations class ITensor; /** Interface to convert the 2D Fully Connected weights from NCHW to NHWC or vice versa. @@ -59,7 +60,7 @@ public: ~NEConvertFullyConnectedWeightsKernel() = default; /** Set the input and output tensor. * - * @param[in] input Source weights tensor to convert. Must be 2 dimensional. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32. + * @param[in] input Source weights tensor to convert. Must be 2 dimensional. Data types supported: All. * @param[out] output The converted weights tensor. Shape and Data Type: Same as @p input. * @param[in] original_input_shape Shape of the original input tensor (the one entering fully connected layer). * @param[in] data_layout The data layout the weights have been trained in. @@ -67,7 +68,7 @@ public: void configure(const ITensor *input, ITensor *output, const TensorShape &original_input_shape, DataLayout data_layout); /** Static function to check if given info will lead to a valid configuration of @ref NEConvertFullyConnectedWeightsKernel * - * @param[in] input Source weights tensor info to convert. Must be 2 dimensional. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32. + * @param[in] input Source weights tensor info to convert. Must be 2 dimensional. Data types supported: All. * @param[in] output The converted weights tensor info. Shape and Data Type: Same as @p input. * @param[in] original_input_shape Shape of the original input tensor (the one entering fully connected layer). * @param[in] data_layout The data layout the weights have been trained in. diff --git a/arm_compute/core/NEON/kernels/NECopyKernel.h b/arm_compute/core/NEON/kernels/NECopyKernel.h index db3f6a8ae8..d2dbbaef98 100644 --- a/arm_compute/core/NEON/kernels/NECopyKernel.h +++ b/arm_compute/core/NEON/kernels/NECopyKernel.h @@ -51,14 +51,14 @@ public: NECopyKernel &operator=(NECopyKernel &&) = default; /** Initialize the kernel's input, output. * - * @param[in] input Source tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[in] input Source tensor. Data types supported: All * @param[out] output Destination tensor. Data types supported: same as @p input. * @param[in] padding (Optional) Padding to be applied to the input tensor */ void configure(const ITensor *input, ITensor *output, const PaddingList &padding = PaddingList()); /** Static function to check if given info will lead to a valid configuration of @ref NECopyKernel * - * @param[in] input Source tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[in] input Source tensor. Data types supported: All * @param[in] output Destination tensor. Data types supported: same as @p input. 
* @param[in] padding (Optional) Padding to be applied to the input tensor * diff --git a/arm_compute/core/NEON/kernels/NEDepthConcatenateLayerKernel.h b/arm_compute/core/NEON/kernels/NEDepthConcatenateLayerKernel.h index 4fcaa24707..ddbd0983e4 100644 --- a/arm_compute/core/NEON/kernels/NEDepthConcatenateLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEDepthConcatenateLayerKernel.h @@ -29,6 +29,7 @@ namespace arm_compute { +// Forward declarations class ITensor; /** Interface for the depth concatenate kernel. @@ -55,7 +56,7 @@ public: ~NEDepthConcatenateLayerKernel() = default; /** Initialise the kernel's inputs and output * - * @param[in] input Input tensor. Data types supported: QASYMM8/F16/F32. + * @param[in] input Input tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. * @param[in] depth_offset The offset on the Z axis. * @param[in,out] output Output tensor. Data types supported: Same as @p input. * @@ -66,7 +67,7 @@ public: void configure(const ITensor *input, unsigned int depth_offset, ITensor *output); /** Static function to check if given info will lead to a valid configuration of @ref NEDepthConcatenateLayerKernel * - * @param[in] input Input tensor info. Data types supported: QASYMM8/F16/F32. + * @param[in] input Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. * @param[in] depth_offset The offset on the Z axis. * @param[in] output Output tensor info. Data types supported: Same as @p input. * diff --git a/arm_compute/core/NEON/kernels/NEDepthToSpaceLayerKernel.h b/arm_compute/core/NEON/kernels/NEDepthToSpaceLayerKernel.h index f27a56c779..3129ea9fb9 100644 --- a/arm_compute/core/NEON/kernels/NEDepthToSpaceLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEDepthToSpaceLayerKernel.h @@ -28,6 +28,7 @@ namespace arm_compute { +// Forward declarations class ITensor; /** Interface for the depth to space kernel */ @@ -52,14 +53,14 @@ public: ~NEDepthToSpaceLayerKernel() = default; /** Initialise the kernel's inputs and output. * - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All * @param[out] output Tensor output. Data types supported: same as @p input * @param[in] block_shape Block shape x value. */ void configure(const ITensor *input, ITensor *output, int32_t block_shape); /** Static function to check if given info will lead to a valid configuration of @ref NEDepthToSpaceLayerKernel. * - * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: All * @param[in] output Tensor output info. Data types supported: same as @p input * @param[in] block_shape Block shape value. * diff --git a/arm_compute/core/NEON/kernels/NEFillBorderKernel.h b/arm_compute/core/NEON/kernels/NEFillBorderKernel.h index b345ec4cc9..0c852e8232 100644 --- a/arm_compute/core/NEON/kernels/NEFillBorderKernel.h +++ b/arm_compute/core/NEON/kernels/NEFillBorderKernel.h @@ -30,6 +30,7 @@ namespace arm_compute { +// Forward declarations class ITensor; /** Interface for the kernel to fill borders */ @@ -57,7 +58,7 @@ public: * * @note This kernel fills the borders within the XY-planes. * - * @param[in,out] tensor Tensor to process. Data types supported: U8/S8/QASYMM8/S16/S32/F32. + * @param[in,out] tensor Tensor to process. Data types supported: All. 
* @param[in] border_size Size of the border to fill in elements. * @param[in] border_mode Border mode to use for the convolution. * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. diff --git a/arm_compute/core/NEON/kernels/NEFlattenLayerKernel.h b/arm_compute/core/NEON/kernels/NEFlattenLayerKernel.h index 8a7597a929..ba2f99857f 100644 --- a/arm_compute/core/NEON/kernels/NEFlattenLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEFlattenLayerKernel.h @@ -54,7 +54,7 @@ public: /** Set the input and output of the kernel. * * @param[in] input First input tensor to flatten with at least 3 dimensions. - * The dimensions above the third will be interpreted as batches. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 + * The dimensions above the third will be interpreted as batches. Data types supported: All * @param[out] output Output tensor with shape [w*h*d, input_batches] where: * w = width input tensor, h = height input tensor and d = depth input tensor. Data type supported: same as @p input */ @@ -62,7 +62,7 @@ public: /** Static function to check if given info will lead to a valid configuration of @ref NEFlattenLayerKernel * * @param[in] input First input tensor to flatten with at least 3 dimensions. - * The dimensions above the third will be interpreted as batches. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 + * The dimensions above the third will be interpreted as batches. Data types supported: All * @param[out] output Output tensor with shape [w*h*d, input_batches] where: * w = width input tensor, h = height input tensor and d = depth input tensor. Data type supported: same as @p input * diff --git a/arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h b/arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h index 0e2cf0e82d..967a1b73dc 100644 --- a/arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h +++ b/arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h @@ -28,6 +28,7 @@ namespace arm_compute { +// Forward declarations class ITensor; /** NEON kernel which transposes the elements of a matrix in chunks of 1xW, where W is equal to (16 / element size of the tensor) @@ -74,13 +75,13 @@ public: } /** Initialise the kernel's input and output. * - * @param[in] input Input tensor. Data types supported: U8/S8/QASYMM8/QSYMM8_PER_CHANNEL/U16/S16/F16/U32/S32/F32 + * @param[in] input Input tensor. Data types supported: All * @param[out] output Output tensor. Data type supported: same as @p input. */ void configure(const ITensor *input, ITensor *output); /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMTranspose1xWKernel * - * @param[in] input Input tensor info. Data types supported: U8/S8/QASYMM8/QSYMM8_PER_CHANNEL/U16/S16/F16/U32/S32/F32 + * @param[in] input Input tensor info. Data types supported: All * @param[in] output Output tensor info. Data type supported: same as @p input. * * @return a status diff --git a/arm_compute/core/NEON/kernels/NEGatherKernel.h b/arm_compute/core/NEON/kernels/NEGatherKernel.h index e619ee3574..bfef40b53b 100644 --- a/arm_compute/core/NEON/kernels/NEGatherKernel.h +++ b/arm_compute/core/NEON/kernels/NEGatherKernel.h @@ -30,6 +30,7 @@ namespace arm_compute { +// Forward declarations class ITensor; /** Kernel to perform other operation on NEON */ @@ -59,7 +60,7 @@ public: } /** Initialise the kernel's inputs and outputs * - * @param[in] input Source tensor. Supported tensor rank: up to 4. 
Data type supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32 + * @param[in] input Source tensor. Supported tensor rank: up to 4. Data type supported: All * @param[in] indices Indices tensor. Supported tensor rank: up to 1. Must be one of the following type: U32/S32. Each value Must be in range [0, input.shape[@p axis]) * @param[out] output Destination tensor. Data type supported: Same as @p input * @param[in] axis (Optional) The axis in @p input to gather @p indices from. Negative values wrap around. Defaults to 0 @@ -67,7 +68,7 @@ public: void configure(const ITensor *input, const ITensor *indices, ITensor *output, int axis = 0); /** Static function to check if given info will lead to a valid configuration of @ref NEGatherKernel * - * @param[in] input Source tensor info. Supported tensor rank: up to 4. Data type supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32 + * @param[in] input Source tensor info. Supported tensor rank: up to 4. Data type supported: All * @param[in] indices Indices tensor info. Supported tensor rank: up to 1. Must be one of the following type: U32/S32. Each value Must be in range [0, input.shape[@p axis]) * @param[in] output Destination tensor info. Data type supported: Same as @p input * @param[in] axis (Optional) The axis in @p input to gather @p indices from. Negative values wrap around. Defaults to 0 diff --git a/arm_compute/core/NEON/kernels/NEHeightConcatenateLayerKernel.h b/arm_compute/core/NEON/kernels/NEHeightConcatenateLayerKernel.h index 91370bca41..be81f2e963 100644 --- a/arm_compute/core/NEON/kernels/NEHeightConcatenateLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEHeightConcatenateLayerKernel.h @@ -30,6 +30,7 @@ namespace arm_compute { +// Forward declarations class ITensor; /** Interface for the height concatenate kernel. @@ -56,7 +57,7 @@ public: ~NEHeightConcatenateLayerKernel() = default; /** Initialise the kernel's inputs and output * - * @param[in] input Input tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 + * @param[in] input Input tensor. Data types supported: All * @param[in] height_offset The starting offset on the Y axis for the output tensor. * @param[in,out] output Output tensor. Data types supported: Same as @p input. * @@ -64,7 +65,7 @@ public: void configure(const ITensor *input, unsigned int height_offset, ITensor *output); /** Static function to check if given info will lead to a valid configuration of @ref NEHeightConcatenateLayerKernel * - * @param[in] input Input tensor info. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 + * @param[in] input Input tensor info. Data types supported: All * @param[in] height_offset The starting offset on the Y axis for the output tensor. * @param[in] output Output tensor info. Data types supported: Same as @p input. * diff --git a/arm_compute/core/NEON/kernels/NEIm2ColKernel.h b/arm_compute/core/NEON/kernels/NEIm2ColKernel.h index c9d22da254..8ee9660b95 100644 --- a/arm_compute/core/NEON/kernels/NEIm2ColKernel.h +++ b/arm_compute/core/NEON/kernels/NEIm2ColKernel.h @@ -77,7 +77,7 @@ public: /** Set the input and output of the kernel. * * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM], - * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8/F16/F32 + * while every optional dimension from 4 and above represent a batch of inputs. 
Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 * Note: QASYMM8 works only for has_bias = false * @param[out] output The output tensor. Data types supported: Same as @p input * @param[in] kernel_dims The kernel dimensions (width and height). @@ -91,7 +91,7 @@ public: /** Static function to check if given info will lead to a valid configuration of @ref NEIm2ColKernel * * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM], - * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8/F16/F32 + * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 * Note: QASYMM8 works only for has_bias = false * @param[in] output The output tensor. Data types supported: Same as @p input * @param[in] kernel_dims The kernel dimensions (width and height). diff --git a/arm_compute/core/NEON/kernels/NEMemsetKernel.h b/arm_compute/core/NEON/kernels/NEMemsetKernel.h index 1ff302d6ed..b4bcd11b82 100644 --- a/arm_compute/core/NEON/kernels/NEMemsetKernel.h +++ b/arm_compute/core/NEON/kernels/NEMemsetKernel.h @@ -55,7 +55,7 @@ public: NEMemsetKernel &operator=(NEMemsetKernel &&) = default; /** Initialise the kernel's tensor and filling value * - * @param[in,out] tensor Input tensor to fill. Supported data types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 + * @param[in,out] tensor Input tensor to fill. Supported data types: All * @param[in] constant_value The value used to fill the planes of the tensor */ void configure(ITensor *tensor, const PixelValue &constant_value); diff --git a/arm_compute/core/NEON/kernels/NEPadLayerKernel.h b/arm_compute/core/NEON/kernels/NEPadLayerKernel.h index 8d58fa9c65..4cbefbd1e3 100644 --- a/arm_compute/core/NEON/kernels/NEPadLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEPadLayerKernel.h @@ -56,7 +56,7 @@ public: /** Initialize the function * - * @param[in] input Source tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[in] input Source tensor. Data types supported: All. * @param[out] output Output tensor. Data type supported: same as @p input * @param[in] padding The padding for each spatial dimension of the input tensor. The pair padding[i] * specifies the front and the end padding in the i-th dimension. @@ -67,7 +67,7 @@ public: void configure(ITensor *input, ITensor *output, const PaddingList &padding, const PixelValue constant_value = PixelValue(), const PaddingMode mode = PaddingMode::CONSTANT); /** Static function to check if given info will lead to a valid configuration of @ref NEPadLayer. * - * @param[in] input Source tensor info. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[in] input Source tensor info. Data types supported: All. * @param[in] output Output tensor info. Data type supported: same as @p input * @param[in] padding The padding for each spatial dimension of the input tensor. The pair padding[i] * specifies the front and the end padding in the i-th dimension. diff --git a/arm_compute/core/NEON/kernels/NEPermuteKernel.h b/arm_compute/core/NEON/kernels/NEPermuteKernel.h index 794259c262..89dc4e6fc7 100644 --- a/arm_compute/core/NEON/kernels/NEPermuteKernel.h +++ b/arm_compute/core/NEON/kernels/NEPermuteKernel.h @@ -28,6 +28,7 @@ namespace arm_compute { +// Forward declarations class ITensor; /** NEON kernel to perform tensor permutation. 
@@ -58,7 +59,7 @@ public: * * @note Arbitrary permutation vectors are supported with rank not greater than 4 * - * @param[in] input The input tensor to permute. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 + * @param[in] input The input tensor to permute. Data types supported: All * @param[out] output The output tensor. Data types supported: Same as @p input * @param[in] perm Permutation vector */ @@ -67,7 +68,7 @@ public: * * @note Arbitrary permutation vectors are supported with rank not greater than 4 * - * @param[in] input The input tensor to permute. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 + * @param[in] input The input tensor to permute. Data types supported: All * @param[in] output The output tensor. Data types supported: Same as @p input * @param[in] perm Permutation vector * diff --git a/arm_compute/core/NEON/kernels/NEReorgLayerKernel.h b/arm_compute/core/NEON/kernels/NEReorgLayerKernel.h index 195f0c2336..9277ddbe47 100644 --- a/arm_compute/core/NEON/kernels/NEReorgLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEReorgLayerKernel.h @@ -53,7 +53,7 @@ public: ~NEReorgLayerKernel() = default; /** Set the input and output of the kernel * - * @param[in] input Source tensor. Data type supported: U8/S8/U16/S16/QASYMM8/U32/S32/F16/F32 + * @param[in] input Source tensor. Data type supported: All * @param[out] output Destination tensor. Data type supported: Same as @p input * @param[in] stride Stride to be used during data re-organization. * It defines the spatial distance between 2 consecutive pixels in the x and y direction @@ -62,7 +62,7 @@ public: /** Static function to check if given info will lead to a valid configuration of @ref NEReshapeLayerKernel * - * @param[in] input Source tensor info. Data type supported: U8/S8/U16/S16/QASYMM8/U32/S32/F16/F32 + * @param[in] input Source tensor info. Data type supported: All * @param[in] output Destination tensor info. Data type supported: Same as @p input * @param[in] stride Stride to be used during data re-organization * It defines the spatial distance between 2 consecutive pixels in the x and y direction diff --git a/arm_compute/core/NEON/kernels/NEReshapeLayerKernel.h b/arm_compute/core/NEON/kernels/NEReshapeLayerKernel.h index 2871bc2fb2..fccf2685a8 100644 --- a/arm_compute/core/NEON/kernels/NEReshapeLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEReshapeLayerKernel.h @@ -28,6 +28,7 @@ namespace arm_compute { +// Forward declarations class ITensor; /** Interface for the kernel to perform tensor reshaping */ @@ -40,14 +41,14 @@ public: } /** Set the input and output of the kernel * - * @param[in] input Source tensor. Data type supported: U8/S8/U16/S16/QASYMM8/U32/S32/F16/F32 + * @param[in] input Source tensor. Data type supported: All * @param[out] output Destination tensor. Data type supported: Same as @p input */ void configure(const ITensor *input, ITensor *output); /** Static function to check if given info will lead to a valid configuration of @ref NEReshapeLayerKernel * - * @param[in] input Source tensor info. Data type supported: U8/S8/U16/S16/QASYMM8/U32/S32/F16/F32 + * @param[in] input Source tensor info. Data type supported: All * @param[in] output Destination tensor info. 
Data type supported: Same as @p input * * @return a status diff --git a/arm_compute/core/NEON/kernels/NEReverseKernel.h b/arm_compute/core/NEON/kernels/NEReverseKernel.h index a8a1fdaac8..516653b70d 100644 --- a/arm_compute/core/NEON/kernels/NEReverseKernel.h +++ b/arm_compute/core/NEON/kernels/NEReverseKernel.h @@ -52,7 +52,7 @@ public: ~NEReverseKernel() = default; /** Initialise the kernel's inputs and output * - * @param[in] input Input tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 + * @param[in] input Input tensor. Data types supported: All * @param[out] output Output tensor. Data type supported: Same as @p input * @param[in] axis Axis tensor. Contains the indices of the dimensions to reverse. Data type supported: U32 */ @@ -60,7 +60,7 @@ public: /** Static function to check if given info will lead to a valid configuration of @ref NEReverseKernel * - * @param[in] input Input tensor info. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 + * @param[in] input Input tensor info. Data types supported: All * @param[in] output Output tensor info. Data type supported: Same as @p input * @param[in] axis Axis tensor info. Contains the indices of the dimensions to reverse. Data type supported: U32 * diff --git a/arm_compute/core/NEON/kernels/NESelectKernel.h b/arm_compute/core/NEON/kernels/NESelectKernel.h index 1711246b27..51c8543ddc 100644 --- a/arm_compute/core/NEON/kernels/NESelectKernel.h +++ b/arm_compute/core/NEON/kernels/NESelectKernel.h @@ -29,6 +29,7 @@ namespace arm_compute { +// Forward declarations class ITensor; /** Interface for the select kernel @@ -60,7 +61,7 @@ public: /** Common signature for all the specialised elementwise functions * * @param[in] c Condition input tensor. Data types supported: U8. - * @param[in] x First input tensor. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32. + * @param[in] x First input tensor. Data types supported: All. * @param[out] y Second input tensor. Data types supported: Same as @p x * @param[in] output Output tensor. Data types supported: Same as @p x */ @@ -69,7 +70,7 @@ public: /** Validate the argument passed to the kernel * * @param[in] c Condition input tensor. Data types supported: U8. - * @param[in] x First input tensor. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32. + * @param[in] x First input tensor. Data types supported: All. * @param[in] y Second input tensor. Data types supported: Same as @p x * @param[in] output Output tensor. Data types supported: Same as @p x. * @@ -84,7 +85,7 @@ private: /** Common signature for all the specialised select functions * * @param[in] c Condition input tensor. Data types supported: U8. - * @param[in] x First input tensor. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32. + * @param[in] x First input tensor. Data types supported: All. * @param[in] y Second input tensor. Data types supported: Same as @p x * @param[in] output Output tensor. Data types supported: Same as @p x. */ diff --git a/arm_compute/core/NEON/kernels/NESpaceToBatchLayerKernel.h b/arm_compute/core/NEON/kernels/NESpaceToBatchLayerKernel.h index 79c321f919..ec88f03df4 100644 --- a/arm_compute/core/NEON/kernels/NESpaceToBatchLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NESpaceToBatchLayerKernel.h @@ -29,6 +29,7 @@ namespace arm_compute { +// Forward declaration class ITensor; /** Interface for the space to batch kernel */ @@ -53,7 +54,7 @@ public: ~NESpaceToBatchLayerKernel() = default; /** Initialise the kernel's inputs and output. * - * @param[in] input Tensor input. 
Supported tensor rank: 4. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. * @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32 * @param[in] paddings 2-D tensor with shape [2, M]. Data types supported: S32 * @param[out] output Tensor output. Data types supported: same as @p input @@ -61,7 +62,7 @@ public: void configure(const ITensor *input, const ITensor *block_shape, const ITensor *paddings, ITensor *output); /** Initialise the kernel's input and output. (Static block shape and paddings) * - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. * @param[in] block_shape_x Block shape x value. * @param[in] block_shape_y Block shape y value. * @param[in] padding_left The left padding of the output tensor. @@ -71,7 +72,7 @@ public: void configure(const ITensor *input, const int block_shape_x, const int block_shape_y, const Size2D &padding_left, const Size2D &padding_right, ITensor *output); /** Static function to check if given info will lead to a valid configuration of @ref NESpaceToBatchLayerKernel * - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. * @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32 * @param[in] paddings 2-D tensor with shape [2, M]. Data types supported: S32 * @param[in] output Tensor output. Data types supported: same as @p input @@ -81,7 +82,7 @@ public: static Status validate(const ITensorInfo *input, const ITensorInfo *block_shape, const ITensorInfo *paddings, const ITensorInfo *output); /** Static function to check if given info will lead to a valid configuration of @ref NESpaceToBatchLayerKernel (Static block shape and paddings) * - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. * @param[in] block_shape_x Block shape x value. * @param[in] block_shape_y Block shape y value. * @param[in] padding_left The left padding of the output tensor. diff --git a/arm_compute/core/NEON/kernels/NESpaceToDepthLayerKernel.h b/arm_compute/core/NEON/kernels/NESpaceToDepthLayerKernel.h index fe783a7159..87f8c67635 100644 --- a/arm_compute/core/NEON/kernels/NESpaceToDepthLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NESpaceToDepthLayerKernel.h @@ -53,14 +53,14 @@ public: ~NESpaceToDepthLayerKernel() = default; /** Initialise the kernel's inputs and output. * - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. * @param[out] output Tensor output. Data types supported: same as @p input * @param[in] block_shape Block shape value */ void configure(const ITensor *input, ITensor *output, int32_t block_shape); /** Static function to check if given info will lead to a valid configuration of @ref NESpaceToDepthLayerKernel * - * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[in] input Tensor input info. Supported tensor rank: 4. 
Data types supported: All. * @param[in] output Tensor output info. Data types supported: same as @p input * @param[in] block_shape Block shape value * diff --git a/arm_compute/core/NEON/kernels/NEStackLayerKernel.h b/arm_compute/core/NEON/kernels/NEStackLayerKernel.h index 04ad5d10dc..c4dc53eac6 100644 --- a/arm_compute/core/NEON/kernels/NEStackLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEStackLayerKernel.h @@ -56,7 +56,7 @@ public: * * @note Supported input tensor rank: up to 4 * - * @param[in] input Input tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 + * @param[in] input Input tensor. Data types supported: All * @param[in] axis The dimension to stack the tensors along. It must be smaller than the number of input dimensions. * @param[in] idx_input Index of the input tensor in the list of tensors to stack. * All tensors in the list must have the same shape @@ -69,7 +69,7 @@ public: * * @note Supported input tensor rank: up to 4 * - * @param[in] input Input tensor info. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 + * @param[in] input Input tensor info. Data types supported: All * @param[in] axis The dimension to stack the tensors along. It must be smaller than the number of input dimensions. * @param[in] idx_input Index of the input tensor in the list of tensors to stack * All tensors in the list must have the same shape diff --git a/arm_compute/core/NEON/kernels/NEStridedSliceKernel.h b/arm_compute/core/NEON/kernels/NEStridedSliceKernel.h index fc624f047b..6709619a62 100644 --- a/arm_compute/core/NEON/kernels/NEStridedSliceKernel.h +++ b/arm_compute/core/NEON/kernels/NEStridedSliceKernel.h @@ -58,7 +58,7 @@ public: * * @note Supported tensor rank: up to 4 * - * @param[in] input Source tensor. Data type supported: U8/S8/QASYMM8/U16/S16/QASYMM16/QSYMM16/U32/S32/F16/F32 + * @param[in] input Source tensor. Data type supported: All * @param[out] output Destination tensor. Data type supported: Same as @p input * @param[in] starts The starts of the dimensions of the input tensor to be sliced. The length must be of rank(input). * @param[in] ends The ends of the dimensions of the input tensor to be sliced. The length must be of rank(input). @@ -72,11 +72,11 @@ public: const Coordinates &starts, const Coordinates &ends, const BiStrides &strides, int32_t begin_mask, int32_t end_mask, int32_t shrink_axis_mask); - /** Static function to check if given info will lead to a valid configuration of @ref CLStridedSliceKernel + /** Static function to check if given info will lead to a valid configuration of @ref NEStridedSliceKernel * * @note Supported tensor rank: up to 4 * - * @param[in] input Source tensor info. Data type supported: U8/S8/QASYMM8/U16/S16/QASYMM16/QSYMM16/U32/S32/F16/F32 + * @param[in] input Source tensor info. Data type supported: All * @param[in] output Destination tensor info. Data type supported: Same as @p input * @param[in] starts The starts of the dimensions of the input tensor to be sliced. The length must be of rank(input). * @param[in] ends The ends of the dimensions of the input tensor to be sliced. The length must be of rank(input). diff --git a/arm_compute/core/NEON/kernels/NETileKernel.h b/arm_compute/core/NEON/kernels/NETileKernel.h index 93afc80651..a64470ffd0 100644 --- a/arm_compute/core/NEON/kernels/NETileKernel.h +++ b/arm_compute/core/NEON/kernels/NETileKernel.h @@ -50,14 +50,14 @@ public: } /** Set the source, destination of the kernel * - * @param[in] input Source tensor. 
Data type supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[in] input Source tensor. Data type supported: All. * @param[out] output Destination tensor. Same as @p input * @param[in] multiples Contains the number of times the input tensor should be replicated on the given dimension. */ void configure(const ITensor *input, ITensor *output, const Multiples &multiples); /** Static function to check if given info will lead to a valid configuration of @ref NETileKernel * - * @param[in] input Source tensor info. Data type supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[in] input Source tensor info. Data type supported: All. * @param[in] output Destination tensor info. Same as @p input * @param[in] multiples Contains the number of times the input tensor should be replicated on the given dimension. * diff --git a/arm_compute/core/NEON/kernels/NETransposeKernel.h b/arm_compute/core/NEON/kernels/NETransposeKernel.h index d73813c846..a14dece0d6 100644 --- a/arm_compute/core/NEON/kernels/NETransposeKernel.h +++ b/arm_compute/core/NEON/kernels/NETransposeKernel.h @@ -57,13 +57,13 @@ public: /** Initialise the kernel's input and output. * - * @param[in] input Input tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 + * @param[in] input Input tensor. Data types supported: All * @param[out] output Output tensor. Data type supported: Same as @p input */ void configure(const ITensor *input, ITensor *output); /** Static function to check if given info will lead to a valid configuration of @ref NETransposeKernel * - * @param[in] input Input tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 + * @param[in] input Input tensor. Data types supported: All * @param[in] output Output tensor. Data type supported: Same as @p input * * @return a status @@ -76,7 +76,7 @@ public: private: /** Common signature for all the transpose functions * - * @param[in] input An input tensor. Data types supported: U8/S8/U16/S16/F16/U32/S32/F32 + * @param[in] input An input tensor. Data types supported: All * @param[out] output The output tensor. Data type supported: same as @p input * @param[in] window Region on which to execute the kernel. */ diff --git a/arm_compute/core/NEON/kernels/NEWidthConcatenateLayerKernel.h b/arm_compute/core/NEON/kernels/NEWidthConcatenateLayerKernel.h index ccd70a4ebd..f22f18f09f 100644 --- a/arm_compute/core/NEON/kernels/NEWidthConcatenateLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEWidthConcatenateLayerKernel.h @@ -30,6 +30,7 @@ namespace arm_compute { +// Forward declarations class ITensor; /** Interface for the width concatenate kernel. @@ -56,7 +57,7 @@ public: ~NEWidthConcatenateLayerKernel() = default; /** Initialise the kernel's inputs and output * - * @param[in] input Input tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 + * @param[in] input Input tensor. Data types supported: All * @param[in] width_offset The offset on the X axis. * @param[in,out] output Output tensor. Data types supported: Same as @p input. * @@ -64,7 +65,7 @@ public: void configure(const ITensor *input, unsigned int width_offset, ITensor *output); /** Static function to check if given info will lead to a valid configuration of @ref NEWidthConcatenateLayerKernel * - * @param[in] input Input tensor info. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 + * @param[in] input Input tensor info. Data types supported: All * @param[in] width_offset The offset on the X axis. * @param[in] output Output tensor info. 
Data types supported: Same as @p input. * diff --git a/arm_compute/core/PixelValue.h b/arm_compute/core/PixelValue.h index f39c8670fd..8c2ab92ad9 100644 --- a/arm_compute/core/PixelValue.h +++ b/arm_compute/core/PixelValue.h @@ -59,6 +59,9 @@ public: case DataType::QASYMM8: value.u8 = quantize_qasymm8(static_cast(v), qinfo); break; + case DataType::QASYMM8_SIGNED: + value.u8 = quantize_qasymm8_signed(static_cast(v), qinfo); + break; case DataType::QSYMM8: value.s8 = quantize_qsymm8(static_cast(v), qinfo); break; @@ -311,5 +314,5 @@ public: return val; } }; -} +} // namespace arm_compute #endif /* ARM_COMPUTE_PIXELVALUE_H */ diff --git a/arm_compute/runtime/NEON/functions/NEBatchToSpaceLayer.h b/arm_compute/runtime/NEON/functions/NEBatchToSpaceLayer.h index f0a33a71f2..2a62530246 100644 --- a/arm_compute/runtime/NEON/functions/NEBatchToSpaceLayer.h +++ b/arm_compute/runtime/NEON/functions/NEBatchToSpaceLayer.h @@ -40,14 +40,14 @@ class NEBatchToSpaceLayer : public INESimpleFunctionNoBorder public: /** Set the input and output tensors. * - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. * @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32 * @param[out] output Tensor output. Data types supported: same as @p input */ void configure(const ITensor *input, const ITensor *block_shape, ITensor *output); /** Set the input and output tensors. (Static block shape). * - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. * @param[in] block_shape_x Block shape x value. * @param[in] block_shape_y Block shape y value. * @param[out] output Tensor output. Data types supported: same as @p input @@ -55,7 +55,7 @@ public: void configure(const ITensor *input, int32_t block_shape_x, int32_t block_shape_y, ITensor *output); /** Static function to check if given info will lead to a valid configuration of @ref CLBatchToSpaceLayer * - * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: All. * @param[in] block_shape block shape tensor info with shape [M]. Data types supported: S32 * @param[out] output Tensor output info. Data types supported: same as @p input * @@ -64,7 +64,7 @@ public: static Status validate(const ITensorInfo *input, const ITensorInfo *block_shape, const ITensorInfo *output); /** Static function to check if given info will lead to a valid configuration of @ref CLBatchToSpaceLayer (Static block shape). * - * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: All. * @param[in] block_shape_x Block shape x value. * @param[in] block_shape_y Block shape y value. * @param[out] output Tensor output info. 
Data types supported: same as @p input diff --git a/arm_compute/runtime/NEON/functions/NEChannelShuffleLayer.h b/arm_compute/runtime/NEON/functions/NEChannelShuffleLayer.h index ecc1822f9d..716518a8da 100644 --- a/arm_compute/runtime/NEON/functions/NEChannelShuffleLayer.h +++ b/arm_compute/runtime/NEON/functions/NEChannelShuffleLayer.h @@ -42,14 +42,14 @@ class NEChannelShuffleLayer : public INESimpleFunctionNoBorder public: /** Initialize the function * - * @param[in] input Input tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 + * @param[in] input Input tensor. Data types supported: All * @param[out] output Output tensor. Data type supported: Same as @p input * @param[in] num_groups Number of groups. Must be greater than 1 and the number of channels of the tensors must be a multiple of the number of groups. */ void configure(const ITensor *input, ITensor *output, unsigned int num_groups); /** Static function to check if given info will lead to a valid configuration of @ref NEChannelShuffleLayer * - * @param[in] input Input tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 + * @param[in] input Input tensor. Data types supported: All * @param[out] output Output tensor. Data type supported: Same as @p input * @param[in] num_groups Number of groups. Must be greater than 1 and the number of channels of the tensors must be a multiple of the number of groups. * diff --git a/arm_compute/runtime/NEON/functions/NECol2Im.h b/arm_compute/runtime/NEON/functions/NECol2Im.h index 8987c82714..5da0b91766 100644 --- a/arm_compute/runtime/NEON/functions/NECol2Im.h +++ b/arm_compute/runtime/NEON/functions/NECol2Im.h @@ -39,7 +39,7 @@ class NECol2Im : public INESimpleFunctionNoBorder public: /** Configure the col2im NEON kernel * - * @param[in] input The input tensor to convert. Data types supported: Any + * @param[in] input The input tensor to convert. Data types supported: All * @param[out] output The output tensor. 3 lower dimensions represent a single output [width, height, OFM], * while the rest represent batch of outputs. Data types supported: Same as @p input * @param[in] convolved_dims Output convolved dimensions. @@ -47,7 +47,7 @@ public: void configure(const ITensor *input, ITensor *output, const Size2D &convolved_dims); /** Static function to check if given info will lead to a valid configuration of @ref NECol2Im * - * @param[in] input The input tensor to convert. Data types supported: Any + * @param[in] input The input tensor to convert. Data types supported: All * @param[in] output The output tensor. 3 lower dimensions represent a single output [width, height, OFM], * while the rest represent batch of outputs. Data types supported: Same as @p input * @param[in] convolved_dims Output convolved dimensions. diff --git a/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h b/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h index 3591cfd9da..8207589680 100644 --- a/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h +++ b/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h @@ -57,7 +57,7 @@ public: * @note Input and output tensor dimensions preconditions defer depending on the concatenation axis. * @note Preconditions can be found respectively at @ref NEWidthConcatenateLayerKernel, @ref NEHeightConcatenateLayerKernel and @ref NEDepthConcatenateLayerKernel. * - * @param[in,out] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: QASYMM8/F16/F32. 
+ * @param[in,out] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. * @param[out] output Output tensor. Data types supported: Same as @p input. * @param[in] axis Concatenation axis. Supported underlying concatenation axis are 0, 1, 2 and 3. */ @@ -68,7 +68,7 @@ public: * @note Input and output tensor dimensions preconditions defer depending on the concatenation axis. * @note Preconditions can be found respectively at @ref NEWidthConcatenateLayerKernel, @ref NEHeightConcatenateLayerKernel and @ref NEDepthConcatenateLayerKernel. * - * @param[in] inputs_vector The vectors containing all the tensors info to concatenate. Data types supported: QASYMM8/F16/F32. + * @param[in] inputs_vector The vectors containing all the tensors info to concatenate. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. * @param[in] output Output tensor info. Data types supported: Same as @p input. * @param[in] axis Concatenation axis. Supported underlying concatenation axis are 0, 1, 2 and 3. * diff --git a/arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h b/arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h index 37b6b6c4dd..42f787090e 100644 --- a/arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h +++ b/arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h @@ -32,6 +32,7 @@ namespace arm_compute { +// Forward declarations class ITensor; /** Basic function to run @ref NEConvertFullyConnectedWeightsKernel. */ @@ -42,7 +43,7 @@ public: NEConvertFullyConnectedWeights(); /** Initialize the function. * - * @param[in] input Source weights tensor to convert. Must be 2 dimensional. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32. + * @param[in] input Source weights tensor to convert. Must be 2 dimensional. Data types supported: All. * @param[out] output The converted weights tensor. Shape and Data Type: Same as @p input. * @param[in] original_input_shape Shape of the original input tensor (the one entering fully connected layer). * @param[in] data_layout The data layout the weights have been trained in. @@ -50,7 +51,7 @@ public: void configure(const ITensor *input, ITensor *output, const TensorShape &original_input_shape, DataLayout data_layout); /** Static function to check if given info will lead to a valid configuration of @ref NEConvertFullyConnectedWeights * - * @param[in] input Source weights tensor info to convert. Must be 2 dimensional. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32. + * @param[in] input Source weights tensor info to convert. Must be 2 dimensional. Data types supported: All. * @param[in] output The converted weights tensor info. Shape and Data Type: Same as @p input. * @param[in] original_input_shape Shape of the original input tensor (the one entering fully connected layer). * @param[in] data_layout The data layout the weights have been trained in. diff --git a/arm_compute/runtime/NEON/functions/NECopy.h b/arm_compute/runtime/NEON/functions/NECopy.h index 7ad89e27d8..b03f408ad8 100644 --- a/arm_compute/runtime/NEON/functions/NECopy.h +++ b/arm_compute/runtime/NEON/functions/NECopy.h @@ -37,14 +37,14 @@ class NECopy : public INESimpleFunctionNoBorder public: /** Initialise the function's source and destination. * - * @param[in] input Source tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[in] input Source tensor. Data types supported: All * @param[out] output Output tensor. 
Data types supported: Same as @p input. * */ void configure(ITensor *input, ITensor *output); /** Static function to check if given info will lead to a valid configuration of @ref NECopy * - * @param[in] input Source tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[in] input Source tensor. Data types supported: All * @param[in] output Output tensor. Data types supported: Same as @p input. * * @return a status diff --git a/arm_compute/runtime/NEON/functions/NEDepthToSpaceLayer.h b/arm_compute/runtime/NEON/functions/NEDepthToSpaceLayer.h index 56c237420b..3c21d1a33a 100644 --- a/arm_compute/runtime/NEON/functions/NEDepthToSpaceLayer.h +++ b/arm_compute/runtime/NEON/functions/NEDepthToSpaceLayer.h @@ -32,6 +32,7 @@ namespace arm_compute { +// Forward declarations class ITensor; /** Basic function to run @ref NEDepthToSpaceLayerKernel. */ @@ -40,14 +41,14 @@ class NEDepthToSpaceLayer : public INESimpleFunctionNoBorder public: /** Set the input and output tensors. * - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All * @param[out] output Tensor output. Data types supported: same as @p input * @param[in] block_shape Block shape value. */ void configure(const ITensor *input, ITensor *output, int32_t block_shape); /** Static function to check if given info will lead to a valid configuration of @ref NEDepthToSpaceLayer. * - * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: All * @param[in] output Tensor output info. Data types supported: same as @p input * @param[in] block_shape Block shape x value. * diff --git a/arm_compute/runtime/NEON/functions/NEFill.h b/arm_compute/runtime/NEON/functions/NEFill.h index e5badcb7b6..f8a15078c3 100644 --- a/arm_compute/runtime/NEON/functions/NEFill.h +++ b/arm_compute/runtime/NEON/functions/NEFill.h @@ -39,7 +39,7 @@ class NEFill : public INESimpleFunctionNoBorder public: /** Initialize the function * - * @param[in,out] tensor Source tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 + * @param[in,out] tensor Source tensor. Data types supported: All * @param[in] constant_value Constant value to use to fill tensor. */ void configure(ITensor *tensor, PixelValue constant_value); diff --git a/arm_compute/runtime/NEON/functions/NEFillBorder.h b/arm_compute/runtime/NEON/functions/NEFillBorder.h index b8266d9d67..0ae04cbf00 100644 --- a/arm_compute/runtime/NEON/functions/NEFillBorder.h +++ b/arm_compute/runtime/NEON/functions/NEFillBorder.h @@ -31,6 +31,7 @@ namespace arm_compute { +// Forward declaration class ITensor; /** Basic function to run @ref NEFillBorderKernel */ @@ -41,7 +42,7 @@ public: * * @note This function fills the borders within the XY-planes. * - * @param[in, out] input Source tensor. Data type supported: U8/S16/S32/F32 + * @param[in, out] input Source tensor. Data type supported: All * @param[in] border_width Width of the tensor border in pixels. * @param[in] border_mode Strategy to use for borders. * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. 
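With NECopy (and the neighbouring layout functions) now documented as accepting all data types, a QASYMM8_SIGNED tensor passes straight through them. A rough usage sketch follows; the shape, quantization parameters and function name are invented for illustration and are not part of the patch:

#include "arm_compute/runtime/NEON/functions/NECopy.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

void example_copy_qasymm8_signed() // hypothetical example, not library code
{
    Tensor src, dst;
    const TensorInfo info(TensorShape(16U, 16U), 1, DataType::QASYMM8_SIGNED, QuantizationInfo(0.25f, -10));
    src.allocator()->init(info);
    dst.allocator()->init(info);

    NECopy copy;
    copy.configure(&src, &dst); // documented for all data types, including QASYMM8_SIGNED

    src.allocator()->allocate();
    dst.allocator()->allocate();
    copy.run(); // byte-wise copy; the quantization info travels with the tensor metadata
}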
@@ -54,5 +55,5 @@ public: private: NEFillBorderKernel _border_handler; /**< Kernel to handle image borders */ }; -} +} // namespace arm_compute #endif /*ARM_COMPUTE_NEFILLBORDER_H */ diff --git a/arm_compute/runtime/NEON/functions/NEFlattenLayer.h b/arm_compute/runtime/NEON/functions/NEFlattenLayer.h index db73f52f4a..7b4801cd1c 100644 --- a/arm_compute/runtime/NEON/functions/NEFlattenLayer.h +++ b/arm_compute/runtime/NEON/functions/NEFlattenLayer.h @@ -37,7 +37,7 @@ class NEFlattenLayer : public INESimpleFunctionNoBorder public: /** Initialise the kernel's input and output. * - * @param[in] input First input tensor to flatten with at least 3 dimensions. The dimensions over the third will be interpreted as batches. Data types supported: F16/F32 + * @param[in] input First input tensor to flatten with at least 3 dimensions. The dimensions over the third will be interpreted as batches. Data types supported: All * @param[out] output Output tensor with shape [w*h*d, input_batches] where: * w = width input tensor, h = height input tensor and d = depth input tensor. Data type supported: same as @p input */ @@ -46,7 +46,7 @@ public: /** Static function to check if given info will lead to a valid configuration of @ref NEFlattenLayer * * @param[in] input First input tensor to flatten with at least 3 dimensions. - * The dimensions above the third will be interpreted as batches. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 + * The dimensions above the third will be interpreted as batches. Data types supported: All * @param[out] output Output tensor with shape [w*h*d, input_batches] where: * w = width input tensor, h = height input tensor and d = depth input tensor. Data type supported: same as @p input * diff --git a/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h b/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h index 5eabdcdb6b..8150737ebe 100644 --- a/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h +++ b/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h @@ -49,13 +49,13 @@ class NEFullyConnectedLayerReshapeWeights : public INESimpleFunctionNoBorder public: /** Set the input and output tensors. * - * @param[in] input Weights tensor. The weights must be 2 dimensional. Data types supported: QASYMM8/F16/F32. + * @param[in] input Weights tensor. The weights must be 2 dimensional. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. * @param[out] output Destination tensor. Data type supported: Same as @p input. */ void configure(const ITensor *input, ITensor *output); /** Static function to check if given info will lead to a valid configuration of @ref NEFullyConnectedLayerReshapeWeights * - * @param[in] input Weights tensor info. The weights must be 2 dimensional. Data types supported: QASYMM8/F16/F32. + * @param[in] input Weights tensor info. The weights must be 2 dimensional. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. * @param[in] output Destination tensor info. Data type supported: Same as @p input. * * @return a status diff --git a/arm_compute/runtime/NEON/functions/NEGEMMTranspose1xW.h b/arm_compute/runtime/NEON/functions/NEGEMMTranspose1xW.h index 8b66eb23b9..6a38490ed4 100644 --- a/arm_compute/runtime/NEON/functions/NEGEMMTranspose1xW.h +++ b/arm_compute/runtime/NEON/functions/NEGEMMTranspose1xW.h @@ -28,6 +28,7 @@ namespace arm_compute { +// Forward declarations class ITensor; /** Basic function to execute NEGEMMTranspose1xWKernel. 
This function calls the following NEON kernels: @@ -40,13 +41,13 @@ class NEGEMMTranspose1xW : public INESimpleFunctionNoBorder public: /** Initialise the kernel's inputs, output * - * @param[in] input First input tensor. Data type supported: U8/S8/QASYMM8/QSYMM8_PER_CHANNEL/U16/S16/F16/U32/S32/F32 + * @param[in] input First input tensor. Data type supported: All * @param[out] output Output tensor. Data type supported: same as @p input */ void configure(const ITensor *input, ITensor *output); /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMTranspose1xW * - * @param[in] input First input tensor. Data type supported: U8/S8/QASYMM8/QSYMM8_PER_CHANNEL/U16/S16/F16/U32/S32/F32 + * @param[in] input First input tensor. Data type supported: All * @param[in] output Output tensor. Data type supported: same as @p input * * @return a status diff --git a/arm_compute/runtime/NEON/functions/NEGather.h b/arm_compute/runtime/NEON/functions/NEGather.h index 6b6072cd53..7ed45c0f15 100644 --- a/arm_compute/runtime/NEON/functions/NEGather.h +++ b/arm_compute/runtime/NEON/functions/NEGather.h @@ -30,6 +30,7 @@ namespace arm_compute { +// Forward declarations class ITensor; /** Basic function to run @ref NEGatherKernel */ @@ -38,7 +39,7 @@ class NEGather : public INESimpleFunctionNoBorder public: /** Initialise the kernel's inputs and outputs * - * @param[in] input Source tensor. Supported tensor rank: up to 4. Data type supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32 + * @param[in] input Source tensor. Supported tensor rank: up to 4. Data type supported: All * @param[in] indices Indices tensor. Supported tensor rank: up to 1. Must be one of the following type: U32/S32. Each value Must be in range [0, input.shape[@p axis]) * @param[out] output Destination tensor. Data type supported: Same as @p input * @param[in] axis (Optional) The axis in @p input to gather @p indices from. Defaults to 0 @@ -47,7 +48,7 @@ public: /** Static function to check if given info will lead to a valid configuration of @ref NEGatherKernel * - * @param[in] input Source tensor info. Supported tensor rank: up to 4. Data type supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32 + * @param[in] input Source tensor info. Supported tensor rank: up to 4. Data type supported: All * @param[in] indices Indices tensor info. Supported tensor rank: up to 1. Must be one of the following types: U32/S32. Each value Must be in range [0, input.shape[@p axis]) * @param[in] output Destination tensor info. Data type supported: Same as @p input * @param[in] axis (Optional) The axis in @p input to gather @p indices from. Defaults to 0 @@ -56,7 +57,6 @@ public: */ static Status validate(const ITensorInfo *input, const ITensorInfo *indices, const ITensorInfo *output, int axis); }; - } // namespace arm_compute #endif /* ARM_COMPUTE_NEGATHER_H */ diff --git a/arm_compute/runtime/NEON/functions/NEIm2Col.h b/arm_compute/runtime/NEON/functions/NEIm2Col.h index 475e1d6160..ac1b2674a5 100644 --- a/arm_compute/runtime/NEON/functions/NEIm2Col.h +++ b/arm_compute/runtime/NEON/functions/NEIm2Col.h @@ -32,6 +32,7 @@ namespace arm_compute { +// Forward declarations class ITensor; /** Basic function to run @ref NEIm2ColKernel */ @@ -43,7 +44,7 @@ public: /** Configure the im2col NEON kernel * * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM], - * while every optional dimension from 4 and above represent a batch of inputs. 
Data types supported: QASYMM8/F16/F32 + * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 * Note: QASYMM8 works only for has_bias = false * @param[out] output The output tensor. Data types supported: Same as @p input * @param[in] kernel_dims The kernel dimensions (width and height). @@ -57,7 +58,7 @@ public: /** Static function to check if given info will lead to a valid configuration of @ref NEIm2Col * * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM], - * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8/F16/F32 + * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 * Note: QASYMM8 works only for has_bias = false * @param[in] output The output tensor. Data types supported: Same as @p input * @param[in] kernel_dims The kernel dimensions (width and height). diff --git a/arm_compute/runtime/NEON/functions/NEPadLayer.h b/arm_compute/runtime/NEON/functions/NEPadLayer.h index a17739385c..d3074e70bc 100644 --- a/arm_compute/runtime/NEON/functions/NEPadLayer.h +++ b/arm_compute/runtime/NEON/functions/NEPadLayer.h @@ -53,7 +53,7 @@ public: NEPadLayer(); /** Initialize the function * - * @param[in] input Source tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[in] input Source tensor. Data types supported: All. * @param[out] output Output tensor. Data type supported: same as @p input * @param[in] padding The padding for each spatial dimension of the input tensor. The pair padding[i] * specifies the front and the end padding in the i-th dimension. @@ -64,7 +64,7 @@ public: void configure(ITensor *input, ITensor *output, const PaddingList &padding, const PixelValue constant_value = PixelValue(), const PaddingMode mode = PaddingMode::CONSTANT); /** Static function to check if given info will lead to a valid configuration of @ref NEPadLayer. * - * @param[in] input Source tensor info. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[in] input Source tensor info. Data types supported: All. * @param[in] output Output tensor info. Data type supported: same as @p input * @param[in] padding The padding for each spatial dimension of the input tensor. The pair padding[i] * specifies the front and the end padding in the i-th dimension. @@ -82,7 +82,7 @@ public: private: /** Configure kernels for when constant padding is used. * - * @param[in] input Source tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[in] input Source tensor. Data types supported: All. * @param[out] output Output tensor. Data type supported: same as @p input * @param[in] padding The padding for each spatial dimension of the input tensor. The pair padding[i] * specifies the front and the end padding in the i-th dimension. @@ -91,7 +91,7 @@ private: void configure_constant_mode(ITensor *input, ITensor *output, const PaddingList &padding, const PixelValue constant_value); /** Configure functions for when reflect or symmetric padding is used. * - * @param[in] input Source tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[in] input Source tensor. Data types supported: All. * @param[out] output Output tensor. 
Data type supported: same as @p input */ void configure_reflect_symmetric_mode(ITensor *input, ITensor *output); diff --git a/arm_compute/runtime/NEON/functions/NEPermute.h b/arm_compute/runtime/NEON/functions/NEPermute.h index 72e864283e..4651b30e8e 100644 --- a/arm_compute/runtime/NEON/functions/NEPermute.h +++ b/arm_compute/runtime/NEON/functions/NEPermute.h @@ -30,6 +30,7 @@ namespace arm_compute { +// Forward declarations class ITensor; /** Basic function to run @ref NEPermuteKernel */ @@ -40,7 +41,7 @@ public: * * @note Arbitrary permutation vectors are supported with rank not greater than 4 * - * @param[in] input The input tensor to permute. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 + * @param[in] input The input tensor to permute. Data types supported: All * @param[out] output The output tensor. Data types supported: Same as @p input * @param[in] perm Permutation vector */ @@ -49,7 +50,7 @@ public: * * @note Arbitrary permutation vectors are supported with rank not greater than 4 * - * @param[in] input The input tensor to permute. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 + * @param[in] input The input tensor to permute. Data types supported: All * @param[in] output The output tensor. Data types supported: Same as @p input * @param[in] perm Permutation vector * diff --git a/arm_compute/runtime/NEON/functions/NEReorgLayer.h b/arm_compute/runtime/NEON/functions/NEReorgLayer.h index 5f28821a6c..8ef7f8a1b2 100644 --- a/arm_compute/runtime/NEON/functions/NEReorgLayer.h +++ b/arm_compute/runtime/NEON/functions/NEReorgLayer.h @@ -38,7 +38,7 @@ class NEReorgLayer : public INESimpleFunctionNoBorder public: /** Initialise the kernel's inputs and outputs * - * @param[in] input First tensor input. Data type supported: U8/S8/QASYMM8//U16/S16/U32/S32/F16/F32 + * @param[in] input First tensor input. Data type supported: All * @param[out] output Output tensor. Data type supported: Same as @p input * @param[in] stride Stride to be used during data re-organization * It defines the spatial distance between 2 consecutive pixels in the x and y direction @@ -47,7 +47,7 @@ public: /** Static function to check if given info will lead to a valid configuration of @ref NEReorgLayer * - * @param[in] input First tensor info. Data type supported: U8/S8/QASYMM8//U16/S16/U32/S32/F16/F32 + * @param[in] input First tensor info. Data type supported: All * @param[in] output Output tensor info. Data type supported: Same as @p input * @param[in] stride Stride to be used during data re-organization * It defines the spatial distance between 2 consecutive pixels in the x and y direction diff --git a/arm_compute/runtime/NEON/functions/NEReshapeLayer.h b/arm_compute/runtime/NEON/functions/NEReshapeLayer.h index c57387a0cd..d6643842d9 100644 --- a/arm_compute/runtime/NEON/functions/NEReshapeLayer.h +++ b/arm_compute/runtime/NEON/functions/NEReshapeLayer.h @@ -29,6 +29,7 @@ namespace arm_compute { +// Forward declarations class ITensor; /** Basic function to run @ref NEReshapeLayerKernel */ @@ -37,14 +38,14 @@ class NEReshapeLayer : public INESimpleFunctionNoBorder public: /** Initialise the kernel's inputs and outputs * - * @param[in] input First tensor input. Data type supported: U8/S8/QASYMM8//U16/S16/U32/S32/F16/F32 + * @param[in] input First tensor input. Data type supported: All * @param[out] output Output tensor. 
Data type supported: Same as @p input */ void configure(const ITensor *input, ITensor *output); /** Static function to check if given info will lead to a valid configuration of @ref NEReshapeLayer * - * @param[in] input First tensor info. Data type supported: U8/S8/QASYMM8//U16/S16/U32/S32/F16/F32 + * @param[in] input First tensor info. Data type supported: All * @param[in] output Output tensor info. Data type supported: Same as @p input * * @return a status diff --git a/arm_compute/runtime/NEON/functions/NEReverse.h b/arm_compute/runtime/NEON/functions/NEReverse.h index 6414cbd4d1..ab5a5d0869 100644 --- a/arm_compute/runtime/NEON/functions/NEReverse.h +++ b/arm_compute/runtime/NEON/functions/NEReverse.h @@ -38,14 +38,14 @@ class NEReverse : public INESimpleFunctionNoBorder public: /** Initialize the function * - * @param[in] input Input tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 + * @param[in] input Input tensor. Data types supported: All * @param[out] output Output tensor. Data type supported: Same as @p input * @param[in] axis Axis tensor. Contains the indices of the dimensions to reverse. Data type supported: U32 */ void configure(const ITensor *input, ITensor *output, const ITensor *axis); /** Static function to check if given info will lead to a valid configuration of @ref NEReverseKernel * - * @param[in] input Input tensor info. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 + * @param[in] input Input tensor info. Data types supported: All * @param[in] output Output tensor info. Data type supported: Same as @p input * @param[in] axis Axis tensor info. Contains the indices of the dimensions to reverse. Data type supported: U32 * diff --git a/arm_compute/runtime/NEON/functions/NESelect.h b/arm_compute/runtime/NEON/functions/NESelect.h index 049267a692..6ac328080d 100644 --- a/arm_compute/runtime/NEON/functions/NESelect.h +++ b/arm_compute/runtime/NEON/functions/NESelect.h @@ -39,7 +39,7 @@ public: /** Initialise the kernel's inputs and output. * * @param[in] c Condition input tensor. Data types supported: U8. - * @param[in] x First input tensor. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32. + * @param[in] x First input tensor. Data types supported: All. * @param[in] y Second input tensor. Data types supported: Same as @p x * @param[out] output Output tensor. Data types supported: Same as @p x. */ @@ -47,7 +47,7 @@ public: /** Static function to check if given info will lead to a valid configuration of @ref NESelect * * @param[in] c Condition input tensor. Data types supported: U8. - * @param[in] x First input tensor. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32. + * @param[in] x First input tensor. Data types supported: All. * @param[in] y Second input tensor. Data types supported: Same as @p x * @param[in] output Output tensor. Data types supported: Same as @p x. * diff --git a/arm_compute/runtime/NEON/functions/NESlice.h b/arm_compute/runtime/NEON/functions/NESlice.h index 179c64a420..834ec27a33 100644 --- a/arm_compute/runtime/NEON/functions/NESlice.h +++ b/arm_compute/runtime/NEON/functions/NESlice.h @@ -42,7 +42,7 @@ public: * @note End coordinates can be negative, which represents the number of elements before the end of that dimension. * @note End indices are not inclusive unless negative. * - * @param[in] input Source tensor. Data type supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32 + * @param[in] input Source tensor. Data type supported: All * @param[out] output Destination tensor. 
Data type supported: Same as @p input * @param[in] starts The starts of the dimensions of the input tensor to be sliced. The length must be of rank(input). * @param[in] ends The ends of the dimensions of the input tensor to be sliced. The length must be of rank(input). @@ -56,7 +56,7 @@ public: * @note End coordinates can be negative, which represents the number of elements before the end of that dimension. * @note End indices are not inclusive unless negative. * - * @param[in] input Source tensor info. Data type supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32 + * @param[in] input Source tensor info. Data type supported: All * @param[in] output Destination tensor info. Data type supported: Same as @p input * @param[in] starts The starts of the dimensions of the input tensor to be sliced. The length must be of rank(input). * @param[in] ends The ends of the dimensions of the input tensor to be sliced. The length must be of rank(input). diff --git a/arm_compute/runtime/NEON/functions/NESpaceToBatchLayer.h b/arm_compute/runtime/NEON/functions/NESpaceToBatchLayer.h index c433f183a0..75fa50c1b0 100644 --- a/arm_compute/runtime/NEON/functions/NESpaceToBatchLayer.h +++ b/arm_compute/runtime/NEON/functions/NESpaceToBatchLayer.h @@ -56,7 +56,7 @@ public: virtual ~NESpaceToBatchLayer() = default; /** Set the input and output tensors. * - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. * @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32 * @param[in] paddings 2-D tensor with shape [2, M]. Data types supported: S32 * @param[out] output Tensor output. Data types supported: same as @p input @@ -64,7 +64,7 @@ public: void configure(const ITensor *input, const ITensor *block_shape, const ITensor *paddings, ITensor *output); /** Set the input and output tensors. (Static block shape and paddings) * - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. * @param[in] block_shape_x Block shape x value. * @param[in] block_shape_y Block shape y value. * @param[in] padding_left The left padding of the output tensor. @@ -74,7 +74,7 @@ public: void configure(const ITensor *input, const int block_shape_x, const int block_shape_y, const Size2D &padding_left, const Size2D &padding_right, ITensor *output); /** Static function to check if given info will lead to a valid configuration of @ref NESpaceToBatchLayer * - * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: All. * @param[in] block_shape block shape tensor info with shape [M]. Data types supported: S32 * @param[in] paddings paddings tensor info with shape [2, M]. Data types supported: S32 * @param[in] output Tensor output info. Data types supported: same as @p input @@ -84,7 +84,7 @@ public: static Status validate(const ITensorInfo *input, const ITensorInfo *block_shape, const ITensorInfo *paddings, const ITensorInfo *output); /** Static function to check if given info will lead to a valid configuration of @ref NESpaceToBatchLayer (Static block shape and paddings) * - * @param[in] input Tensor input info. Supported tensor rank: 4. 
Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: All. * @param[in] block_shape_x Block shape x value. * @param[in] block_shape_y Block shape y value. * @param[in] padding_left The left padding of the output tensor. diff --git a/arm_compute/runtime/NEON/functions/NESpaceToDepthLayer.h b/arm_compute/runtime/NEON/functions/NESpaceToDepthLayer.h index ac93b48c0c..6a7a9c83a1 100644 --- a/arm_compute/runtime/NEON/functions/NESpaceToDepthLayer.h +++ b/arm_compute/runtime/NEON/functions/NESpaceToDepthLayer.h @@ -55,14 +55,14 @@ public: virtual ~NESpaceToDepthLayer() = default; /** Set the input and output tensors. * - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. * @param[out] output Tensor output. Data types supported: same as @p input * @param[in] block_shape Block shape value */ void configure(const ITensor *input, ITensor *output, int32_t block_shape); /** Static function to check if given info will lead to a valid configuration of @ref NESpaceToDepthLayer (Static block shape and paddings) * - * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: All. * @param[in] output Tensor output info. Data types supported: same as @p input * @param[in] block_shape Block shape value * diff --git a/arm_compute/runtime/NEON/functions/NESplit.h b/arm_compute/runtime/NEON/functions/NESplit.h index 26ff4059f1..e4d62048e6 100644 --- a/arm_compute/runtime/NEON/functions/NESplit.h +++ b/arm_compute/runtime/NEON/functions/NESplit.h @@ -45,7 +45,7 @@ public: NESplit(); /** Initialise the kernel's input and outputs. * - * @param[in] input The input tensor. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32. + * @param[in] input The input tensor. Data types supported: All * @param[out] outputs A vector containing the output tensors. Data types supported: Same as @p input. * The output tensors should match the input tensor dimensions for all shape dimensions apart * from the split dimension. @@ -54,7 +54,7 @@ public: void configure(const ITensor *input, const std::vector &outputs, unsigned int axis); /** Static function to check if given info will lead to a valid configuration of @ref NESplit * - * @param[in] input The input tensor info. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32. + * @param[in] input The input tensor info. Data types supported: All * @param[in] outputs A vector containing the output tensors' info. Data types supported: Same as @p input. * The output tensors should match the input tensor dimensions for all shape dimensions apart * from the split dimension diff --git a/arm_compute/runtime/NEON/functions/NEStackLayer.h b/arm_compute/runtime/NEON/functions/NEStackLayer.h index e83995619d..9288035060 100644 --- a/arm_compute/runtime/NEON/functions/NEStackLayer.h +++ b/arm_compute/runtime/NEON/functions/NEStackLayer.h @@ -50,7 +50,7 @@ public: * * @note Supported input tensor rank: up to 4 * - * @param[in] input The vectors containing all the tensors with the same shape to stack. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 + * @param[in] input The vectors containing all the tensors with the same shape to stack. 
Data types supported: All * @param[in] axis The dimension to stack the tensors along. It must be smaller than the number of input dimensions. * Negative values wrap around * @param[out] output Output tensor. Data types supported: Same as @p input. @@ -60,7 +60,7 @@ public: * * @note Supported input tensor rank: up to 4 * - * @param[in] input The vectors containing all the tensors info with the same shape to stack. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 + * @param[in] input The vectors containing all the tensors info with the same shape to stack. Data types supported: All * @param[in] axis The dimension to stack the tensors along. It must be smaller than the number of input dimensions. * Negative values wrap around * @param[in] output Output tensor info. Data types supported: Same as @p input. diff --git a/arm_compute/runtime/NEON/functions/NEStridedSlice.h b/arm_compute/runtime/NEON/functions/NEStridedSlice.h index d8c3918385..6d5e6392f5 100644 --- a/arm_compute/runtime/NEON/functions/NEStridedSlice.h +++ b/arm_compute/runtime/NEON/functions/NEStridedSlice.h @@ -39,7 +39,7 @@ public: * * @note Supported tensor rank: up to 4 * - * @param[in] input Source tensor. Data type supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32 + * @param[in] input Source tensor. Data type supported: All * @param[out] output Destination tensor. Data type supported: Same as @p input * @param[in] starts The starts of the dimensions of the input tensor to be sliced. The length must be of rank(input). * @param[in] ends The ends of the dimensions of the input tensor to be sliced. The length must be of rank(input). @@ -57,7 +57,7 @@ public: * * @note Supported tensor rank: up to 4 * - * @param[in] input Source tensor info. Data type supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32 + * @param[in] input Source tensor info. Data type supported: All * @param[in] output Destination tensor info. Data type supported: Same as @p input * @param[in] starts The starts of the dimensions of the input tensor to be sliced. The length must be of rank(input). * @param[in] ends The ends of the dimensions of the input tensor to be sliced. The length must be of rank(input). diff --git a/arm_compute/runtime/NEON/functions/NETile.h b/arm_compute/runtime/NEON/functions/NETile.h index c7a08961a4..14d4f221f1 100644 --- a/arm_compute/runtime/NEON/functions/NETile.h +++ b/arm_compute/runtime/NEON/functions/NETile.h @@ -38,14 +38,14 @@ class NETile : public INESimpleFunctionNoBorder public: /** Set the source, destination of the kernel * - * @param[in] input Source tensor. Data type supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[in] input Source tensor. Data type supported: All. * @param[out] output Destination tensor. Same as @p input * @param[in] multiples Contains the number of times the input tensor should be replicated on the given dimension. */ void configure(const ITensor *input, ITensor *output, const Multiples &multiples); /** Static function to check if given info will lead to a valid configuration of @ref NETile * - * @param[in] input Source tensor info. Data type supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[in] input Source tensor info. Data type supported: All. * @param[in] output Destination tensor info. Same as @p input * @param[in] multiples Contains the number of times the input tensor should be replicated on the given dimension. 
* diff --git a/arm_compute/runtime/NEON/functions/NETranspose.h b/arm_compute/runtime/NEON/functions/NETranspose.h index 6148a67203..03c90e5b28 100644 --- a/arm_compute/runtime/NEON/functions/NETranspose.h +++ b/arm_compute/runtime/NEON/functions/NETranspose.h @@ -41,13 +41,13 @@ class NETranspose : public INESimpleFunctionNoBorder public: /** Initialise the kernel's inputs and output * - * @param[in] input Input tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 + * @param[in] input Input tensor. Data types supported: All * @param[out] output Output tensor. Data type supported: Same as @p input */ void configure(const ITensor *input, ITensor *output); /** Static function to check if given info will lead to a valid configuration of @ref NETranspose * - * @param[in] input The input tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 + * @param[in] input The input tensor. Data types supported: All * @param[in] output The output tensor. Data types supported: Same as @p input * * @return a status diff --git a/src/core/NEON/kernels/NEBatchConcatenateLayerKernel.cpp b/src/core/NEON/kernels/NEBatchConcatenateLayerKernel.cpp index cfa98fb19a..3e250f5d2e 100644 --- a/src/core/NEON/kernels/NEBatchConcatenateLayerKernel.cpp +++ b/src/core/NEON/kernels/NEBatchConcatenateLayerKernel.cpp @@ -66,6 +66,16 @@ void batch_concat(const ITensor *in, ITensor *out, int batch_offset, const Windo }, input, output); } + else if(dt == DataType::QASYMM8_SIGNED && input_qinfo != output_qinfo) + { + execute_window_loop(window, [&](const Coordinates &) + { + const auto in_ptr = reinterpret_cast<const int8_t *>(input_ptr + input.offset()); + const auto out_ptr = reinterpret_cast<int8_t *>(output_ptr + output.offset()); + vst1q_s8(out_ptr, vquantize_signed(vdequantize(vld1q_s8(in_ptr), input_qinfo), output_qinfo)); + }, + input, output); + } else { execute_window_loop(window, [&](const Coordinates &) @@ -102,10 +112,7 @@ Status validate_arguments(const ITensorInfo *input, unsigned int batch_offset, c { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); //Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use NEON FP16 instructions.
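The new QASYMM8_SIGNED branch above is only taken when the input and output quantization parameters disagree: each lane is dequantized with the input parameters and requantized with the output ones. A scalar sketch of what vquantize_signed(vdequantize(...)) computes per element follows (helper name invented; the NEON path vectorizes this and its rounding may differ in the last bit):

#include <algorithm>
#include <cmath>
#include <cstdint>

// Per-element illustration (not library code) of the requantization in the signed branch:
// value -> float via the input (scale, offset), then back to int8 via the output pair.
int8_t requantize_qasymm8_signed(int8_t in, float in_scale, int32_t in_offset,
                                 float out_scale, int32_t out_offset)
{
    const float   dequantized = (static_cast<int32_t>(in) - in_offset) * in_scale;
    const int32_t requantized = static_cast<int32_t>(std::lround(dequantized / out_scale)) + out_offset;
    return static_cast<int8_t>(std::min(127, std::max(-128, requantized))); // saturate to the int8 range
}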
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, - DataType::U16, DataType::S16, - DataType::U32, DataType::S32, - DataType::F16, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(Window::DimX) != output->dimension(Window::DimX)); @@ -138,6 +145,7 @@ void NEBatchConcatenateLayerKernel::configure(const ITensor *input, unsigned int case DataType::S8: case DataType::U8: case DataType::QASYMM8: + case DataType::QASYMM8_SIGNED: _func = &batch_concat; break; case DataType::S16: diff --git a/src/core/NEON/kernels/NEBatchToSpaceLayerKernel.cpp b/src/core/NEON/kernels/NEBatchToSpaceLayerKernel.cpp index 6211abcad0..d167cbb05d 100644 --- a/src/core/NEON/kernels/NEBatchToSpaceLayerKernel.cpp +++ b/src/core/NEON/kernels/NEBatchToSpaceLayerKernel.cpp @@ -43,6 +43,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *block_inf ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, block_info, output); ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(block_info, 1, DataType::S32); ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() > 4); + ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN); // Validate output if initialized if(output->total_size() != 0) diff --git a/src/core/NEON/kernels/NEChannelShuffleLayerKernel.cpp b/src/core/NEON/kernels/NEChannelShuffleLayerKernel.cpp index b2b0dbd789..45f6fd99fe 100644 --- a/src/core/NEON/kernels/NEChannelShuffleLayerKernel.cpp +++ b/src/core/NEON/kernels/NEChannelShuffleLayerKernel.cpp @@ -39,12 +39,7 @@ namespace Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, unsigned int num_groups) { // Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use NEON FP16 instructions. - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, - 1, - DataType::U8, DataType::S8, DataType::QASYMM8, - DataType::U16, DataType::S16, - DataType::U32, DataType::S32, - DataType::F16, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN); ARM_COMPUTE_RETURN_ERROR_ON_DATA_LAYOUT_NOT_IN(input, DataLayout::NCHW, DataLayout::NHWC); const unsigned int channels = input->dimension(get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::CHANNEL)); diff --git a/src/core/NEON/kernels/NECol2ImKernel.cpp b/src/core/NEON/kernels/NECol2ImKernel.cpp index cea8782354..4f1b1d8ca2 100644 --- a/src/core/NEON/kernels/NECol2ImKernel.cpp +++ b/src/core/NEON/kernels/NECol2ImKernel.cpp @@ -43,6 +43,7 @@ namespace Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const Size2D &convolved_dims) { //Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use NEON FP16 instructions. + ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN); // Validate configured output if(output->total_size() != 0) diff --git a/src/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.cpp b/src/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.cpp index b6d166d30e..ab6636018f 100644 --- a/src/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.cpp +++ b/src/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 ARM Limited. + * Copyright (c) 2018-2019 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -26,8 +26,8 @@ #include "arm_compute/core/Helpers.h" #include "arm_compute/core/Types.h" -using namespace arm_compute; - +namespace arm_compute +{ NEConvertFullyConnectedWeightsKernel::NEConvertFullyConnectedWeightsKernel() : _input(nullptr), _output(nullptr), _factor1(0), _factor2(0) { @@ -66,12 +66,9 @@ void NEConvertFullyConnectedWeightsKernel::configure(const ITensor *input, ITens Status NEConvertFullyConnectedWeightsKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const TensorShape &original_input_shape, DataLayout data_layout) { + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input); //Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use NEON FP16 instructions. - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, - DataType::U8, DataType::S8, DataType::QASYMM8, - DataType::U16, DataType::S16, - DataType::U32, DataType::S32, - DataType::F16, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN); ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() != 2); ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(1) != original_input_shape.total_size_lower(3)); ARM_COMPUTE_RETURN_ERROR_ON(data_layout == DataLayout::UNKNOWN); @@ -124,3 +121,4 @@ void NEConvertFullyConnectedWeightsKernel::run(const Window &window, const Threa break; } } +} // namespace arm_compute diff --git a/src/core/NEON/kernels/NECopyKernel.cpp b/src/core/NEON/kernels/NECopyKernel.cpp index 83f3dded4f..6bf49549e2 100644 --- a/src/core/NEON/kernels/NECopyKernel.cpp +++ b/src/core/NEON/kernels/NECopyKernel.cpp @@ -38,6 +38,7 @@ namespace Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const PaddingList &padding = PaddingList()) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); + ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN); ARM_COMPUTE_RETURN_ERROR_ON(padding.size() > 4); // Validate output if initialized diff --git a/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp b/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp index 4377006f28..56ab11415c 100644 --- a/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp +++ b/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp @@ -66,6 +66,16 @@ void depth_concat(const ITensor *in, ITensor *out, int depth_offset, const Windo }, input, output); } + else if(dt == DataType::QASYMM8_SIGNED && input_qinfo != output_qinfo) + { + execute_window_loop(window, [&](const Coordinates &) + { + const auto in_ptr = reinterpret_cast(input_ptr + input.offset()); + const auto out_ptr = reinterpret_cast(output_ptr + output.offset()); + vst1q_s8(out_ptr, vquantize_signed(vdequantize(vld1q_s8(in_ptr), input_qinfo), output_qinfo)); + }, + input, output); + } else { execute_window_loop(window, [&](const Coordinates &) @@ -102,7 +112,7 @@ Status validate_arguments(const ITensorInfo *input, unsigned int depth_offset, c { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); //Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use NEON FP16 instructions. 
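The depth-concatenation kernel gains the same signed branch, so NEConcatenateLayer can now stitch QASYMM8_SIGNED tensors whose quantization parameters differ, requantizing each input to the output's parameters on the fly. A rough usage sketch, assuming the configure overload that takes a vector of input tensors and a concatenation axis (shapes and quantization values are invented):

#include "arm_compute/runtime/NEON/functions/NEConcatenateLayer.h"
#include "arm_compute/runtime/Tensor.h"
#include <vector>

using namespace arm_compute;

void example_concat_qasymm8_signed() // hypothetical example, not library code
{
    Tensor a, b, out;
    a.allocator()->init(TensorInfo(TensorShape(8U, 8U, 4U), 1, DataType::QASYMM8_SIGNED, QuantizationInfo(0.5f, 0)));
    b.allocator()->init(TensorInfo(TensorShape(8U, 8U, 6U), 1, DataType::QASYMM8_SIGNED, QuantizationInfo(0.25f, -5)));
    out.allocator()->init(TensorInfo(TensorShape(8U, 8U, 10U), 1, DataType::QASYMM8_SIGNED, QuantizationInfo(0.25f, -5)));

    std::vector<ITensor *> inputs = { &a, &b };
    NEConcatenateLayer concat;
    concat.configure(inputs, &out, 2U); // axis 2: concatenate along the depth dimension

    a.allocator()->allocate();
    b.allocator()->allocate();
    out.allocator()->allocate();
    concat.run(); // b is requantized to out's quantization info while copying
}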
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(Window::DimX) != output->dimension(Window::DimX)); @@ -134,6 +144,9 @@ void NEDepthConcatenateLayerKernel::configure(const ITensor *input, unsigned int case DataType::QASYMM8: _func = &depth_concat; break; + case DataType::QASYMM8_SIGNED: + _func = &depth_concat; + break; case DataType::F16: _func = &depth_concat; break; diff --git a/src/core/NEON/kernels/NEDepthToSpaceLayerKernel.cpp b/src/core/NEON/kernels/NEDepthToSpaceLayerKernel.cpp index df631c3c03..b51d0d66c5 100644 --- a/src/core/NEON/kernels/NEDepthToSpaceLayerKernel.cpp +++ b/src/core/NEON/kernels/NEDepthToSpaceLayerKernel.cpp @@ -41,6 +41,7 @@ namespace Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, int32_t block_shape) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); + ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN); ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() > 4); ARM_COMPUTE_RETURN_ERROR_ON(block_shape < 2); diff --git a/src/core/NEON/kernels/NEFillBorderKernel.cpp b/src/core/NEON/kernels/NEFillBorderKernel.cpp index 13db1659ce..75d46c61d8 100644 --- a/src/core/NEON/kernels/NEFillBorderKernel.cpp +++ b/src/core/NEON/kernels/NEFillBorderKernel.cpp @@ -97,11 +97,9 @@ NEFillBorderKernel::NEFillBorderKernel() void NEFillBorderKernel::configure(ITensor *tensor, BorderSize border_size, BorderMode border_mode, const PixelValue &constant_border_value) { + ARM_COMPUTE_ERROR_ON_NULLPTR(tensor); //Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use NEON FP16 instructions. - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(tensor, 1, DataType::U8, DataType::QASYMM8, - DataType::U16, DataType::S16, - DataType::U32, DataType::S32, - DataType::F16, DataType::F32); + ARM_COMPUTE_ERROR_ON(tensor->info()->data_type() == DataType::UNKNOWN); _tensor = tensor; _border_size = border_size; diff --git a/src/core/NEON/kernels/NEFlattenLayerKernel.cpp b/src/core/NEON/kernels/NEFlattenLayerKernel.cpp index 4840a95540..a48601f7b0 100644 --- a/src/core/NEON/kernels/NEFlattenLayerKernel.cpp +++ b/src/core/NEON/kernels/NEFlattenLayerKernel.cpp @@ -42,10 +42,8 @@ namespace { Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output) { - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, - DataType::U16, DataType::S16, - DataType::U32, DataType::S32, - DataType::F16, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output); + ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN); // Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use NEON FP16 instructions. 
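Several of the kernels above switch from an explicit whitelist of data types to the minimal check that the type is simply known, since they only move bytes around. A sketch of that relaxed validation idiom as it would sit in a kernel's anonymous namespace (the helper name is made up; the macros are the ones used throughout the patch):

// Layout-only kernels no longer enumerate supported types; any concrete type,
// including QASYMM8_SIGNED, passes as long as the input and output types match.
Status validate_arguments_sketch(const ITensorInfo *input, const ITensorInfo *output)
{
    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
    ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN);
    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
    return Status{};
}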
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output); diff --git a/src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.cpp b/src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.cpp index a9c04824ae..8f73bdb3a3 100644 --- a/src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.cpp +++ b/src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.cpp @@ -44,6 +44,7 @@ namespace { Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output) { + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input); //Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use NEON FP16 instructions. ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); diff --git a/src/core/NEON/kernels/NEGEMMTranspose1xWKernel.cpp b/src/core/NEON/kernels/NEGEMMTranspose1xWKernel.cpp index ea3d32e628..88104f7297 100644 --- a/src/core/NEON/kernels/NEGEMMTranspose1xWKernel.cpp +++ b/src/core/NEON/kernels/NEGEMMTranspose1xWKernel.cpp @@ -54,10 +54,9 @@ TensorShape get_output_shape(const ITensorInfo *input) Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output) { + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input); + ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN); //Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use NEON FP16 instructions. - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QSYMM8_PER_CHANNEL, DataType::U8, DataType::S8, - DataType::U16, DataType::S16, DataType::U32, DataType::S32, - DataType::F16, DataType::F32); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); if(output->total_size() != 0) diff --git a/src/core/NEON/kernels/NEGatherKernel.cpp b/src/core/NEON/kernels/NEGatherKernel.cpp index 1e027b7292..0a7a8dfc21 100644 --- a/src/core/NEON/kernels/NEGatherKernel.cpp +++ b/src/core/NEON/kernels/NEGatherKernel.cpp @@ -52,6 +52,32 @@ void validate_indices(const ITensor *indices) } } +Status validate_arguments(const ITensorInfo *input, const ITensorInfo *indices, const ITensorInfo *output, int axis) +{ + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, indices, output); + ARM_COMPUTE_RETURN_ERROR_ON(indices->num_dimensions() > 1); + ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() > 4); + + if(axis < 0) + { + axis += input->num_dimensions(); + } + + ARM_COMPUTE_RETURN_ERROR_ON(0 > axis || axis >= static_cast(input->num_dimensions())); + ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN); + + if(output->total_size() != 0) + { + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(input, output); + TensorShape output_shape = arm_compute::misc::shape_calculator::compute_gather_shape(input->tensor_shape(), indices->tensor_shape(), axis); + ARM_COMPUTE_RETURN_ERROR_ON(output_shape.total_size() != output->tensor_shape().total_size()); + } + + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(indices, 1, DataType::U32, DataType::S32); + + return Status{}; +} } // namespace NEGatherKernel::NEGatherKernel() @@ -107,9 +133,7 @@ void NEGatherKernel::gather_n_axis(const Window &window, const ThreadInfo &info) void NEGatherKernel::configure(const ITensor *input, const ITensor *indices, ITensor *output, int axis) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output, indices); - ARM_COMPUTE_ERROR_ON(indices->info()->num_dimensions() != 1); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(indices, 
1, DataType::U32, DataType::S32); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::U16, DataType::S16, DataType::U32, DataType::S32, DataType::F16, DataType::F32); + ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), indices->info(), output->info(), axis)); _input = input; _indices = indices; @@ -154,7 +178,7 @@ void NEGatherKernel::configure(const ITensor *input, const ITensor *indices, ITe } // Output auto initialization if not yet initialized TensorShape output_shape = arm_compute::misc::shape_calculator::compute_gather_shape(input->info()->tensor_shape(), indices->info()->tensor_shape(), _axis); - auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type()); + auto_init_if_empty(*output->info(), input->info()->clone()->set_tensor_shape(output_shape)); // Create window Window win = calculate_max_window(*output->info(), Steps()); @@ -165,31 +189,7 @@ void NEGatherKernel::configure(const ITensor *input, const ITensor *indices, ITe Status NEGatherKernel::validate(const ITensorInfo *input, const ITensorInfo *indices, const ITensorInfo *output, int axis) { - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, indices, output); - ARM_COMPUTE_RETURN_ERROR_ON(indices->num_dimensions() > 1); - ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() > 4); - - if(axis < 0) - { - axis += input->num_dimensions(); - } - - ARM_COMPUTE_RETURN_ERROR_ON(0 > axis || axis >= static_cast(input->num_dimensions())); - ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, - DataType::U16, DataType::S16, - DataType::U32, DataType::S32, DataType::F16, DataType::F32); - - if(output->total_size() != 0) - { - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(input, output); - TensorShape output_shape = arm_compute::misc::shape_calculator::compute_gather_shape(input->tensor_shape(), indices->tensor_shape(), axis); - ARM_COMPUTE_RETURN_ERROR_ON(output_shape.total_size() != output->tensor_shape().total_size()); - } - - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(indices, 1, DataType::U32, DataType::S32); - + ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, indices, output, axis)); return Status{}; } diff --git a/src/core/NEON/kernels/NEHeightConcatenateLayerKernel.cpp b/src/core/NEON/kernels/NEHeightConcatenateLayerKernel.cpp index 18be1d5bb7..4f7ae8c0b1 100644 --- a/src/core/NEON/kernels/NEHeightConcatenateLayerKernel.cpp +++ b/src/core/NEON/kernels/NEHeightConcatenateLayerKernel.cpp @@ -36,8 +36,8 @@ #include -using namespace arm_compute; - +namespace arm_compute +{ namespace { std::pair validate_and_configure_window(ITensorInfo *input, ITensorInfo *output) @@ -58,10 +58,7 @@ Status validate_arguments(const ITensorInfo *input, unsigned int height_offset, { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); // Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use NEON FP16 instructions. 
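The NEGatherKernel change above also swaps the output auto-initialization to clone the input's TensorInfo rather than rebuilding it from shape and data type alone; the clone carries the quantization info across, which matters for QASYMM8/QASYMM8_SIGNED outputs. A minimal sketch of the effect (shapes and values invented, function name hypothetical):

#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorInfo.h"

using namespace arm_compute;

void example_auto_init_preserves_qinfo()
{
    TensorInfo src(TensorShape(32U, 4U), 1, DataType::QASYMM8_SIGNED, QuantizationInfo(0.1f, 3));
    TensorInfo dst; // empty: eligible for auto-initialization

    // Cloning the source info keeps scale/offset; rebuilding from shape + data type alone would drop it.
    auto_init_if_empty(dst, src.clone()->set_tensor_shape(TensorShape(8U, 4U)));
    // dst is now QASYMM8_SIGNED, shape (8, 4), with QuantizationInfo(0.1f, 3) carried over.
}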
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, - DataType::U8, DataType::S8, DataType::QASYMM8, - DataType::U16, DataType::S16, DataType::F16, - DataType::U32, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(Window::DimX) != output->dimension(Window::DimX)); ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(Window::DimY) + height_offset > output->dimension(Window::DimY)); @@ -128,6 +125,15 @@ void NEHeightConcatenateLayerKernel::run(const Window &window, const ThreadInfo }, input, output); } + else if(dt == DataType::QASYMM8_SIGNED && input_qinfo != output_qinfo) + { + execute_window_loop(window, [&](const Coordinates &) + { + vst1q_s8(reinterpret_cast(output_ptr + output.offset()), + vquantize_signed(vdequantize(vld1q_s8(reinterpret_cast(input.ptr())), input_qinfo), output_qinfo)); + }, + input, output); + } else { execute_window_loop(window, [&](const Coordinates &) @@ -140,3 +146,4 @@ void NEHeightConcatenateLayerKernel::run(const Window &window, const ThreadInfo input, output); } } +} // namespace arm_compute diff --git a/src/core/NEON/kernels/NEPadLayerKernel.cpp b/src/core/NEON/kernels/NEPadLayerKernel.cpp index 88a1c2ec83..07229af358 100644 --- a/src/core/NEON/kernels/NEPadLayerKernel.cpp +++ b/src/core/NEON/kernels/NEPadLayerKernel.cpp @@ -38,6 +38,8 @@ namespace { Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const PaddingList &paddings, const PaddingMode mode) { + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input); + ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN); ARM_COMPUTE_RETURN_ERROR_ON_MSG(mode != PaddingMode::CONSTANT, "Only constant padding mode is supported"); ARM_COMPUTE_RETURN_ERROR_ON_MSG(paddings.size() > 4, "Padding list bigger than 4 dimensions"); if(output->total_size() != 0) diff --git a/src/core/NEON/kernels/NEPermuteKernel.cpp b/src/core/NEON/kernels/NEPermuteKernel.cpp index 897b764b45..2c0db769f5 100644 --- a/src/core/NEON/kernels/NEPermuteKernel.cpp +++ b/src/core/NEON/kernels/NEPermuteKernel.cpp @@ -91,6 +91,7 @@ inline bool is_permutation_supported(const PermutationVector &v) Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const PermutationVector &perm) { + ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN); ARM_COMPUTE_RETURN_ERROR_ON_MSG(!is_permutation_supported(perm), "PermutationVector not supported."); const TensorShape output_shape = misc::shape_calculator::compute_permutation_output_shape(*input, perm); diff --git a/src/core/NEON/kernels/NEReorgLayerKernel.cpp b/src/core/NEON/kernels/NEReorgLayerKernel.cpp index ece5aa431c..f8a8732c64 100644 --- a/src/core/NEON/kernels/NEReorgLayerKernel.cpp +++ b/src/core/NEON/kernels/NEReorgLayerKernel.cpp @@ -41,11 +41,7 @@ namespace Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, int32_t stride) { //Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use NEON FP16 instructions. 
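NEPermuteKernel above keeps its whitelist of permutation vectors but drops the data-type whitelist, so quantized signed tensors can be re-laid-out as well. A rough usage sketch, assuming the NCHW-to-NHWC permutation vector (2, 0, 1) used elsewhere in the library (shape, quantization values and function name are invented):

#include "arm_compute/runtime/NEON/functions/NEPermute.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

void example_permute_qasymm8_signed() // hypothetical example, not library code
{
    const QuantizationInfo qinfo(0.05f, 10);
    Tensor src, dst;
    src.allocator()->init(TensorInfo(TensorShape(32U, 24U, 3U), 1, DataType::QASYMM8_SIGNED, qinfo)); // W, H, C
    dst.allocator()->init(TensorInfo(TensorShape(3U, 32U, 24U), 1, DataType::QASYMM8_SIGNED, qinfo)); // C, W, H

    NEPermute permute;
    permute.configure(&src, &dst, PermutationVector(2U, 0U, 1U)); // NCHW -> NHWC style reordering

    src.allocator()->allocate();
    dst.allocator()->allocate();
    permute.run();
}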
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, - DataType::U8, DataType::S8, DataType::QASYMM8, - DataType::U16, DataType::S16, - DataType::U32, DataType::S32, - DataType::F16, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN); ARM_COMPUTE_RETURN_ERROR_ON(input->data_layout() == DataLayout::UNKNOWN); const size_t idx_width = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::WIDTH); diff --git a/src/core/NEON/kernels/NEReshapeLayerKernel.cpp b/src/core/NEON/kernels/NEReshapeLayerKernel.cpp index 649fba30d4..53fcfd724d 100644 --- a/src/core/NEON/kernels/NEReshapeLayerKernel.cpp +++ b/src/core/NEON/kernels/NEReshapeLayerKernel.cpp @@ -44,8 +44,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); // Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use NEON FP16 instructions. - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::U16, DataType::S16, - DataType::U32, DataType::S32, DataType::F16, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); ARM_COMPUTE_RETURN_ERROR_ON(input->tensor_shape().total_size() != output->tensor_shape().total_size()); @@ -107,6 +106,7 @@ void NEReshapeLayerKernel::run(const Window &window, const ThreadInfo &info) case DataType::U8: case DataType::S8: case DataType::QASYMM8: + case DataType::QASYMM8_SIGNED: reshape_tensor(window, _input, _output); break; case DataType::U16: diff --git a/src/core/NEON/kernels/NEReverseKernel.cpp b/src/core/NEON/kernels/NEReverseKernel.cpp index 99328deecd..2f584164dc 100644 --- a/src/core/NEON/kernels/NEReverseKernel.cpp +++ b/src/core/NEON/kernels/NEReverseKernel.cpp @@ -49,10 +49,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output, axis); ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, - DataType::U16, DataType::S16, - DataType::U32, DataType::S32, - DataType::F16, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN); ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(axis, 1, DataType::U32); ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis->num_dimensions() > 1, "Axis must be a 1D tensor"); ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis->dimension(0) > 4, "Only up to 4 dimensions can be reversed"); @@ -200,6 +197,7 @@ void NEReverseKernel::run(const Window &window, const ThreadInfo &info) run_reverse(window, _input, _axis, _output); break; case DataType::QASYMM8: + case DataType::QASYMM8_SIGNED: case DataType::U8: case DataType::S8: run_reverse(window, _input, _axis, _output); diff --git a/src/core/NEON/kernels/NESpaceToBatchLayerKernel.cpp b/src/core/NEON/kernels/NESpaceToBatchLayerKernel.cpp index f51c3940b7..ba3377f13b 100644 --- a/src/core/NEON/kernels/NESpaceToBatchLayerKernel.cpp +++ b/src/core/NEON/kernels/NESpaceToBatchLayerKernel.cpp @@ -41,6 +41,7 @@ namespace Status validate_arguments(const ITensorInfo *input, const ITensorInfo *block_info, const ITensorInfo *padddings, const ITensorInfo *output) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, block_info, padddings, output); + ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() 
== DataType::UNKNOWN); ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(block_info, 1, DataType::S32); ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() > 4); ARM_COMPUTE_RETURN_ERROR_ON(block_info->num_dimensions() > 1); @@ -62,6 +63,7 @@ Status validate_arguments_static(const ITensorInfo *input, const int block_shape const ITensorInfo *output) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); + ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN); ARM_COMPUTE_RETURN_ERROR_ON(block_shape_x < 1 || block_shape_y < 1); ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() > 4); diff --git a/src/core/NEON/kernels/NESpaceToDepthLayerKernel.cpp b/src/core/NEON/kernels/NESpaceToDepthLayerKernel.cpp index 4803365013..b2ce63e549 100644 --- a/src/core/NEON/kernels/NESpaceToDepthLayerKernel.cpp +++ b/src/core/NEON/kernels/NESpaceToDepthLayerKernel.cpp @@ -41,6 +41,7 @@ namespace Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, int32_t block_shape) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); + ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN); ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() > 4); ARM_COMPUTE_RETURN_ERROR_ON(block_shape < 1); diff --git a/src/core/NEON/kernels/NEStackLayerKernel.cpp b/src/core/NEON/kernels/NEStackLayerKernel.cpp index 3447d59bcc..5deca9e595 100644 --- a/src/core/NEON/kernels/NEStackLayerKernel.cpp +++ b/src/core/NEON/kernels/NEStackLayerKernel.cpp @@ -45,9 +45,7 @@ Status validate_arguments(const ITensorInfo *input, unsigned int axis, unsigned { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); // Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use NEON FP16 instructions. - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::U8, DataType::S8, - DataType::U16, DataType::S16, DataType::U32, DataType::S32, - DataType::F16, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN); ARM_COMPUTE_RETURN_ERROR_ON(idx_input >= num_tensors); ARM_COMPUTE_RETURN_ERROR_ON(axis > input->num_dimensions()); ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() > 4); diff --git a/src/core/NEON/kernels/NEStridedSliceKernel.cpp b/src/core/NEON/kernels/NEStridedSliceKernel.cpp index 2de49c6864..15f786a521 100644 --- a/src/core/NEON/kernels/NEStridedSliceKernel.cpp +++ b/src/core/NEON/kernels/NEStridedSliceKernel.cpp @@ -43,11 +43,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, int32_t begin_mask, int32_t end_mask, int32_t shrink_axis_mask) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, - DataType::U8, DataType::S8, DataType::QASYMM8, - DataType::U16, DataType::S16, DataType::QASYMM16, DataType::QSYMM16, - DataType::U32, DataType::S32, - DataType::F16, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN); ARM_COMPUTE_RETURN_ERROR_ON(input->tensor_shape().num_dimensions() > 4); ARM_COMPUTE_RETURN_ERROR_ON(starts.num_dimensions() > input->num_dimensions()); diff --git a/src/core/NEON/kernels/NETileKernel.cpp b/src/core/NEON/kernels/NETileKernel.cpp index dbeacfad94..98f66e8391 100644 --- a/src/core/NEON/kernels/NETileKernel.cpp +++ b/src/core/NEON/kernels/NETileKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 ARM Limited. + * Copyright (c) 2018-2019 ARM Limited. 
  *
  * SPDX-License-Identifier: MIT
  *
@@ -38,6 +38,7 @@ namespace
 Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const Multiples &multiples)
 {
     ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
+    ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN);
     ARM_COMPUTE_RETURN_ERROR_ON(multiples.size() > 4);
     ARM_COMPUTE_RETURN_ERROR_ON(multiples.empty());
     ARM_COMPUTE_RETURN_ERROR_ON(std::any_of(multiples.begin(), multiples.end(), [](uint32_t e)
diff --git a/src/core/NEON/kernels/NETransposeKernel.cpp b/src/core/NEON/kernels/NETransposeKernel.cpp
index a0a8b82494..6a8e6ffeb5 100644
--- a/src/core/NEON/kernels/NETransposeKernel.cpp
+++ b/src/core/NEON/kernels/NETransposeKernel.cpp
@@ -75,10 +75,9 @@ unsigned int num_elems_processed(size_t element_size)
 
 Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output)
 {
+    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input);
     //Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use NEON FP16 instructions.
-    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::U16, DataType::S16, DataType::U32, DataType::S32,
-                                                         DataType::F16,
-                                                         DataType::F32);
+    ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN);
 
     if(output->total_size() != 0)
     {
diff --git a/src/core/NEON/kernels/NEWidthConcatenateLayerKernel.cpp b/src/core/NEON/kernels/NEWidthConcatenateLayerKernel.cpp
index cafa20a1bd..e164a38708 100644
--- a/src/core/NEON/kernels/NEWidthConcatenateLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEWidthConcatenateLayerKernel.cpp
@@ -36,8 +36,8 @@
 
 #include
 
-using namespace arm_compute;
-
+namespace arm_compute
+{
 namespace
 {
 std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, unsigned int width_offset, ITensorInfo *output)
@@ -58,10 +58,7 @@ Status validate_arguments(const ITensorInfo *input, unsigned int width_offset, c
 {
     ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
     // Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use NEON FP16 instructions.
-    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1,
-                                                         DataType::U8, DataType::S8, DataType::QASYMM8,
-                                                         DataType::U16, DataType::S16, DataType::F16,
-                                                         DataType::U32, DataType::S32, DataType::F32);
+    ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN);
     ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
     ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(0) + width_offset > output->dimension(0));
@@ -128,6 +125,15 @@ void NEWidthConcatenateLayerKernel::run(const Window &window, const ThreadInfo &
         },
         input, output);
     }
+    else if(dt == DataType::QASYMM8_SIGNED && input_qinfo != output_qinfo)
+    {
+        execute_window_loop(window, [&](const Coordinates &)
+        {
+            vst1q_s8(reinterpret_cast<int8_t *>(output_ptr + output.offset()),
+                     vquantize_signed(vdequantize(vld1q_s8(reinterpret_cast<const int8_t *>(input.ptr())), input_qinfo), output_qinfo));
+        },
+        input, output);
+    }
     else
     {
         execute_window_loop(window, [&](const Coordinates &)
@@ -140,3 +146,4 @@ void NEWidthConcatenateLayerKernel::run(const Window &window, const ThreadInfo &
         input, output);
     }
 }
+} // namespace arm_compute
\ No newline at end of file
diff --git a/src/runtime/NEON/functions/NECol2Im.cpp b/src/runtime/NEON/functions/NECol2Im.cpp
index 78c6bc0475..a1113bf5bd 100644
--- a/src/runtime/NEON/functions/NECol2Im.cpp
+++ b/src/runtime/NEON/functions/NECol2Im.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -26,8 +26,8 @@
 #include "arm_compute/core/NEON/kernels/NECol2ImKernel.h"
 #include "support/ToolchainSupport.h"
 
-using namespace arm_compute;
-
+namespace arm_compute
+{
 void NECol2Im::configure(const ITensor *input, ITensor *output, const Size2D &convolved_dims)
 {
     auto k = arm_compute::support::cpp14::make_unique<NECol2ImKernel>();
@@ -39,3 +39,4 @@ Status NECol2Im::validate(const ITensorInfo *input, const ITensorInfo *output, c
 {
     return NECol2ImKernel::validate(input, output, convolved_dims);
 }
+} // namespace arm_compute
diff --git a/src/runtime/NEON/functions/NECopy.cpp b/src/runtime/NEON/functions/NECopy.cpp
index efa8b893aa..5f46023ecc 100644
--- a/src/runtime/NEON/functions/NECopy.cpp
+++ b/src/runtime/NEON/functions/NECopy.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -28,8 +28,8 @@
 
 #include
 
-using namespace arm_compute;
-
+namespace arm_compute
+{
 void NECopy::configure(ITensor *input, ITensor *output)
 {
     auto k = arm_compute::support::cpp14::make_unique<NECopyKernel>();
@@ -41,3 +41,4 @@ Status NECopy::validate(const arm_compute::ITensorInfo *input, const arm_compute
 {
     return NECopyKernel::validate(input, output);
 }
+} // namespace arm_compute
\ No newline at end of file
diff --git a/src/runtime/NEON/functions/NEFillBorder.cpp b/src/runtime/NEON/functions/NEFillBorder.cpp
index 44e49520dc..6b7a0faa85 100644
--- a/src/runtime/NEON/functions/NEFillBorder.cpp
+++ b/src/runtime/NEON/functions/NEFillBorder.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -26,8 +26,8 @@
 #include "arm_compute/core/Window.h"
 #include "arm_compute/runtime/NEON/NEScheduler.h"
 
-using namespace arm_compute;
-
+namespace arm_compute
+{
 void NEFillBorder::configure(ITensor *input, unsigned int border_width, BorderMode border_mode, const PixelValue &constant_border_value)
 {
     _border_handler.configure(input, BorderSize(border_width), border_mode, constant_border_value);
@@ -37,3 +37,4 @@ void NEFillBorder::run()
 {
     NEScheduler::get().schedule(&_border_handler, Window::DimZ);
 }
+} // namespace arm_compute
\ No newline at end of file
diff --git a/src/runtime/NEON/functions/NEFlattenLayer.cpp b/src/runtime/NEON/functions/NEFlattenLayer.cpp
index 57bef2b933..fb175a1dca 100644
--- a/src/runtime/NEON/functions/NEFlattenLayer.cpp
+++ b/src/runtime/NEON/functions/NEFlattenLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -27,8 +27,8 @@
 #include "arm_compute/core/Size2D.h"
 #include "support/ToolchainSupport.h"
 
-using namespace arm_compute;
-
+namespace arm_compute
+{
 void NEFlattenLayer::configure(const ITensor *input, ITensor *output)
 {
     auto k = arm_compute::support::cpp14::make_unique<NEFlattenLayerKernel>();
@@ -39,4 +39,5 @@ void NEFlattenLayer::configure(const ITensor *input, ITensor *output)
 Status NEFlattenLayer::validate(const ITensorInfo *input, const ITensorInfo *output)
 {
     return NEFlattenLayerKernel::validate(input, output);
-}
\ No newline at end of file
+}
+} // namespace arm_compute
\ No newline at end of file
diff --git a/src/runtime/NEON/functions/NEGEMMTranspose1xW.cpp b/src/runtime/NEON/functions/NEGEMMTranspose1xW.cpp
index 802b94650e..cb93712da0 100644
--- a/src/runtime/NEON/functions/NEGEMMTranspose1xW.cpp
+++ b/src/runtime/NEON/functions/NEGEMMTranspose1xW.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -30,8 +30,8 @@
 #include "arm_compute/core/Validate.h"
 #include "support/ToolchainSupport.h"
 
-using namespace arm_compute;
-
+namespace arm_compute
+{
 void NEGEMMTranspose1xW::configure(const ITensor *input, ITensor *output)
 {
     auto k = arm_compute::support::cpp14::make_unique<NEGEMMTranspose1xWKernel>();
@@ -42,3 +42,4 @@ Status NEGEMMTranspose1xW::validate(const ITensorInfo *input, const ITensorInfo
 {
     return NEGEMMTranspose1xWKernel::validate(input, output);
 }
+} // namespace arm_compute
diff --git a/src/runtime/NEON/functions/NEGather.cpp b/src/runtime/NEON/functions/NEGather.cpp
index 078bd5ab26..428ef72d11 100644
--- a/src/runtime/NEON/functions/NEGather.cpp
+++ b/src/runtime/NEON/functions/NEGather.cpp
@@ -41,5 +41,4 @@ Status NEGather::validate(const ITensorInfo *input, const ITensorInfo *indices,
 {
     return NEGatherKernel::validate(input, indices, output, axis);
 }
-
 } // namespace arm_compute
diff --git a/src/runtime/NEON/functions/NEIm2Col.cpp b/src/runtime/NEON/functions/NEIm2Col.cpp
index 9102fca7f6..2a6972918e 100644
--- a/src/runtime/NEON/functions/NEIm2Col.cpp
+++ b/src/runtime/NEON/functions/NEIm2Col.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -27,8 +27,8 @@
 #include "arm_compute/runtime/NEON/NEScheduler.h"
 #include "support/ToolchainSupport.h"
 
-using namespace arm_compute;
-
+namespace arm_compute
+{
 NEIm2Col::NEIm2Col()
     : _kernel(), _y_dim(1)
 {
@@ -51,3 +51,4 @@ void NEIm2Col::run()
 {
     NEScheduler::get().schedule(&_kernel, _y_dim);
 }
+} // namespace arm_compute
\ No newline at end of file
diff --git a/src/runtime/NEON/functions/NEPermute.cpp b/src/runtime/NEON/functions/NEPermute.cpp
index 92abd03e2a..e6c41bfb03 100644
--- a/src/runtime/NEON/functions/NEPermute.cpp
+++ b/src/runtime/NEON/functions/NEPermute.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -26,8 +26,8 @@
 #include "arm_compute/core/NEON/kernels/NEPermuteKernel.h"
 #include "support/ToolchainSupport.h"
 
-using namespace arm_compute;
-
+namespace arm_compute
+{
 void NEPermute::configure(const ITensor *input, ITensor *output, const PermutationVector &perm)
 {
     auto k = arm_compute::support::cpp14::make_unique<NEPermuteKernel>();
@@ -39,3 +39,4 @@ Status NEPermute::validate(const ITensorInfo *input, const ITensorInfo *output,
 {
     return NEPermuteKernel::validate(input, output, perm);
 }
+} // namespace arm_compute
diff --git a/src/runtime/NEON/functions/NEReshapeLayer.cpp b/src/runtime/NEON/functions/NEReshapeLayer.cpp
index 4600f36660..e2cd45ae6e 100644
--- a/src/runtime/NEON/functions/NEReshapeLayer.cpp
+++ b/src/runtime/NEON/functions/NEReshapeLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -29,8 +29,8 @@
 
 #include
 
-using namespace arm_compute;
-
+namespace arm_compute
+{
 void NEReshapeLayer::configure(const ITensor *input, ITensor *output)
 {
     auto k = arm_compute::support::cpp14::make_unique<NEReshapeLayerKernel>();
@@ -45,3 +45,4 @@ Status NEReshapeLayer::validate(const ITensorInfo *input, const ITensorInfo *out
 
     return Status{};
 }
+} // namespace arm_compute
\ No newline at end of file
diff --git a/src/runtime/NEON/functions/NESelect.cpp b/src/runtime/NEON/functions/NESelect.cpp
index 509bbaa24e..c6089c8ec0 100644
--- a/src/runtime/NEON/functions/NESelect.cpp
+++ b/src/runtime/NEON/functions/NESelect.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -26,8 +26,6 @@
 #include "arm_compute/core/NEON/kernels/NESelectKernel.h"
 #include "arm_compute/core/Types.h"
 
-using namespace arm_compute;
-
 namespace arm_compute
 {
 void NESelect::configure(const ITensor *c, const ITensor *x, const ITensor *y, ITensor *output)
diff --git a/src/runtime/NEON/functions/NETranspose.cpp b/src/runtime/NEON/functions/NETranspose.cpp
index b5b28e8e18..fd0d73aa7c 100644
--- a/src/runtime/NEON/functions/NETranspose.cpp
+++ b/src/runtime/NEON/functions/NETranspose.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -28,8 +28,8 @@
 
 #include
 
-using namespace arm_compute;
-
+namespace arm_compute
+{
 void NETranspose::configure(const ITensor *input, ITensor *output)
 {
     auto k = arm_compute::support::cpp14::make_unique<NETransposeKernel>();
@@ -40,4 +40,5 @@ void NETranspose::configure(const ITensor *input, ITensor *output)
 Status NETranspose::validate(const ITensorInfo *input, const ITensorInfo *output)
 {
     return NETransposeKernel::validate(input, output);
-}
\ No newline at end of file
+}
+} // namespace arm_compute
\ No newline at end of file
diff --git a/tests/validation/NEON/BatchConcatenateLayer.cpp b/tests/validation/NEON/BatchConcatenateLayer.cpp
index f95663dbd3..2c0ebc3d54 100644
--- a/tests/validation/NEON/BatchConcatenateLayer.cpp
+++ b/tests/validation/NEON/BatchConcatenateLayer.cpp
@@ -136,10 +136,12 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEBatchConcatenateLayerFixture, framew
     // Validate output
     validate(Accessor(_target), _reference);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, NEBatchConcatenateLayerFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::ConcatenateLayerShapes(),
-                       framework::dataset::make("DataType",
-                                                DataType::QASYMM8)),
-                       framework::dataset::make("Axis", 3)))
+TEST_SUITE_END()
+TEST_SUITE(QASYMM8_SIGNED)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEBatchConcatenateLayerFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(concat(datasets::Small3DShapes(), datasets::Tiny4DShapes()),
+                       framework::dataset::make("DataType",
+                                                DataType::QASYMM8_SIGNED)),
+                       framework::dataset::make("Axis", 3)))
 {
     // Validate output
     validate(Accessor(_target), _reference);
diff --git a/tests/validation/NEON/DepthConcatenateLayer.cpp b/tests/validation/NEON/DepthConcatenateLayer.cpp
index 844c3987db..eea7c4d82f 100644
--- a/tests/validation/NEON/DepthConcatenateLayer.cpp
+++ b/tests/validation/NEON/DepthConcatenateLayer.cpp
@@ -133,10 +133,12 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConcatenateLayerFixture, framew
     // Validate output
     validate(Accessor(_target), _reference);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConcatenateLayerFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::ConcatenateLayerShapes(),
-                       framework::dataset::make("DataType",
-                                                DataType::QASYMM8)),
-                       framework::dataset::make("Axis", 2)))
+TEST_SUITE_END()
+TEST_SUITE(QASYMM8_SIGNED)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConcatenateLayerFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(concat(datasets::Small3DShapes(), datasets::Tiny4DShapes()),
+                       framework::dataset::make("DataType",
+                                                DataType::QASYMM8_SIGNED)),
+                       framework::dataset::make("Axis", 2)))
 {
     // Validate output
     validate(Accessor(_target), _reference);
diff --git a/tests/validation/NEON/HeightConcatenateLayer.cpp b/tests/validation/NEON/HeightConcatenateLayer.cpp
index 075dfa3b24..bfb0a21ab9 100644
--- a/tests/validation/NEON/HeightConcatenateLayer.cpp
+++ b/tests/validation/NEON/HeightConcatenateLayer.cpp
@@ -118,15 +118,18 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEHeightConcatenateLayerFixture, frame
     // Validate output
     validate(Accessor(_target), _reference);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, NEHeightConcatenateLayerFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::ConcatenateLayerShapes(),
-                       framework::dataset::make("DataType",
-                                                DataType::QASYMM8)),
-                       framework::dataset::make("Axis", 1)))
+TEST_SUITE_END() // QASYMM8
+
+TEST_SUITE(QASYMM8_SIGNED)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEHeightConcatenateLayerFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(concat(datasets::Small2DShapes(), datasets::Tiny4DShapes()),
+                       framework::dataset::make("DataType",
+                                                DataType::QASYMM8_SIGNED)),
+                       framework::dataset::make("Axis", 1)))
 {
     // Validate output
     validate(Accessor(_target), _reference);
 }
-TEST_SUITE_END() // QASYMM8
+TEST_SUITE_END() // QASYMM8_SIGNED
 TEST_SUITE_END() // Quantized
 TEST_SUITE_END()
diff --git a/tests/validation/NEON/WidthConcatenateLayer.cpp b/tests/validation/NEON/WidthConcatenateLayer.cpp
index ba0ff1bd81..3edf136cab 100644
--- a/tests/validation/NEON/WidthConcatenateLayer.cpp
+++ b/tests/validation/NEON/WidthConcatenateLayer.cpp
@@ -117,9 +117,13 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEWidthConcatenateLayerFixture, framew
     // Validate output
     validate(Accessor(_target), _reference);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, NEWidthConcatenateLayerFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::ConcatenateLayerShapes(), framework::dataset::make("DataType",
-                                                                                                                   DataType::QASYMM8)),
-                       framework::dataset::make("Axis", 0)))
+TEST_SUITE_END()
+
+TEST_SUITE(QASYMM8_SIGNED)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEWidthConcatenateLayerFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(concat(datasets::Small2DShapes(), datasets::Tiny4DShapes()),
+                       framework::dataset::make("DataType",
+                                                DataType::QASYMM8_SIGNED)),
+                       framework::dataset::make("Axis", 0)))
 {
     // Validate output
diff --git a/tests/validation/reference/ConcatenateLayer.cpp b/tests/validation/reference/ConcatenateLayer.cpp
index aa74ca2474..266dae1c27 100644
--- a/tests/validation/reference/ConcatenateLayer.cpp
+++ b/tests/validation/reference/ConcatenateLayer.cpp
@@ -70,16 +70,27 @@ SimpleTensor widthconcatenate_layer(const std::vector> &srcs,
             for(int r = 0; r < height; ++r)
             {
                 const int offset = u * height * depth + d * height + r;
-                if(src.data_type() == DataType::QASYMM8 && src.quantization_info() != dst.quantization_info())
+                if(is_data_type_quantized(src.data_type()) && src.quantization_info() != dst.quantization_info())
                 {
                     const UniformQuantizationInfo iq_info = src.quantization_info().uniform();
                     const UniformQuantizationInfo oq_info = dst.quantization_info().uniform();
-                    std::transform(src_ptr, src_ptr + width, dst_ptr + width_offset + offset * width_out, [&](T t)
+                    if(src.data_type() == DataType::QASYMM8)
                     {
-                        const float dequantized_input = dequantize_qasymm8(t, iq_info);
-                        return quantize_qasymm8(dequantized_input, oq_info);
-                    });
+                        std::transform(src_ptr, src_ptr + width, dst_ptr + width_offset + offset * width_out, [&](T t)
+                        {
+                            const float dequantized_input = dequantize_qasymm8(t, iq_info);
+                            return quantize_qasymm8(dequantized_input, oq_info);
+                        });
+                    }
+                    else
+                    {
+                        std::transform(src_ptr, src_ptr + width, dst_ptr + width_offset + offset * width_out, [&](T t)
+                        {
+                            const float dequantized_input = dequantize_qasymm8_signed(t, iq_info);
+                            return quantize_qasymm8_signed(dequantized_input, oq_info);
+                        });
+                    }
                     src_ptr += width;
                 }
                 else
@@ -98,6 +109,7 @@ SimpleTensor widthconcatenate_layer(const std::vector> &srcs,
 template SimpleTensor widthconcatenate_layer(const std::vector> &srcs, SimpleTensor &dst);
 template SimpleTensor widthconcatenate_layer(const std::vector> &srcs, SimpleTensor &dst);
 template SimpleTensor widthconcatenate_layer(const std::vector> &srcs, SimpleTensor &dst);
+template SimpleTensor<int8_t> widthconcatenate_layer(const std::vector<SimpleTensor<int8_t>> &srcs, SimpleTensor<int8_t> &dst);
 } // namespace
 
 template <typename T>
@@ -148,6 +160,7 @@ SimpleTensor concatenate_layer(std::vector> &srcs, SimpleTens
 template SimpleTensor concatenate_layer(std::vector> &srcs, SimpleTensor &dst, unsigned int axis);
 template SimpleTensor concatenate_layer(std::vector> &srcs, SimpleTensor &dst, unsigned int axis);
 template SimpleTensor concatenate_layer(std::vector> &srcs, SimpleTensor &dst, unsigned int axis);
+template SimpleTensor<int8_t> concatenate_layer(std::vector<SimpleTensor<int8_t>> &srcs, SimpleTensor<int8_t> &dst, unsigned int axis);
 } // namespace reference
 } // namespace validation
 } // namespace test
-- 
cgit v1.2.1
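
Reviewer note (not part of the patch): the new QASYMM8_SIGNED branch in NEWidthConcatenateLayerKernel::run(), like the matching reference change in ConcatenateLayer.cpp, only requantises when the input and output tensors carry different quantization info; otherwise the bytes are copied as-is. A minimal scalar sketch of that per-element round trip follows. The struct and function names below are illustrative only and are not Arm Compute Library APIs; the kernel performs the same computation sixteen lanes at a time via vld1q_s8/vdequantize/vquantize_signed/vst1q_s8.

#include <algorithm>
#include <cmath>
#include <cstdint>

// Illustrative affine quantisation parameters: real_value = scale * (quantized - offset).
struct UniformQParams
{
    float   scale;
    int32_t offset;
};

// Dequantise with the source parameters, requantise with the destination parameters,
// rounding to nearest and saturating to the signed 8-bit range [-128, 127].
inline int8_t requantize_qasymm8_signed(int8_t in, UniformQParams src, UniformQParams dst)
{
    const float real      = src.scale * (static_cast<float>(in) - static_cast<float>(src.offset));
    const int   quantized = static_cast<int>(std::lround(real / dst.scale)) + dst.offset;
    return static_cast<int8_t>(std::min(127, std::max(-128, quantized)));
}

When source and destination share the same scale and offset the result equals the input, which is why both the kernel and the reference guard this path with a quantization-info comparison and otherwise fall back to a plain copy.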
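
As a quick way to see what the relaxed validate() checks enable, the sketch below (also not part of the patch; shapes and quantisation parameters are made up) asks NEReshapeLayer whether a QASYMM8_SIGNED reshape is supported. Before this change the kernel's data-type whitelist did not include QASYMM8_SIGNED, so the same call would have returned an error.

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NEReshapeLayer.h"

#include <iostream>

using namespace arm_compute;

int main()
{
    // Hypothetical 8x4x2 signed-quantised input reshaped to a flat 64-element tensor.
    const QuantizationInfo qinfo(0.5f, -10);
    const TensorInfo       src(TensorShape(8U, 4U, 2U), 1, DataType::QASYMM8_SIGNED, qinfo);
    const TensorInfo       dst(TensorShape(64U), 1, DataType::QASYMM8_SIGNED, qinfo);

    // validate() now only rejects UNKNOWN data types (plus mismatched types/total sizes).
    const Status status = NEReshapeLayer::validate(&src, &dst);
    std::cout << (status.error_code() == ErrorCode::OK ? "QASYMM8_SIGNED reshape supported" : status.error_description()) << std::endl;
    return 0;
}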