From 8217c8e4f488eb32733c481ab3a4d905069479f1 Mon Sep 17 00:00:00 2001
From: Georgios Pinitas <georgios.pinitas@arm.com>
Date: Mon, 11 Nov 2019 18:24:22 +0000
Subject: COMPMID-2895: Remove QASYMM8_PER_CHANNEL data type

Change-Id: I2d1b77370f8eceeaeae95306b4db5d90ababb76f
Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Reviewed-on: https://review.mlplatform.org/c/2266
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
---
 .../core/CL/kernels/CLDequantizationLayerKernel.h  |  4 +--
 arm_compute/core/NEON/NEAsymm.h                    | 17 +++++----
 .../NEON/kernels/NEDequantizationLayerKernel.h     |  4 +--
 arm_compute/core/QuantizationInfo.h                | 15 --------
 arm_compute/core/Types.h                           | 39 ++++++++++----------
 arm_compute/core/Utils.h                           |  5 ---
 .../runtime/CL/functions/CLDequantizationLayer.h   |  4 +--
 .../runtime/NEON/functions/NEDequantizationLayer.h |  4 +--
 src/core/CL/CLHelpers.cpp                          |  5 ---
 src/core/CL/cl_kernels/dequantization_layer.cl     | 35 +++++++-----------
 .../CL/kernels/CLDequantizationLayerKernel.cpp     |  3 +-
 .../NEON/kernels/NEDequantizationLayerKernel.cpp   | 41 ++++++++--------------
 src/core/Utils.cpp                                 |  4 ---
 tests/AssetsLibrary.h                              |  2 +-
 tests/Utils.h                                      |  1 -
 tests/datasets/DatatypeDataset.h                   |  2 +-
 tests/validation/Helpers.cpp                       |  9 -----
 tests/validation/Helpers.h                         |  9 -----
 .../fixtures/DequantizationLayerFixture.h          | 12 +++----
 tests/validation/reference/DequantizationLayer.cpp |  6 ++--
 utils/TypePrinter.h                                |  3 --
 utils/Utils.h                                      |  1 -
 22 files changed, 72 insertions(+), 153 deletions(-)

diff --git a/arm_compute/core/CL/kernels/CLDequantizationLayerKernel.h b/arm_compute/core/CL/kernels/CLDequantizationLayerKernel.h
index 830d7518ce..739e2d45d2 100644
--- a/arm_compute/core/CL/kernels/CLDequantizationLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLDequantizationLayerKernel.h
@@ -48,13 +48,13 @@ public:
     ~CLDequantizationLayerKernel() = default;
     /** Set the input, output, min and max.
      *
-     * @param[in]  input  Source tensor. Data types supported: QASYMM8/QASYMM8_PER_CHANNEL/QSYMM8/QSYMM16.
+     * @param[in]  input  Source tensor. Data types supported: QASYMM8/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16.
      * @param[out] output Destination tensor. Data types supported: F16/F32.
      */
     void configure(const ICLTensor *input, ICLTensor *output);
     /** Static function to check if given info will lead to a valid configuration of @ref CLDequantizationLayerKernel
      *
-     * @param[in] input  Input tensor info. Data types supported: QASYMM8/QASYMM8_PER_CHANNEL/QSYMM8/QSYMM16.
+     * @param[in] input  Input tensor info. Data types supported: QASYMM8/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16.
      * @param[in] output Output tensor info. Data types supported: F16/F32.
      *
      * @return a status
diff --git a/arm_compute/core/NEON/NEAsymm.h b/arm_compute/core/NEON/NEAsymm.h
index a3bd7e28f0..c75a58046b 100644
--- a/arm_compute/core/NEON/NEAsymm.h
+++ b/arm_compute/core/NEON/NEAsymm.h
@@ -325,23 +325,22 @@ inline float32x4x4_t vdequantize(const uint8x16_t &qv, float scale, int32_t offs
     return vdequantized_input;
 }
 
-/** Dequantize following an asymmetric quantization scheme a neon vector holding 16 quantized values.
+/** Dequantize following symmetric quantization scheme a neon vector holding 16 quantized values.
  *
- * @param[in] qv      Input values to be dequantized.
- * @param[in] vscale  Vector containing quantization scaling factors.
- * @param[in] voffset Vector containing quantization offset.
+ * @param[in] qv     Input values to be dequantized.
+ * @param[in] vscale Vector containing quantization scaling factors.
  *
  * @return Dequantized values in a neon vector
  */
-inline float32x4x4_t vdequantize(const uint8x16_t &qv, const float32x4x4_t vscale, const int32x4x4_t voffset)
+inline float32x4x4_t vdequantize(const int8x16_t &qv, const float32x4x4_t vscale)
 {
     const float32x4x4_t vdequantized_input =
     {
         {
-            vmulq_f32(vcvtq_f32_s32(vsubq_s32(vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(vmovl_u8(vget_low_u8(qv))))), voffset.val[0])), vscale.val[0]),
-            vmulq_f32(vcvtq_f32_s32(vsubq_s32(vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(vmovl_u8(vget_low_u8(qv))))), voffset.val[1])), vscale.val[1]),
-            vmulq_f32(vcvtq_f32_s32(vsubq_s32(vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(vmovl_u8(vget_high_u8(qv))))), voffset.val[2])), vscale.val[2]),
-            vmulq_f32(vcvtq_f32_s32(vsubq_s32(vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(vmovl_u8(vget_high_u8(qv))))), voffset.val[3])), vscale.val[3]),
+            vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_low_s16(vmovl_s8(vget_low_s8(qv))))), vscale.val[0]),
+            vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_high_s16(vmovl_s8(vget_low_s8(qv))))), vscale.val[1]),
+            vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_low_s16(vmovl_s8(vget_high_s8(qv))))), vscale.val[2]),
+            vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_high_s16(vmovl_s8(vget_high_s8(qv))))), vscale.val[3]),
         }
     };
     return vdequantized_input;
diff --git a/arm_compute/core/NEON/kernels/NEDequantizationLayerKernel.h b/arm_compute/core/NEON/kernels/NEDequantizationLayerKernel.h
index 3e7feda650..7e65384677 100644
--- a/arm_compute/core/NEON/kernels/NEDequantizationLayerKernel.h
+++ b/arm_compute/core/NEON/kernels/NEDequantizationLayerKernel.h
@@ -52,13 +52,13 @@ public:
     ~NEDequantizationLayerKernel() = default;
     /** Set input, output tensors.
      *
-     * @param[in]  input  Source tensor. Data type supported: QASYMM8/QASYMM8_PER_CHANNEL/QSYMM8/QSYMM16.
+     * @param[in]  input  Source tensor. Data type supported: QASYMM8/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16.
      * @param[out] output Destination tensor with the same dimensions of input. Data type supported: F16/F32.
      */
     void configure(const ITensor *input, ITensor *output);
     /** Static function to check if given info will lead to a valid configuration of @ref NEDequantizationLayerKernel
      *
-     * @param[in] input  Input tensor info. Data types supported: QASYMM8/QASYMM8_PER_CHANNEL/QSYMM8/QSYMM16.
+     * @param[in] input  Input tensor info. Data types supported: QASYMM8/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16.
      * @param[in] output Output tensor info. Data types supported: F16/F32.
      *
      * @return a status
diff --git a/arm_compute/core/QuantizationInfo.h b/arm_compute/core/QuantizationInfo.h
index 949ee66b7c..ebd9b677da 100644
--- a/arm_compute/core/QuantizationInfo.h
+++ b/arm_compute/core/QuantizationInfo.h
@@ -265,21 +265,6 @@ inline int8_t quantize_qsymm8_per_channel(float value, const QuantizationInfo &q
     return quantized;
 }
 
-/** Quantize a value given a 8-bit asymmetric per channel quantization scheme
- *
- * @param[in] value      Value to quantize
- * @param[in] qinfo      Quantization information to use for quantizing
- * @param[in] channel_id channel index into the scale vector of quantization info
- *
- * @return Quantized value
- */
-inline int8_t quantize_qasymm8_per_channel(float value, const QuantizationInfo &qinfo, size_t channel_id = 0)
-{
-    int quantized = arm_compute::round(value / qinfo.scale()[channel_id], RoundingPolicy::TO_NEAREST_UP);
-    quantized     = std::max(0, std::min(quantized, 255));
-    return quantized;
-}
-
 /** Dequantize a value given a 8-bit asymmetric quantization scheme
  *
  * @param[in] value Value to dequantize
diff --git a/arm_compute/core/Types.h b/arm_compute/core/Types.h
index 9551cc6547..851292f1e1 100644
--- a/arm_compute/core/Types.h
+++ b/arm_compute/core/Types.h
@@ -73,26 +73,25 @@ enum class Format
 /** Available data types */
 enum class DataType
 {
-    UNKNOWN,             /**< Unknown data type */
-    U8,                  /**< unsigned 8-bit number */
-    S8,                  /**< signed 8-bit number */
-    QSYMM8,              /**< quantized, symmetric fixed-point 8-bit number */
-    QASYMM8,             /**< quantized, asymmetric fixed-point 8-bit number unsigned */
-    QASYMM8_SIGNED,      /**< quantized, asymmetric fixed-point 8-bit number signed */
-    QSYMM8_PER_CHANNEL,  /**< quantized, symmetric per channel fixed-point 8-bit number */
-    QASYMM8_PER_CHANNEL, /**< quantized, asymmetric per channel fixed-point 8-bit number */
-    U16,                 /**< unsigned 16-bit number */
-    S16,                 /**< signed 16-bit number */
-    QSYMM16,             /**< quantized, symmetric fixed-point 16-bit number */
-    QASYMM16,            /**< quantized, asymmetric fixed-point 16-bit number */
-    U32,                 /**< unsigned 32-bit number */
-    S32,                 /**< signed 32-bit number */
-    U64,                 /**< unsigned 64-bit number */
-    S64,                 /**< signed 64-bit number */
-    F16,                 /**< 16-bit floating-point number */
-    F32,                 /**< 32-bit floating-point number */
-    F64,                 /**< 64-bit floating-point number */
-    SIZET                /**< size_t */
+    UNKNOWN,            /**< Unknown data type */
+    U8,                 /**< unsigned 8-bit number */
+    S8,                 /**< signed 8-bit number */
+    QSYMM8,             /**< quantized, symmetric fixed-point 8-bit number */
+    QASYMM8,            /**< quantized, asymmetric fixed-point 8-bit number unsigned */
+    QASYMM8_SIGNED,     /**< quantized, asymmetric fixed-point 8-bit number signed */
+    QSYMM8_PER_CHANNEL, /**< quantized, symmetric per channel fixed-point 8-bit number */
+    U16,                /**< unsigned 16-bit number */
+    S16,                /**< signed 16-bit number */
+    QSYMM16,            /**< quantized, symmetric fixed-point 16-bit number */
+    QASYMM16,           /**< quantized, asymmetric fixed-point 16-bit number */
+    U32,                /**< unsigned 32-bit number */
+    S32,                /**< signed 32-bit number */
+    U64,                /**< unsigned 64-bit number */
+    S64,                /**< signed 64-bit number */
+    F16,                /**< 16-bit floating-point number */
+    F32,                /**< 32-bit floating-point number */
+    F64,                /**< 64-bit floating-point number */
+    SIZET               /**< size_t */
 };
 
 /** Available Sampling Policies */
diff --git a/arm_compute/core/Utils.h b/arm_compute/core/Utils.h
index a6e1ea1a89..366d5dcc68 100644
--- a/arm_compute/core/Utils.h
+++ b/arm_compute/core/Utils.h
@@ -116,7 +116,6 @@ inline size_t data_size_from_type(DataType data_type)
         case DataType::QASYMM8:
         case DataType::QASYMM8_SIGNED:
         case DataType::QSYMM8_PER_CHANNEL:
-        case DataType::QASYMM8_PER_CHANNEL:
             return 1;
         case DataType::U16:
         case DataType::S16:
@@ -537,7 +536,6 @@ inline DataType get_promoted_data_type(DataType dt)
         case DataType::QASYMM8:
         case DataType::QASYMM8_SIGNED:
         case DataType::QSYMM8_PER_CHANNEL:
-        case DataType::QASYMM8_PER_CHANNEL:
         case DataType::QSYMM16:
         case DataType::QASYMM16:
         case DataType::F16:
@@ -1029,7 +1027,6 @@ inline bool is_data_type_quantized(DataType dt)
         case DataType::QASYMM8:
         case DataType::QASYMM8_SIGNED:
         case DataType::QSYMM8_PER_CHANNEL:
-        case DataType::QASYMM8_PER_CHANNEL:
         case DataType::QSYMM16:
         case DataType::QASYMM16:
             return true;
@@ -1050,7 +1047,6 @@ inline bool is_data_type_quantized_asymmetric(DataType dt)
     {
         case DataType::QASYMM8:
         case DataType::QASYMM8_SIGNED:
-        case DataType::QASYMM8_PER_CHANNEL:
         case DataType::QASYMM16:
             return true;
         default:
@@ -1088,7 +1084,6 @@ inline bool is_data_type_quantized_per_channel(DataType dt)
     switch(dt)
     {
         case DataType::QSYMM8_PER_CHANNEL:
-        case DataType::QASYMM8_PER_CHANNEL:
             return true;
         default:
             return false;
diff --git a/arm_compute/runtime/CL/functions/CLDequantizationLayer.h b/arm_compute/runtime/CL/functions/CLDequantizationLayer.h
index c519311fb1..f3e507a267 100644
--- a/arm_compute/runtime/CL/functions/CLDequantizationLayer.h
+++ b/arm_compute/runtime/CL/functions/CLDequantizationLayer.h
@@ -40,13 +40,13 @@ public:
     /** Set the input and output tensors.
      *
      * @param[in]  input  Source tensor with at least 3 dimensions. The dimensions over the third will be interpreted as batches.
-     *                    Data types supported: QASYMM8/QASYMM8_PER_CHANNEL/QSYMM8/QSYMM16.
+     *                    Data types supported: QASYMM8/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16.
      * @param[out] output Destination tensor with the same dimensions of input. Data type supported: F16/F32.
      */
     void configure(const ICLTensor *input, ICLTensor *output);
     /** Static function to check if given info will lead to a valid configuration of @ref CLDequantizationLayer
      *
-     * @param[in] input  Input tensor info. Data types supported: QASYMM8/QASYMM8_PER_CHANNEL/QSYMM8/QSYMM16.
+     * @param[in] input  Input tensor info. Data types supported: QASYMM8/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16.
      * @param[in] output Output tensor info. Data type supported: F16/F32.
      *
      * @return a status
diff --git a/arm_compute/runtime/NEON/functions/NEDequantizationLayer.h b/arm_compute/runtime/NEON/functions/NEDequantizationLayer.h
index 88c8777a68..4031ae01ed 100644
--- a/arm_compute/runtime/NEON/functions/NEDequantizationLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEDequantizationLayer.h
@@ -39,13 +39,13 @@ class NEDequantizationLayer : public INESimpleFunctionNoBorder
 public:
     /** Configure the kernel.
      *
-     * @param[in]  input  Source tensor. Data types supported: QASYMM8/QASYMM8_PER_CHANNEL/QSYMM8/QSYMM16.
+     * @param[in]  input  Source tensor. Data types supported: QASYMM8/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16.
      * @param[out] output Destination tensor with the same dimensions of input. Data type supported: F16/F32.
      */
     void configure(const ITensor *input, ITensor *output);
     /** Static function to check if given info will lead to a valid configuration of @ref NEDequantizationLayer
      *
-     * @param[in] input  Input tensor info. Data types supported: QASYMM8/QASYMM8_PER_CHANNEL/QSYMM8/QSYMM16.
+     * @param[in] input  Input tensor info. Data types supported: QASYMM8/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16.
      * @param[in] output Output tensor info. Data type supported: F16/F32.
      *
      * @return a status
diff --git a/src/core/CL/CLHelpers.cpp b/src/core/CL/CLHelpers.cpp
index 26660ce215..17274d38ad 100644
--- a/src/core/CL/CLHelpers.cpp
+++ b/src/core/CL/CLHelpers.cpp
@@ -40,7 +40,6 @@ std::string get_cl_type_from_data_type(const DataType &dt)
     {
         case DataType::U8:
         case DataType::QASYMM8:
-        case DataType::QASYMM8_PER_CHANNEL:
             return "uchar";
         case DataType::S8:
         case DataType::QSYMM8:
@@ -76,7 +75,6 @@ std::string get_cl_promoted_type_from_data_type(const DataType &dt)
     {
         case DataType::U8:
         case DataType::QASYMM8:
-        case DataType::QASYMM8_PER_CHANNEL:
             return "ushort";
         case DataType::S8:
         case DataType::QSYMM8:
@@ -124,7 +122,6 @@ std::string get_cl_select_type_from_data_type(const DataType &dt)
     {
         case DataType::U8:
         case DataType::QASYMM8:
-        case DataType::QASYMM8_PER_CHANNEL:
             return "uchar";
         case DataType::S8:
         case DataType::QSYMM8:
@@ -161,7 +158,6 @@ std::string get_data_size_from_data_type(const DataType &dt)
         case DataType::QSYMM8:
         case DataType::QASYMM8:
         case DataType::QSYMM8_PER_CHANNEL:
-        case DataType::QASYMM8_PER_CHANNEL:
             return "8";
         case DataType::U16:
         case DataType::S16:
@@ -306,7 +302,6 @@ size_t preferred_vector_width(const cl::Device &device, const DataType dt)
         case DataType::QASYMM8:
         case DataType::QSYMM8:
         case DataType::QSYMM8_PER_CHANNEL:
-        case DataType::QASYMM8_PER_CHANNEL:
             return device.getInfo<CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR>();
         case DataType::U16:
         case DataType::S16:
diff --git a/src/core/CL/cl_kernels/dequantization_layer.cl b/src/core/CL/cl_kernels/dequantization_layer.cl
index 5826847a5e..7550b4ba76 100644
--- a/src/core/CL/cl_kernels/dequantization_layer.cl
+++ b/src/core/CL/cl_kernels/dequantization_layer.cl
@@ -90,13 +90,13 @@ __kernel void dequantization_layer(
 #endif // defined(VEC_SIZE) && defined(DATA_TYPE_SRC) && defined(DATA_TYPE_DST) && defined(SCALE) && defined(OFFSET)
 
 #if defined(VEC_SIZE) && defined(DATA_TYPE_SRC) && defined(DATA_TYPE_DST)
-/** This performs per channel dequantization of 8-bit unsigned integers to floating point. (NCHW)
+/** This performs per channel dequantization of 8-bit signed integers to floating point. (NCHW)
  *
  * @note Source datatype should be given as a preprocessor argument using -DDATA_TYPE_SRC=type. e.g. -DDATA_TYPE_SRC=char
  * @note Destination datatype should be given as a preprocessor argument using -DDATA_TYPE_DST=type. e.g. -DDATA_TYPE_DST=float
  * @note Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. -DVEC_SIZE=16
  *
- * @param[in]  input_ptr                            Pointer to the source tensor. Supported data types: QASYMM8_PER_CHANNEL
+ * @param[in]  input_ptr                            Pointer to the source tensor. Supported data types: QSYMM8_PER_CHANNEL
  * @param[in]  input_stride_x                       Stride of the source tensor in X dimension (in bytes)
  * @param[in]  input_step_x                         input_stride_x * number of elements along X processed per workitem(in bytes)
  * @param[in]  input_stride_y                       Stride of the source tensor in Y dimension (in bytes)
@@ -113,13 +113,11 @@ __kernel void dequantization_layer(
  * @param[in]  output_step_z                        output_stride_z * number of elements along Z processed per workitem(in bytes)
  * @param[in]  output_offset_first_element_in_bytes The offset of the first element in the destination tensor
  * @param[in]  scale                                Pointer to buffer with the per channel quantized scales
- * @param[in]  offset                               Pointer to buffer with the per channel quantized offsets
  */
 __kernel void dequantization_layer_per_channel_nchw(
     TENSOR3D_DECLARATION(input),
     TENSOR3D_DECLARATION(output),
-    __global float *scale,
-    __global int   *offset)
+    __global float *scale)
 {
     // Get pixels pointer
     Tensor3D input  = CONVERT_TO_TENSOR3D_STRUCT(input);
@@ -136,31 +134,28 @@ __kernel void dequantization_layer_per_channel_nchw(
     VEC_DATA_TYPE(int, VEC_SIZE)
     val = CONVERT(VLOAD(VEC_SIZE)(0, (__global DATA_TYPE_SRC *)input.ptr), VEC_DATA_TYPE(int, VEC_SIZE));
 
-    // Create scale and offset vectors
+    // Create scale vectors
     const VEC_DATA_TYPE(float, VEC_SIZE)
     vscale = scale[get_global_id(2)];
 
-    const VEC_DATA_TYPE(int, VEC_SIZE)
-    voffset = offset[get_global_id(2)];
-
     // Dequantize
     VEC_DATA_TYPE(float, VEC_SIZE)
-    res = vscale * CONVERT((val - voffset), VEC_DATA_TYPE(float, VEC_SIZE));
+    res = vscale * CONVERT((val), VEC_DATA_TYPE(float, VEC_SIZE));
 
     // Store result
     VSTORE(VEC_SIZE)
     (CONVERT(res, VEC_DATA_TYPE(DATA_TYPE_DST, VEC_SIZE)), 0, (__global DATA_TYPE_DST *)output.ptr);
 #else  // !defined(LAST_ACCESSED_X)
-    *((__global DATA_TYPE_DST *)(output.ptr)) = (DATA_TYPE_DST)((float)((int)(*((__global DATA_TYPE_SRC *)(input.ptr))) - offset[get_global_id(2)]) * scale[get_global_id(2)]);
+    *((__global DATA_TYPE_DST *)(output.ptr)) = (DATA_TYPE_DST)((float)((int)(*((__global DATA_TYPE_SRC *)(input.ptr)))) * scale[get_global_id(2)]);
 #endif // defined(LAST_ACCESSED_X)
 }
-/** This performs per channel dequantization of 8-bit unsigned integers to floating point. (NHWC)
+/** This performs per channel dequantization of 8-bit signed integers to floating point. (NHWC)
  *
  * @note Source datatype should be given as a preprocessor argument using -DDATA_TYPE_SRC=type. e.g. -DDATA_TYPE_SRC=char
  * @note Destination datatype should be given as a preprocessor argument using -DDATA_TYPE_DST=type. e.g. -DDATA_TYPE_DST=float
  * @note Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. -DVEC_SIZE=16
  *
- * @param[in]  input_ptr                            Pointer to the source tensor. Supported data types: QASYMM8_PER_CHANNEL
+ * @param[in]  input_ptr                            Pointer to the source tensor. Supported data types: QSYMM8_PER_CHANNEL
  * @param[in]  input_stride_x                       Stride of the source tensor in X dimension (in bytes)
  * @param[in]  input_step_x                         input_stride_x * number of elements along X processed per workitem(in bytes)
  * @param[in]  input_stride_y                       Stride of the source tensor in Y dimension (in bytes)
@@ -177,13 +172,11 @@ __kernel void dequantization_layer_per_channel_nchw(
  * @param[in]  output_step_z                        output_stride_z * number of elements along Z processed per workitem(in bytes)
  * @param[in]  output_offset_first_element_in_bytes The offset of the first element in the destination tensor
  * @param[in]  scale                                Pointer to buffer with the per channel quantized scales
- * @param[in]  offset                               Pointer to buffer with the per channel quantized offsets
  */
 __kernel void dequantization_layer_per_channel_nhwc(
     TENSOR3D_DECLARATION(input),
     TENSOR3D_DECLARATION(output),
-    __global float *scale,
-    __global int   *offset)
+    __global float *scale)
 {
     // Get pixels pointer
     Tensor3D input  = CONVERT_TO_TENSOR3D_STRUCT(input);
@@ -196,28 +189,24 @@ __kernel void dequantization_layer_per_channel_nhwc(
     input.ptr -= max(xi - (int)LAST_ACCESSED_X, 0) * input_stride_x;
     output.ptr -= max(xi - (int)LAST_ACCESSED_X, 0) * output_stride_x;
     scale -= max(xi - (int)LAST_ACCESSED_X, 0);
-    offset -= max(xi - (int)LAST_ACCESSED_X, 0);
 
     // Load data
     VEC_DATA_TYPE(int, VEC_SIZE)
     val = CONVERT(VLOAD(VEC_SIZE)(0, (__global DATA_TYPE_SRC *)input.ptr), VEC_DATA_TYPE(int, VEC_SIZE));
 
-    // Create scale and offset vectors
+    // Create scale vectors
     const VEC_DATA_TYPE(float, VEC_SIZE)
     vscale = VLOAD(VEC_SIZE)(0, &scale[xi]);
 
-    const VEC_DATA_TYPE(int, VEC_SIZE)
-    voffset = VLOAD(VEC_SIZE)(0, &offset[xi]);
-
     // Dequantize
     VEC_DATA_TYPE(float, VEC_SIZE)
-    res = vscale * CONVERT((val - voffset), VEC_DATA_TYPE(float, VEC_SIZE));
+    res = vscale * CONVERT((val), VEC_DATA_TYPE(float, VEC_SIZE));
 
     // Store result
     VSTORE(VEC_SIZE)
     (CONVERT(res, VEC_DATA_TYPE(DATA_TYPE_DST, VEC_SIZE)), 0, (__global DATA_TYPE_DST *)output.ptr);
 #else  // !defined(LAST_ACCESSED_X)
-    *((__global DATA_TYPE_DST *)(output.ptr)) = (DATA_TYPE_DST)((float)((int)(*((__global DATA_TYPE_SRC *)(input.ptr))) - offset[get_global_id(0)]) * scale[get_global_id(0)]);
+    *((__global DATA_TYPE_DST *)(output.ptr)) = (DATA_TYPE_DST)((float)((int)(*((__global DATA_TYPE_SRC *)(input.ptr)))) * scale[get_global_id(0)]);
 #endif // defined(LAST_ACCESSED_X)
 }
 #endif // defined(VEC_SIZE) && defined(DATA_TYPE_SRC) && defined(DATA_TYPE_DST)
diff --git a/src/core/CL/kernels/CLDequantizationLayerKernel.cpp b/src/core/CL/kernels/CLDequantizationLayerKernel.cpp
index 3ec0b87636..60659faaaf 100644
--- a/src/core/CL/kernels/CLDequantizationLayerKernel.cpp
+++ b/src/core/CL/kernels/CLDequantizationLayerKernel.cpp
@@ -40,7 +40,7 @@ namespace
 Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output)
 {
     ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
-    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_PER_CHANNEL, DataType::QSYMM8, DataType::QSYMM16);
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QSYMM8_PER_CHANNEL, DataType::QSYMM8, DataType::QSYMM16);
 
     if(output->tensor_shape().total_size() > 0)
     {
@@ -144,7 +144,6 @@ void CLDequantizationLayerKernel::run(const Window &window, cl::CommandQueue &qu
     {
         unsigned int idx = num_arguments_per_3D_tensor() * 2; //Skip the input and output parameters
         _kernel.setArg(idx++, _input->quantization().scale->cl_buffer());
-        _kernel.setArg(idx++, _input->quantization().offset->cl_buffer());
     }
 
     do
diff --git a/src/core/NEON/kernels/NEDequantizationLayerKernel.cpp b/src/core/NEON/kernels/NEDequantizationLayerKernel.cpp
index 5abd6a122d..f555df3828 100644
--- a/src/core/NEON/kernels/NEDequantizationLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEDequantizationLayerKernel.cpp
@@ -43,7 +43,7 @@ namespace
 Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output)
 {
     ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
-    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_PER_CHANNEL, DataType::QSYMM8, DataType::QSYMM16);
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QSYMM8_PER_CHANNEL, DataType::QSYMM8, DataType::QSYMM16);
 
     if(output->tensor_shape().total_size() > 0)
     {
@@ -160,10 +160,9 @@ void run_dequantization_qasymm8(const ITensor *input, ITensor *output, const Win
 }
 
 template <typename T>
-void run_dequantization_qasymm8_per_channel_nchw(const ITensor *input, ITensor *output, const Window &window)
+void run_dequantization_qsymm8_per_channel_nchw(const ITensor *input, ITensor *output, const Window &window)
 {
-    const std::vector<float>   scale  = input->info()->quantization_info().scale();
-    const std::vector<int32_t> offset = input->info()->quantization_info().offset();
+    const auto scale = input->info()->quantization_info().scale();
 
     const int  window_step_x  = 16;
     const auto window_start_x = static_cast<int>(window.x().start());
@@ -179,14 +178,14 @@ void run_dequantization_qasymm8_per_channel_nchw(const ITensor *input, ITensor *
 
     execute_window_loop(win, [&](const Coordinates & id)
     {
-        const auto in_ptr  = reinterpret_cast<const uint8_t *>(in.ptr());
+        const auto in_ptr  = reinterpret_cast<const int8_t *>(in.ptr());
         const auto out_ptr = reinterpret_cast<T *>(out.ptr());
 
         int x = window_start_x;
         for(; x <= (window_end_x - window_step_x); x += window_step_x)
         {
             const auto vin  = wrapper::vloadq(in_ptr + x);
-            const auto vdeq = vdequantize(vin, scale[id.z()], offset[id.z()]);
+            const auto vdeq = vdequantize(vin, scale[id.z()]);
 
             store_result<T>(reinterpret_cast<T *>(out_ptr + x), vdeq);
         }
@@ -194,18 +193,17 @@ void run_dequantization_qasymm8_per_channel_nchw(const ITensor *input, ITensor *
         // Compute left-over elements
         for(; x < window_end_x; ++x)
         {
-            uint8_t val    = *(in_ptr + x);
-            *(out_ptr + x) = static_cast<T>(dequantize(val, scale[id.z()], offset[id.z()]));
+            int8_t val     = *(in_ptr + x);
+            *(out_ptr + x) = static_cast<T>(dequantize(val, scale[id.z()]));
         }
     },
     in, out);
 }
 
 template <typename T>
-void run_dequantization_qasymm8_per_channel_nhwc(const ITensor *input, ITensor *output, const Window &window)
+void run_dequantization_qsymm8_per_channel_nhwc(const ITensor *input, ITensor *output, const Window &window)
 {
-    const std::vector<float>   scale  = input->info()->quantization_info().scale();
-    const std::vector<int32_t> offset = input->info()->quantization_info().offset();
+    const auto scale = input->info()->quantization_info().scale();
 
     const int  window_step_x  = 16;
     const auto window_start_x = static_cast<int>(window.x().start());
@@ -221,7 +219,7 @@ void run_dequantization_qasymm8_per_channel_nhwc(const ITensor *input, ITensor *
 
     execute_window_loop(win, [&](const Coordinates &)
     {
-        const auto in_ptr  = reinterpret_cast<const uint8_t *>(in.ptr());
+        const auto in_ptr  = reinterpret_cast<const int8_t *>(in.ptr());
         const auto out_ptr = reinterpret_cast<T *>(out.ptr());
 
         int x = window_start_x;
@@ -236,17 +234,8 @@ void run_dequantization_qasymm8_per_channel_nhwc(const ITensor *input, ITensor *
                     scale[x + 12], scale[x + 13], scale[x + 14], scale[x + 15]
                 }
             };
-            const int32x4x4_t voffset =
-            {
-                {
-                    offset[x + 0], offset[x + 1], offset[x + 2], offset[x + 3],
-                    offset[x + 4], offset[x + 5], offset[x + 6], offset[x + 7],
-                    offset[x + 8], offset[x + 9], offset[x + 10], offset[x + 11],
-                    offset[x + 12], offset[x + 13], offset[x + 14], offset[x + 15]
-                }
-            };
             const auto vin  = wrapper::vloadq(in_ptr + x);
-            const auto vdeq = vdequantize(vin, vscale, voffset);
+            const auto vdeq = vdequantize(vin, vscale);
 
             store_result<T>(reinterpret_cast<T *>(out_ptr + x), vdeq);
         }
@@ -254,8 +243,8 @@ void run_dequantization_qasymm8_per_channel_nhwc(const ITensor *input, ITensor *
         // Compute left-over elements
         for(; x < window_end_x; ++x)
         {
-            uint8_t val    = *(in_ptr + x);
-            *(out_ptr + x) = static_cast<T>(dequantize(val, scale[x], offset[x]));
+            int8_t val     = *(in_ptr + x);
+            *(out_ptr + x) = static_cast<T>(dequantize(val, scale[x]));
         }
     },
     in, out);
@@ -353,8 +342,8 @@ void run_dequantization_core(const ITensor *input, ITensor *output, const Window
         case DataType::QASYMM8:
             run_dequantization_qasymm8<T>(input, output, window);
             break;
-        case DataType::QASYMM8_PER_CHANNEL:
-            input->info()->data_layout() == DataLayout::NHWC ? run_dequantization_qasymm8_per_channel_nhwc<T>(input, output, window) : run_dequantization_qasymm8_per_channel_nchw<T>(input, output, window);
+        case DataType::QSYMM8_PER_CHANNEL:
+            input->info()->data_layout() == DataLayout::NHWC ? run_dequantization_qsymm8_per_channel_nhwc<T>(input, output, window) : run_dequantization_qsymm8_per_channel_nchw<T>(input, output, window);
             break;
         case DataType::QSYMM8:
             run_dequantization_qsymm8<T>(input, output, window);
diff --git a/src/core/Utils.cpp b/src/core/Utils.cpp
index 6d276d1322..9f1255dcaf 100644
--- a/src/core/Utils.cpp
+++ b/src/core/Utils.cpp
@@ -160,7 +160,6 @@ const std::string &arm_compute::string_from_data_type(DataType dt)
         { DataType::SIZET, "SIZET" },
         { DataType::QSYMM8, "QSYMM8" },
         { DataType::QSYMM8_PER_CHANNEL, "QSYMM8_PER_CHANNEL" },
-        { DataType::QASYMM8_PER_CHANNEL, "QASYMM8_PER_CHANNEL" },
         { DataType::QASYMM8, "QASYMM8" },
         { DataType::QASYMM8_SIGNED, "QASYMM8_SIGNED" },
         { DataType::QSYMM16, "QSYMM16" },
@@ -287,7 +286,6 @@ std::string arm_compute::string_from_pixel_value(const PixelValue &value, const
     {
         case DataType::U8:
         case DataType::QASYMM8:
-        case DataType::QASYMM8_PER_CHANNEL:
             // Needs conversion to 32 bit, otherwise interpreted as ASCII values
             ss << uint32_t(value.get<uint8_t>());
             converted_string = ss.str();
@@ -446,7 +444,6 @@ void arm_compute::print_consecutive_elements(std::ostream &s, DataType dt, const
     {
         case DataType::U8:
         case DataType::QASYMM8:
-        case DataType::QASYMM8_PER_CHANNEL:
             print_consecutive_elements_impl<uint8_t>(s, ptr, n, stream_width, element_delim);
             break;
         case DataType::S8:
@@ -485,7 +482,6 @@ int arm_compute::max_consecutive_elements_display_width(std::ostream &s, DataTyp
     {
         case DataType::U8:
         case DataType::QASYMM8:
-        case DataType::QASYMM8_PER_CHANNEL:
             return max_consecutive_elements_display_width_impl<uint8_t>(s, ptr, n);
         case DataType::S8:
         case DataType::QASYMM8_SIGNED:
diff --git a/tests/AssetsLibrary.h b/tests/AssetsLibrary.h
index 280f6ddbd0..f8635ea576 100644
--- a/tests/AssetsLibrary.h
+++ b/tests/AssetsLibrary.h
@@ -632,7 +632,6 @@ void AssetsLibrary::fill_tensor_uniform(T &&tensor, std::random_device::result_t
     {
         case DataType::U8:
         case DataType::QASYMM8:
-        case DataType::QASYMM8_PER_CHANNEL:
         {
             std::uniform_int_distribution<uint8_t> distribution_u8(std::numeric_limits<uint8_t>::lowest(), std::numeric_limits<uint8_t>::max());
             fill(tensor, distribution_u8, seed_offset);
@@ -640,6 +639,7 @@ void AssetsLibrary::fill_tensor_uniform(T &&tensor, std::random_device::result_t
         }
         case DataType::S8:
         case DataType::QSYMM8:
+        case DataType::QSYMM8_PER_CHANNEL:
         case DataType::QASYMM8_SIGNED:
         {
             std::uniform_int_distribution<int8_t> distribution_s8(std::numeric_limits<int8_t>::lowest(), std::numeric_limits<int8_t>::max());
diff --git a/tests/Utils.h b/tests/Utils.h
index 6b3935e526..aff63d3119 100644
--- a/tests/Utils.h
+++ b/tests/Utils.h
@@ -355,7 +355,6 @@ void store_value_with_data_type(void *ptr, T value, DataType data_type)
     {
         case DataType::U8:
         case DataType::QASYMM8:
-        case DataType::QASYMM8_PER_CHANNEL:
             *reinterpret_cast<uint8_t *>(ptr) = value;
             break;
         case DataType::S8:
diff --git a/tests/datasets/DatatypeDataset.h b/tests/datasets/DatatypeDataset.h
index 9bdb346340..df0ddb3ce5 100644
--- a/tests/datasets/DatatypeDataset.h
+++ b/tests/datasets/DatatypeDataset.h
@@ -54,7 +54,7 @@ public:
     QuantizedPerChannelTypes()
         : ContainerDataset("QuantizedPerChannelTypes",
     {
-        DataType::QASYMM8_PER_CHANNEL
+        DataType::QSYMM8_PER_CHANNEL
     })
     {
     }
diff --git a/tests/validation/Helpers.cpp b/tests/validation/Helpers.cpp
index 95a5548628..e362e05b81 100644
--- a/tests/validation/Helpers.cpp
+++ b/tests/validation/Helpers.cpp
@@ -335,15 +335,6 @@ std::pair<int, int> get_symm_quantized_per_channel_bounds(const QuantizationInfo
     return std::pair<int, int> { min_bound, max_bound };
 }
 
-std::pair<int, int> get_asymm_quantized_per_channel_bounds(const QuantizationInfo &quant_info, float min, float max, size_t channel_id)
-{
-    ARM_COMPUTE_ERROR_ON_MSG(min > max, "min must be lower equal than max");
-
-    const int min_bound = quantize_qasymm8_per_channel(min, quant_info, channel_id);
-    const int max_bound = quantize_qasymm8_per_channel(max, quant_info, channel_id);
-    return std::pair<int, int> { min_bound, max_bound };
-}
-
 template void get_tile(const SimpleTensor<float> &in, SimpleTensor<float> &roi, const Coordinates &coord);
 template void get_tile(const SimpleTensor<half> &in, SimpleTensor<half> &roi, const Coordinates &coord);
 template void get_tile(const SimpleTensor<int> &in, SimpleTensor<int> &roi, const Coordinates &coord);
diff --git a/tests/validation/Helpers.h b/tests/validation/Helpers.h
index 2c1df39f14..a0169752de 100644
--- a/tests/validation/Helpers.h
+++ b/tests/validation/Helpers.h
@@ -285,15 +285,6 @@ std::pair<int, int> get_quantized_bounds(const QuantizationInfo &quant_info, flo
  * @param[in] channel_id Channel id for per channel quantization info.
  */
 std::pair<int, int> get_symm_quantized_per_channel_bounds(const QuantizationInfo &quant_info, float min, float max, size_t channel_id = 0);
-
-/** Helper function to compute asymmetric quantized min and max bounds
- *
- * @param[in] quant_info Quantization info to be used for conversion
- * @param[in] min        Floating point minimum value to be quantized
- * @param[in] max        Floating point maximum value to be quantized
- * @param[in] channel_id Channel id for per channel quantization info.
- */
-std::pair<int, int> get_asymm_quantized_per_channel_bounds(const QuantizationInfo &quant_info, float min, float max, size_t channel_id = 0);
 } // namespace validation
 } // namespace test
 } // namespace arm_compute
diff --git a/tests/validation/fixtures/DequantizationLayerFixture.h b/tests/validation/fixtures/DequantizationLayerFixture.h
index c7a818fcc7..f44f8658c2 100644
--- a/tests/validation/fixtures/DequantizationLayerFixture.h
+++ b/tests/validation/fixtures/DequantizationLayerFixture.h
@@ -101,12 +101,12 @@ protected:
         switch(src_data_type)
         {
             case DataType::QASYMM8:
-            case DataType::QASYMM8_PER_CHANNEL:
             {
                 SimpleTensor<uint8_t> src{ shape, src_data_type, 1, _quantization_info };
                 fill(src);
                 return reference::dequantization_layer<T>(src);
             }
+            case DataType::QSYMM8_PER_CHANNEL:
             case DataType::QSYMM8:
             {
                 SimpleTensor<int8_t> src{ shape, src_data_type, 1, _quantization_info };
@@ -138,16 +138,14 @@ protected:
                 return QuantizationInfo(1.f / distribution_scale_q16(gen));
             case DataType::QSYMM8:
                 return QuantizationInfo(1.f / distribution_scale_q8(gen));
-            case DataType::QASYMM8_PER_CHANNEL:
+            case DataType::QSYMM8_PER_CHANNEL:
             {
-                std::vector<float>   scale(num_channels);
-                std::vector<int32_t> offset(num_channels);
+                std::vector<float> scale(num_channels);
                 for(int32_t i = 0; i < num_channels; ++i)
                 {
-                    scale[i]  = 1.f / distribution_scale_q8(gen);
-                    offset[i] = distribution_offset_q8(gen);
+                    scale[i] = 1.f / distribution_offset_q8(gen);
                 }
-                return QuantizationInfo(scale, offset);
+                return QuantizationInfo(scale);
             }
             case DataType::QASYMM8:
                 return QuantizationInfo(1.f / distribution_scale_q8(gen), distribution_offset_q8(gen));
diff --git a/tests/validation/reference/DequantizationLayer.cpp b/tests/validation/reference/DequantizationLayer.cpp
index 69a49a3d6d..16f25c4427 100644
--- a/tests/validation/reference/DequantizationLayer.cpp
+++ b/tests/validation/reference/DequantizationLayer.cpp
@@ -65,16 +65,14 @@ SimpleTensor<TOut> dequantization_layer(const SimpleTensor<TIn> &src)
         const int C  = src.shape().z();
         const int N  = src.shape().total_size() / (WH * C);
 
-        const std::vector<float>   qscales     = src.quantization_info().scale();
-        const std::vector<int32_t> qoffsets    = src.quantization_info().offset();
-        const bool                 has_offsets = src_data_type == DataType::QASYMM8_PER_CHANNEL;
+        const std::vector<float> qscales = src.quantization_info().scale();
 
         for(int n = 0; n < N; ++n)
         {
             for(int c = 0; c < C; ++c)
             {
                 const size_t                  idx           = n * C * WH + c * WH;
-                const UniformQuantizationInfo channel_qinfo = { qscales[c], has_offsets ? qoffsets[c] : 0 };
+                const UniformQuantizationInfo channel_qinfo = { qscales[c], 0 };
 
                 // Dequantize slice
                 for(int s = 0; s < WH; ++s)
diff --git a/utils/TypePrinter.h b/utils/TypePrinter.h
index 41bfd1d6c8..ede2ea4b63 100644
--- a/utils/TypePrinter.h
+++ b/utils/TypePrinter.h
@@ -631,9 +631,6 @@ inline ::std::ostream &operator<<(::std::ostream &os, const DataType &data_type)
         case DataType::QSYMM8_PER_CHANNEL:
             os << "QSYMM8_PER_CHANNEL";
             break;
-        case DataType::QASYMM8_PER_CHANNEL:
-            os << "QASYMM8_PER_CHANNEL";
-            break;
         case DataType::S8:
             os << "S8";
             break;
diff --git a/utils/Utils.h b/utils/Utils.h
index 1d59ee5679..752271cc79 100644
--- a/utils/Utils.h
+++ b/utils/Utils.h
@@ -171,7 +171,6 @@ inline std::string get_typestring(DataType data_type)
     {
         case DataType::U8:
         case DataType::QASYMM8:
-        case DataType::QASYMM8_PER_CHANNEL:
             return no_endianness + "u" + support::cpp11::to_string(sizeof(uint8_t));
         case DataType::S8:
         case DataType::QSYMM8:
-- 
cgit v1.2.1