From 9d061b0c2a71f24c7fb00184e9409aaeea37f78d Mon Sep 17 00:00:00 2001 From: Freddie Liardet Date: Tue, 6 Apr 2021 15:59:28 +0100 Subject: Add per channel quantization support for NEDeconvolutionLayer Add QSYMM8_PER_CHANNEL support on weight input for NEDeconvolutionLayer and reference version. Resolves: COMPMID-3437 Signed-off-by: Freddie Liardet Change-Id: I7c9a28d4d0fea324ed8e5a24fbd0422e5ede145c Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5364 Comments-Addressed: Arm Jenkins Tested-by: Arm Jenkins Reviewed-by: Georgios Pinitas --- .../runtime/NEON/functions/NEConvolutionLayer.h | 6 +- .../runtime/NEON/functions/NEDeconvolutionLayer.h | 9 +- .../NEON/functions/NEDeconvolutionLayer.cpp | 32 ++++--- tests/validation/NEON/DeconvolutionLayer.cpp | 95 ++++++++++++++++++++- .../fixtures/DeconvolutionLayerFixture.h | 97 +++++++++++++++++----- tests/validation/reference/DeconvolutionLayer.cpp | 11 +-- tests/validation/reference/DeconvolutionLayer.h | 10 +-- 7 files changed, 206 insertions(+), 54 deletions(-) diff --git a/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h index d2d41c1e8a..b1e85523c5 100644 --- a/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h +++ b/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h @@ -89,7 +89,7 @@ public: * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM], * while every optional dimension from 4 and above represent a batch of inputs. * Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: Same as @p input. + * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: Same as @p input, could also be QSYMM8_PER_CHANNEL if input is QASYMM8/QASYMM8_SIGNED. * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. * Data type supported: Should match @p input data type, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type. * @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs. * Data types supported: Same as @p input. * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. @@ -110,7 +110,7 @@ public: * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM], * while every optional dimension from 4 and above represent a batch of inputs. * Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported:Same as @p input. + * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: Same as @p input, could also be QSYMM8_PER_CHANNEL if input is QASYMM8/QASYMM8_SIGNED. * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. * Data type supported: Should match @p input data type, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type. * @param[in] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs. * Data types supported: Same as @p input. * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. @@ -134,7 +134,7 @@ public: * @param[in] input Source tensor.
3 lower dimensions represent a single input [width, height, IFM], * while every optional dimension from 4 and above represent a batch of inputs. * Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported:Same as @p input. + * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: Same as @p input, could also be QSYMM8_PER_CHANNEL if input is QASYMM8/QASYMM8_SIGNED. * @param[in] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs. * Data types supported: Same as @p input. * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. diff --git a/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h index 3864a663c2..c16cf26095 100644 --- a/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h +++ b/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h @@ -68,7 +68,6 @@ namespace arm_compute * * -# @ref CPPUpsample * -# @ref NEConvolutionLayer - * -# @ref NEPermute * -# @ref NEReverse * */ @@ -92,8 +91,8 @@ public: /** Set the input, weights, biases and output tensors. * * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. Data types supported: F32/F16/QASYMM8/QASYMM8_SIGNED. - * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. Data type supported: Same as @p input. - * @param[in] bias Optional, ignored if NULL. The biases have one dimension. Data type supported: Data types supported: S32 for QASYMM8 and QASYMM8_SIGNED input, F32 for F32 input, F16 for F16 input. + * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. Data type supported: Same as @p input, could also be QSYMM8_PER_CHANNEL if input is QASYMM8/QASYMM8_SIGNED. + * @param[in] bias Optional, ignored if NULL. The biases have one dimension. Data type supported: S32 for QASYMM8/QASYMM8_SIGNED input, F32 for F32 input, F16 for F16 input. * @param[out] output Output tensor. The output has the same number of dimensions as the @p input. * @param[in] info Contains padding and policies to be used in the deconvolution, this is described in @ref PadStrideInfo. * @@ -102,8 +101,8 @@ public: /** Static function to check if given info will lead to a valid configuration of @ref NEDeconvolutionLayer * * @param[in] input Input tensor info. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. Data types supported: F32/F16/QASYMM8/QASYMM8_SIGNED. - * @param[in] weights The 4d weights info with dimensions [width, height, IFM, OFM]. Data type supported: Same as @p input. - * @param[in] bias (Optional) The biases have one dimension. Data type supported: Data types supported: S32 for QASYMM8 and QASYMM8_SIGNED input, F32 for F32 input, F16 for F16 input. + * @param[in] weights The 4d weights info with dimensions [width, height, IFM, OFM]. Data type supported: Same as @p input, could also be QSYMM8_PER_CHANNEL if input is QASYMM8/QASYMM8_SIGNED. + * @param[in] bias (Optional) The biases have one dimension. Data type supported: S32 for QASYMM8/QASYMM8_SIGNED input, F32 for F32 input, F16 for F16 input. * @param[in] output Output tensor info.
The output has the same number of dimensions as the @p input. * @param[in] info Contains padding and policies to be used in the deconvolution, this is described in @ref PadStrideInfo. * diff --git a/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp b/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp index 348c0136ec..5bd61b4074 100644 --- a/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp +++ b/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp @@ -85,16 +85,22 @@ Status NEDeconvolutionLayer::validate(const ITensorInfo *input, const ITensorInf { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output); ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32, DataType::F16, DataType::QASYMM8, DataType::QASYMM8_SIGNED); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(weights, input); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(weights, input); const unsigned int width_idx = get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::WIDTH); const unsigned int height_idx = get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::HEIGHT); ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(width_idx) != weights->dimension(height_idx)); ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(width_idx) < 1); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(weights, input); + if(is_data_type_quantized_per_channel(weights->data_type()) && is_data_type_quantized(input->data_type())) + { + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::QSYMM8_PER_CHANNEL); + } + else + { + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights); + } auto out_dims = deconvolution_output_dimensions(input->dimension(width_idx), input->dimension(height_idx), weights->dimension(width_idx), weights->dimension(height_idx), info); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights); if(bias != nullptr) { if(is_data_type_quantized_asymmetric(input->data_type())) @@ -118,20 +124,20 @@ Status NEDeconvolutionLayer::validate(const ITensorInfo *input, const ITensorInf ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(Window::DimZ) != output_shape.z(), "Output's depth is invalid."); } - uint32_t deconv_pad_x = 0; - uint32_t deconv_pad_y = 0; - const unsigned int stride_x = info.stride().first; - const unsigned int stride_y = info.stride().second; + uint32_t deconv_pad_x = 0; + uint32_t deconv_pad_y = 0; + const unsigned int stride_x = info.stride().first; + const unsigned int stride_y = info.stride().second; // Guard against overflows in compute_deconvolution_upsampled_shape() - const DataLayout data_layout = input->data_layout(); - const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); - const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); - const unsigned int out_x = (input->dimension(idx_w) - 1) * stride_x + 1; - const unsigned int out_y = (input->dimension(idx_h) - 1) * stride_y + 1; + const DataLayout data_layout = input->data_layout(); + const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); + const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); + const unsigned int out_x = (input->dimension(idx_w) - 1) * stride_x + 1; + const unsigned int out_y = (input->dimension(idx_h) - 1) * stride_y + 1; ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_w) > out_x); ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_h) > out_y);
ARM_COMPUTE_RETURN_ERROR_ON((out_x - weights->dimension(idx_w) + 1) > out_dims.first); - ARM_COMPUTE_RETURN_ERROR_ON((out_y - weights->dimension(idx_h) + 1 ) > out_dims.second); + ARM_COMPUTE_RETURN_ERROR_ON((out_y - weights->dimension(idx_h) + 1) > out_dims.second); const TensorShape scale_out_shape = compute_deconvolution_upsampled_shape(*input, *weights, stride_x, stride_y, out_dims, deconv_pad_x, deconv_pad_y); TensorInfo scale_out_info(input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(scale_out_shape)); diff --git a/tests/validation/NEON/DeconvolutionLayer.cpp b/tests/validation/NEON/DeconvolutionLayer.cpp index adb5d1709d..4c6ee2615d 100644 --- a/tests/validation/NEON/DeconvolutionLayer.cpp +++ b/tests/validation/NEON/DeconvolutionLayer.cpp @@ -90,6 +90,7 @@ const auto output_qinfo_dataset = framework::dataset::make("OutputQInfo", QuantizationInfo(3.f / 255.f, 0), QuantizationInfo(4.f, 0), }); + } // namespace TEST_SUITE(NEON) @@ -273,6 +274,15 @@ using NEDeconvolutionLayerQuantizedFixture3x3 = DeconvolutionValidationQuantized template using NEDeconvolutionLayerQuantizedFixture1x1 = DeconvolutionValidationQuantizedFixture; +template +using NEDeconvolutionLayerQuantizedPerChannelFixture4x4 = DeconvolutionValidationQuantizedPerChannelFixture; + +template +using NEDeconvolutionLayerQuantizedPerChannelFixture3x3 = DeconvolutionValidationQuantizedPerChannelFixture; + +template +using NEDeconvolutionLayerQuantizedPerChannelFixture1x1 = DeconvolutionValidationQuantizedPerChannelFixture; + TEST_SUITE(Quantized) TEST_SUITE(QASYMM8) @@ -370,8 +380,9 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEDeconvolutionLayerQuantizedFixture3x3 TEST_SUITE_END() // W3x3 TEST_SUITE(W1x1) -FIXTURE_DATA_TEST_CASE(Run, NEDeconvolutionLayerQuantizedFixture1x1, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(data1x1, framework::dataset::make("DataType", - DataType::QASYMM8_SIGNED)), +FIXTURE_DATA_TEST_CASE(Run, NEDeconvolutionLayerQuantizedFixture1x1, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(data1x1, + framework::dataset::make("DataType", + DataType::QASYMM8_SIGNED)), data_layouts_dataset), input_qinfo_dataset), output_qinfo_dataset), @@ -383,6 +394,86 @@ FIXTURE_DATA_TEST_CASE(Run, NEDeconvolutionLayerQuantizedFixture1x1, fra TEST_SUITE_END() // W1x1 TEST_SUITE_END() // QASYMM8_SIGNED + +TEST_SUITE(QSYMM8_PER_CHANNEL) + +TEST_SUITE(W4x4) +FIXTURE_DATA_TEST_CASE(Run, NEDeconvolutionLayerQuantizedPerChannelFixture4x4, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(data4x4, + framework::dataset::make("DataType", DataType::QASYMM8)), + data_layouts_dataset), + input_qinfo_dataset), + output_qinfo_dataset), + add_bias_dataset), + framework::dataset::make("WeightsDataType", { DataType::QSYMM8_PER_CHANNEL }))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_quantized, tolerance_num); +} +FIXTURE_DATA_TEST_CASE(RunSigned, NEDeconvolutionLayerQuantizedPerChannelFixture4x4, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(data4x4, + framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), + data_layouts_dataset), + input_qinfo_dataset), + output_qinfo_dataset), + add_bias_dataset), + framework::dataset::make("WeightsDataType", { DataType::QSYMM8_PER_CHANNEL }))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_quantized, tolerance_num); +} +TEST_SUITE_END() // W4x4 + +TEST_SUITE(W3x3) +FIXTURE_DATA_TEST_CASE(Run, 
NEDeconvolutionLayerQuantizedPerChannelFixture3x3, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(data3x3, + framework::dataset::make("DataType", DataType::QASYMM8)), + data_layouts_dataset), + input_qinfo_dataset), + output_qinfo_dataset), + add_bias_dataset), + framework::dataset::make("WeightsDataType", { DataType::QSYMM8_PER_CHANNEL }))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_quantized, tolerance_num); +} +FIXTURE_DATA_TEST_CASE(RunSigned, NEDeconvolutionLayerQuantizedPerChannelFixture3x3, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(data3x3, + framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), + data_layouts_dataset), + input_qinfo_dataset), + output_qinfo_dataset), + add_bias_dataset), + framework::dataset::make("WeightsDataType", { DataType::QSYMM8_PER_CHANNEL }))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_quantized, tolerance_num); +} +TEST_SUITE_END() // W3x3 + +TEST_SUITE(W1x1) +FIXTURE_DATA_TEST_CASE(Run, NEDeconvolutionLayerQuantizedPerChannelFixture1x1, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(data1x1, + framework::dataset::make("DataType", DataType::QASYMM8)), + data_layouts_dataset), + input_qinfo_dataset), + output_qinfo_dataset), + add_bias_dataset), + framework::dataset::make("WeightsDataType", { DataType::QSYMM8_PER_CHANNEL }))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_quantized, tolerance_num); +} +FIXTURE_DATA_TEST_CASE(RunSigned, NEDeconvolutionLayerQuantizedPerChannelFixture1x1, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(data1x1, + framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), + data_layouts_dataset), + input_qinfo_dataset), + output_qinfo_dataset), + add_bias_dataset), + framework::dataset::make("WeightsDataType", { DataType::QSYMM8_PER_CHANNEL }))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_quantized, tolerance_num); +} +TEST_SUITE_END() // W1x1 + +TEST_SUITE_END() // QSYMM8_PER_CHANNEL + TEST_SUITE_END() // Quantized TEST_SUITE_END() // DeconvolutionLayer diff --git a/tests/validation/fixtures/DeconvolutionLayerFixture.h b/tests/validation/fixtures/DeconvolutionLayerFixture.h index 6ea2335ae9..4bc1d3bb45 100644 --- a/tests/validation/fixtures/DeconvolutionLayerFixture.h +++ b/tests/validation/fixtures/DeconvolutionLayerFixture.h @@ -42,7 +42,7 @@ namespace validation { using namespace arm_compute::misc::shape_calculator; -template +template class DeconvolutionLayerFixtureBase : public framework::Fixture { public: @@ -51,13 +51,16 @@ public: public: template void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, - DataType data_type, DataLayout data_layout, QuantizationInfo input_quantization_info, QuantizationInfo output_quantization_info, bool add_bias) + DataType data_type, DataType weights_data_type, DataLayout data_layout, + QuantizationInfo input_quantization_info, QuantizationInfo output_quantization_info, QuantizationInfo weights_quantization_info, bool add_bias) { - _data_type = data_type; - _bias_data_type = is_data_type_quantized_asymmetric(data_type) ? 
DataType::S32 : data_type; - _data_layout = data_layout; - _input_quantization_info = input_quantization_info; - _output_quantization_info = output_quantization_info; + _data_type = data_type; + _weights_data_type = weights_data_type; + _bias_data_type = is_data_type_quantized_asymmetric(data_type) ? DataType::S32 : data_type; + _data_layout = data_layout; + _input_quantization_info = input_quantization_info; + _output_quantization_info = output_quantization_info; + _weights_quantization_info = weights_quantization_info; _target = compute_target(input_shape, weights_shape, bias_shape, output_shape, info, add_bias); _reference = compute_reference(input_shape, weights_shape, bias_shape, output_shape, info, add_bias); @@ -83,6 +86,26 @@ protected: library->fill(tensor, distribution, i); break; } + case DataType::QSYMM8_PER_CHANNEL: + { + int min_bound = 128; + int max_bound = -127; + for(size_t i = 0; i < _input_quantization_info.scale().size(); i++) + { + std::pair bounds = get_symm_quantized_per_channel_bounds(tensor.quantization_info(), -1.0f, 1.0f); + if(bounds.first < min_bound) + { + min_bound = bounds.first; + } + if(bounds.second > max_bound) + { + max_bound = bounds.second; + } + } + std::uniform_int_distribution distribution(min_bound, max_bound); + library->fill(tensor, distribution, i); + break; + } case DataType::S32: { std::uniform_int_distribution distribution(-100, 100); @@ -140,7 +163,7 @@ protected: // Create tensors TensorType src = create_tensor(input_shape, _data_type, 1, _input_quantization_info, _data_layout); - TensorType weights = create_tensor(weights_shape, _data_type, 1, _input_quantization_info, _data_layout); + TensorType weights = create_tensor(weights_shape, _weights_data_type, 1, _weights_quantization_info, _data_layout); TensorType bias = create_tensor(bias_shape, _bias_data_type, 1, _input_quantization_info, _data_layout); TensorType dst = create_tensor(output_shape, _data_type, 1, _output_quantization_info, _data_layout); @@ -183,7 +206,6 @@ protected: // Compute DeconvolutionLayer function conv.run(); - return dst; } @@ -192,7 +214,7 @@ protected: { // Create reference SimpleTensor src{ input_shape, _data_type, 1, _input_quantization_info }; - SimpleTensor weights{ weights_shape, _data_type, 1, _input_quantization_info }; + SimpleTensor weights{ weights_shape, _weights_data_type, 1, _weights_quantization_info }; SimpleTensor bias{ bias_shape, _bias_data_type, 1, _input_quantization_info }; // Fill reference @@ -207,21 +229,22 @@ protected: { fill_zeros(bias); } - - return reference::deconvolution_layer(src, weights, bias, output_shape, info, _output_quantization_info); + return reference::deconvolution_layer(src, weights, bias, output_shape, info, _output_quantization_info); } TensorType _target{}; SimpleTensor _reference{}; DataType _data_type{}; + DataType _weights_data_type{}; DataType _bias_data_type{}; DataLayout _data_layout{}; QuantizationInfo _input_quantization_info{}; QuantizationInfo _output_quantization_info{}; + QuantizationInfo _weights_quantization_info{}; }; template -class DeconvolutionValidationFixture : public DeconvolutionLayerFixtureBase +class DeconvolutionValidationFixture : public DeconvolutionLayerFixtureBase { public: template @@ -236,13 +259,13 @@ public: TensorInfo input_info(input_shape, 1, data_type); TensorInfo weights_info(weights_shape, 1, data_type); TensorShape output_shape = compute_deconvolution_output_shape(out_dim, input_info, weights_info); - DeconvolutionLayerFixtureBase::setup(input_shape, weights_shape, 
bias_shape, output_shape, info, data_type, data_layout, QuantizationInfo(), - QuantizationInfo(), add_bias); + DeconvolutionLayerFixtureBase::setup(input_shape, weights_shape, bias_shape, output_shape, info, data_type, data_type, data_layout, QuantizationInfo(), + QuantizationInfo(), QuantizationInfo(), add_bias); } }; template -class DeconvolutionValidationAsymmFixture : public DeconvolutionLayerFixtureBase +class DeconvolutionValidationAsymmFixture : public DeconvolutionLayerFixtureBase { public: template @@ -257,13 +280,13 @@ public: TensorInfo input_info(input_shape, 1, data_type); TensorInfo weights_info(weights_shape, 1, data_type); TensorShape output_shape = compute_deconvolution_output_shape(out_dim, input_info, weights_info); - DeconvolutionLayerFixtureBase::setup(input_shape, weights_shape, bias_shape, output_shape, info, data_type, data_layout, QuantizationInfo(), - QuantizationInfo(), add_bias); + DeconvolutionLayerFixtureBase::setup(input_shape, weights_shape, bias_shape, output_shape, info, data_type, data_type, data_layout, QuantizationInfo(), + QuantizationInfo(), QuantizationInfo(), add_bias); } }; template -class DeconvolutionValidationQuantizedFixture : public DeconvolutionLayerFixtureBase +class DeconvolutionValidationQuantizedFixture : public DeconvolutionLayerFixtureBase { public: template @@ -278,8 +301,40 @@ public: TensorInfo input_info(input_shape, 1, data_type, input_quantization_info); TensorInfo weights_info(weights_shape, 1, data_type, input_quantization_info); TensorShape output_shape = compute_deconvolution_output_shape(out_dim, input_info, weights_info); - DeconvolutionLayerFixtureBase::setup(input_shape, weights_shape, bias_shape, output_shape, info, data_type, data_layout, input_quantization_info, - output_quantization_info, add_bias); + DeconvolutionLayerFixtureBase::setup(input_shape, weights_shape, bias_shape, output_shape, info, data_type, data_type, data_layout, + input_quantization_info, + output_quantization_info, input_quantization_info, add_bias); + } +}; + +template +class DeconvolutionValidationQuantizedPerChannelFixture : public DeconvolutionLayerFixtureBase +{ +public: + template + void setup(TensorShape input_shape, unsigned int sx, unsigned int sy, unsigned int padx, unsigned int pady, + unsigned int num_kernels, DataType data_type, DataLayout data_layout, QuantizationInfo input_quantization_info, QuantizationInfo output_quantization_info, bool add_bias, + DataType weights_data_type) + { + ARM_COMPUTE_ERROR_ON_MSG(kernel_size_x != kernel_size_y, "Only square kernels supported"); + const TensorShape weights_shape(kernel_size_x, kernel_size_y, input_shape.z(), num_kernels); + const TensorShape bias_shape(num_kernels); + const PadStrideInfo info(sx, sy, padx, pady, DimensionRoundingType::CEIL); + auto out_dim = deconvolution_output_dimensions(input_shape.x(), input_shape.y(), kernel_size_x, kernel_size_y, info); + TensorInfo input_info(input_shape, 1, data_type, input_quantization_info); + TensorInfo weights_info(weights_shape, 1, weights_data_type, input_quantization_info); + TensorShape output_shape = compute_deconvolution_output_shape(out_dim, input_info, weights_info); + + std::vector weights_scales{}; + std::mt19937 gen(library->seed()); + std::uniform_real_distribution dis(0.01f, 1.f); + for(size_t i = 0; i < output_shape[2]; ++i) + { + weights_scales.push_back(dis(gen)); + } + DeconvolutionLayerFixtureBase::setup(input_shape, weights_shape, bias_shape, output_shape, info, data_type, weights_data_type, data_layout, + 
input_quantization_info, + output_quantization_info, QuantizationInfo(weights_scales), add_bias); } }; diff --git a/tests/validation/reference/DeconvolutionLayer.cpp b/tests/validation/reference/DeconvolutionLayer.cpp index 891828533f..afbf063e63 100644 --- a/tests/validation/reference/DeconvolutionLayer.cpp +++ b/tests/validation/reference/DeconvolutionLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,7 +24,6 @@ #include "ConvolutionLayer.h" #include "tests/validation/Helpers.h" - namespace arm_compute { namespace test @@ -33,8 +32,8 @@ namespace validation { namespace reference { -template -SimpleTensor deconvolution_layer(const SimpleTensor &src, const SimpleTensor &weights, const SimpleTensor &bias, const TensorShape &output_shape, +template +SimpleTensor deconvolution_layer(const SimpleTensor &src, const SimpleTensor &weights, const SimpleTensor &bias, const TensorShape &output_shape, const PadStrideInfo &info, QuantizationInfo out_qinfo) { // Create reference @@ -99,7 +98,7 @@ SimpleTensor deconvolution_layer(const SimpleTensor &src, const SimpleTens } // Flip weights by 180 degrees - SimpleTensor weights_flipped{ weights.shape(), weights.data_type(), 1, weights.quantization_info() }; + SimpleTensor weights_flipped{ weights.shape(), weights.data_type(), 1, weights.quantization_info(), weights.data_layout() }; #if defined(_OPENMP) #pragma omp parallel for #endif /* _OPENMP */ @@ -143,6 +142,8 @@ SimpleTensor deconvolution_layer(const SimpleTensor &src, const SimpleTens template SimpleTensor deconvolution_layer(const SimpleTensor &src, const SimpleTensor &weights, const SimpleTensor &bias, const TensorShape &output_shape, const PadStrideInfo &info, QuantizationInfo out_quant_info); +template SimpleTensor deconvolution_layer(const SimpleTensor &src, const SimpleTensor &weights, const SimpleTensor &bias, const TensorShape &output_shape, + const PadStrideInfo &info, QuantizationInfo out_quant_info); template SimpleTensor deconvolution_layer(const SimpleTensor &src, const SimpleTensor &weights, const SimpleTensor &bias, const TensorShape &output_shape, const PadStrideInfo &info, QuantizationInfo out_quant_info); template SimpleTensor deconvolution_layer(const SimpleTensor &src, const SimpleTensor &weights, const SimpleTensor &bias, const TensorShape &output_shape, diff --git a/tests/validation/reference/DeconvolutionLayer.h b/tests/validation/reference/DeconvolutionLayer.h index 07b9a531a7..16f0d9ae59 100644 --- a/tests/validation/reference/DeconvolutionLayer.h +++ b/tests/validation/reference/DeconvolutionLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -39,16 +39,16 @@ namespace reference * * src Input tensor. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. * Data types supported: QASYMM8/QASYMM8_SIGNED/F32/F16. - * weights The 4d weights with dimensions [width, height, OFM, IFM]. Data type supported: Same as @p input. + * weights The 4d weights with dimensions [width, height, OFM, IFM]. Data type supported: Same as @p input, could also be QSYMM8_PER_CHANNEL if input is QASYMM8/QASYMM8_SIGNED. * bias Optional, ignored if NULL. The biases have one dimension.
- * Data type supported: Same as @p input, except for input of QASYMM8 and QASYMM8_SIGNED type where biases should be of S32 type + * Data type supported: Same as @p input, except for input of QASYMM8/QASYMM8_SIGNED types where biases should be of S32 type * output_shape Output tensor shape. The output has the same number of dimensions as the @p input. * info Contains padding and policies to be used in the deconvolution, this is described in @ref PadStrideInfo. * a The number of zeros added to right and top edges of the input. * */ -template -SimpleTensor deconvolution_layer(const SimpleTensor &src, const SimpleTensor &weights, const SimpleTensor &bias, const TensorShape &output_shape, const PadStrideInfo &info, +template +SimpleTensor deconvolution_layer(const SimpleTensor &src, const SimpleTensor &weights, const SimpleTensor &bias, const TensorShape &output_shape, const PadStrideInfo &info, QuantizationInfo out_qinfo = QuantizationInfo()); } // namespace reference } // namespace validation -- cgit v1.2.1
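
For context, the snippet below is a minimal end-to-end sketch of the path this patch enables: a QASYMM8 input deconvolved with QSYMM8_PER_CHANNEL weights. It is not taken from the patch. TensorInfo, QuantizationInfo and NEDeconvolutionLayer::validate()/configure() are the library's existing public API; the shapes, scales and offsets are illustrative assumptions.

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h"
#include "arm_compute/runtime/Tensor.h"

#include <vector>

using namespace arm_compute;

int main()
{
    // Assumed shapes (NCHW): 8x8 input with 2 channels, 3x3 kernels, 4 OFM.
    // Weights are laid out [width, height, IFM, OFM] as documented above.
    Tensor input, weights, bias, output;
    input.allocator()->init(TensorInfo(TensorShape(8U, 8U, 2U), 1, DataType::QASYMM8, QuantizationInfo(0.5f, 10)));

    // One symmetric scale per output channel: this is what makes the weights
    // QSYMM8_PER_CHANNEL alongside a QASYMM8 input.
    const std::vector<float> weights_scales{ 0.5f, 0.25f, 0.125f, 0.0625f };
    weights.allocator()->init(TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::QSYMM8_PER_CHANNEL, QuantizationInfo(weights_scales)));

    // Quantized inputs take S32 biases, as the validate() hunk enforces.
    bias.allocator()->init(TensorInfo(TensorShape(4U), 1, DataType::S32));

    // Deconvolution output size: out = (in - 1) * stride + kernel - pads,
    // so (8 - 1) * 1 + 3 - 0 = 10 per spatial dimension here.
    output.allocator()->init(TensorInfo(TensorShape(10U, 10U, 4U), 1, DataType::QASYMM8, QuantizationInfo(1.f, 0)));

    const PadStrideInfo deconv_info(1, 1, 0, 0, DimensionRoundingType::CEIL);
    const Status status = NEDeconvolutionLayer::validate(input.info(), weights.info(), bias.info(), output.info(), deconv_info);
    if(status.error_code() != ErrorCode::OK)
    {
        return 1;
    }

    NEDeconvolutionLayer deconv{};
    deconv.configure(&input, &weights, &bias, &output, deconv_info);

    input.allocator()->allocate();
    weights.allocator()->allocate();
    bias.allocator()->allocate();
    output.allocator()->allocate();

    // ... fill input/weights/bias here, then:
    deconv.run();
    return 0;
}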
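
The QSYMM8_PER_CHANNEL case added to the fixture's fill() widens one integer range across all channels so that a single distribution can drive every channel. The standalone sketch below reproduces that bound computation for the same [-1.0f, 1.0f] float range; symm_bounds_for_channel() is a hypothetical stand-in for the library's get_symm_quantized_per_channel_bounds() test helper, and the scales are made up.

#include <algorithm>
#include <cmath>
#include <utility>
#include <vector>

// Symmetric int8 quantization of v with channel scale s: round(v / s),
// clamped to [-127, 127].
std::pair<int, int> symm_bounds_for_channel(float scale, float min_v, float max_v)
{
    const auto quantize = [scale](float v) {
        return std::max(-127, std::min(127, static_cast<int>(std::lround(v / scale))));
    };
    return { quantize(min_v), quantize(max_v) };
}

int main()
{
    // Made-up per-channel scales, standing in for the fixture's random ones.
    const std::vector<float> scales{ 0.5f, 0.05f, 0.9f };

    // Mirror the fixture: start from sentinels outside the representable
    // range, then widen [min_bound, max_bound] to cover every channel.
    int min_bound = 128;
    int max_bound = -127;
    for(const float s : scales)
    {
        const std::pair<int, int> bounds = symm_bounds_for_channel(s, -1.0f, 1.0f);
        min_bound = std::min(min_bound, bounds.first);
        max_bound = std::max(max_bound, bounds.second);
    }
    // With these scales: min_bound == -20, max_bound == 20. The fixture feeds
    // such a range to a uniform integer distribution to fill the weights.
    return 0;
}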
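
Finally, the widened reference template lets the activation and weight element types differ. Below is a sketch of calling it with the type combination the new explicit instantiation covers (uint8_t data, int8_t weights, int32_t bias); the shapes, constant fills and quantization values are illustrative assumptions, not values from the tests.

#include "tests/SimpleTensor.h"
#include "tests/validation/reference/DeconvolutionLayer.h"

#include <algorithm>
#include <vector>

using namespace arm_compute;
using namespace arm_compute::test;
using namespace arm_compute::test::validation;

SimpleTensor<uint8_t> reference_sketch()
{
    // Illustrative shapes (NCHW): 8x8x2 input, 3x3 kernels, 4 output channels.
    SimpleTensor<uint8_t> src{ TensorShape(8U, 8U, 2U), DataType::QASYMM8, 1, QuantizationInfo(0.5f, 10) };
    SimpleTensor<int8_t>  weights{ TensorShape(3U, 3U, 2U, 4U), DataType::QSYMM8_PER_CHANNEL, 1,
                                   QuantizationInfo(std::vector<float>{ 0.5f, 0.25f, 0.125f, 0.0625f }) };
    SimpleTensor<int32_t> bias{ TensorShape(4U), DataType::S32 };

    // Constant fills keep the sketch self-contained; the fixture fills randomly.
    std::fill(src.data(), src.data() + src.num_elements(), uint8_t{ 12 });
    std::fill(weights.data(), weights.data() + weights.num_elements(), int8_t{ 3 });
    std::fill(bias.data(), bias.data() + bias.num_elements(), 0);

    // The element types deduce to uint8_t/int8_t/int32_t, the combination the
    // new explicit instantiation provides. Output: (8 - 1) * 1 + 3 = 10 per side.
    return reference::deconvolution_layer(src, weights, bias, TensorShape(10U, 10U, 4U),
                                          PadStrideInfo(1, 1, 0, 0), QuantizationInfo(1.f, 0));
}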