From a14817a7eee8b8cb7e5ccb6186ca01c23eec2629 Mon Sep 17 00:00:00 2001
From: Sheri Zhang <sheri.zhang@arm.com>
Date: Wed, 26 Feb 2020 10:30:15 +0000
Subject: COMPMID-2756: Add support for QASYMM8_SIGNED in CLDeconvolutionLayer

Tests cover for CLGEMMDeconvolution and CLDirectDeconvolution.

Change-Id: I9a26d0adef1d177ffad39a8992a2dc65327f07e5
Signed-off-by: Sheri Zhang <sheri.zhang@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/2853
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Giorgio Arena <giorgio.arena@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
---
 .../runtime/CL/functions/CLDeconvolutionLayer.h    |  6 +-
 src/core/CL/cl_kernels/deconvolution_layer.cl      |  2 +-
 src/core/CL/cl_kernels/permute.cl                  |  4 +-
 src/core/CL/cl_kernels/reshape_layer.cl            |  4 +-
 src/core/CL/cl_kernels/slice_ops.cl                |  4 +-
 tests/validation/CL/DeconvolutionLayer.cpp         | 74 ++++++++++++++++++++++
 .../fixtures/DeconvolutionLayerFixture.h           | 11 +++-
 tests/validation/reference/DeconvolutionLayer.cpp  | 36 ++++++-----
 tests/validation/reference/DeconvolutionLayer.h    |  8 ++-
 9 files changed, 117 insertions(+), 32 deletions(-)

diff --git a/arm_compute/runtime/CL/functions/CLDeconvolutionLayer.h b/arm_compute/runtime/CL/functions/CLDeconvolutionLayer.h
index 8c46bc26cc..78c149d933 100644
--- a/arm_compute/runtime/CL/functions/CLDeconvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLDeconvolutionLayer.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -46,7 +46,7 @@ public:
 
     /** Set the input, weights, biases and output tensors.
      *
-     * @param[in,out] input        Input tensor. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. Data types supported: QASYMM8/F16/F32.
+     * @param[in,out] input        Input tensor. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
      * @param[in]     weights      The 4d weights with dimensions [width, height, IFM, OFM]. Data type supported: Same as @p input.
      * @param[in]     bias         (Optional) The biases have one dimension. Data type supported: Same as @p input.
      * @param[out]    output       Output tensor. The output has the same number of dimensions as the @p input.
@@ -57,7 +57,7 @@ public:
     void configure(ICLTensor *input, ICLTensor *weights, const ICLTensor *bias, ICLTensor *output, const PadStrideInfo &deconv_info, const WeightsInfo &weights_info = WeightsInfo());
     /** Static function to check if given info will lead to a valid configuration of @ref CLDeconvolutionLayer
      *
-     * @param[in] input        Input tensor info. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. Data types supported: QASYMM8/F16/F32.
+     * @param[in] input        Input tensor info. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
      * @param[in] weights      The 4d weights info with dimensions [width, height, IFM, OFM]. Data type supported: Same as @p input.
      * @param[in] bias         (Optional) The biases have one dimension. Data type supported: Same as @p input.
      * @param[in] output       Output tensor info. The output has the same number of dimensions as the @p input.
diff --git a/src/core/CL/cl_kernels/deconvolution_layer.cl b/src/core/CL/cl_kernels/deconvolution_layer.cl
index a9a6ac1947..cb1abd1bf6 100644
--- a/src/core/CL/cl_kernels/deconvolution_layer.cl
+++ b/src/core/CL/cl_kernels/deconvolution_layer.cl
@@ -64,7 +64,7 @@ __kernel void deconvolution_upsample(
  * @note The height of the input should be given as a preprocessor argument using -DSRC_HEIGHT=width, e.g., -DSRC_HEIGHT=10
  * @note The output data layout is NHWC if the preprocessor argument NUM_FILTERS is defined, NCHW if NUM_FILTERS is not defined
  *
- * @param[in]  src_ptr                            Pointer to the source image. Supported data types: QASYMM8/F16/F32
+ * @param[in]  src_ptr                            Pointer to the source image. Supported data types: QASYMM8/QASYMM8_SIGNED/F16/F32
  * @param[in]  src_stride_x                       Stride of the source image in X dimension (in bytes)
  * @param[in]  src_step_x                         src_stride_x * number of elements along X processed per workitem(in bytes)
  * @param[in]  src_stride_y                       Stride of the source image in Y dimension (in bytes)
diff --git a/src/core/CL/cl_kernels/permute.cl b/src/core/CL/cl_kernels/permute.cl
index 77f03f7d5b..82df141eb0 100644
--- a/src/core/CL/cl_kernels/permute.cl
+++ b/src/core/CL/cl_kernels/permute.cl
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -30,7 +30,7 @@
  * @attention Input tensor depth should be given as a preprocessor argument using -DDEPTH_IN=size. e.g. -DDEPTH_IN=16
  * @attention Permutation vector is passed as a preprocessor arguement using -DP1, -DP2, -DP3 and -DP4=int, e.g. -DP1=2, -DP2=1, -DP3=0 and -DP4=3.
  *
- * @param[in]  input_ptr                            Pointer to the source image. Supported data types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
+ * @param[in]  input_ptr                            Pointer to the source image. Supported data types: All
  * @param[in]  input_stride_x                       Stride of the source image in X dimension (in bytes)
  * @param[in]  input_step_x                         input_stride_x * number of elements along X processed per workitem(in bytes)
  * @param[in]  input_stride_y                       Stride of the source image in Y dimension (in bytes)
diff --git a/src/core/CL/cl_kernels/reshape_layer.cl b/src/core/CL/cl_kernels/reshape_layer.cl
index 11393d246d..4bfdf1e01f 100644
--- a/src/core/CL/cl_kernels/reshape_layer.cl
+++ b/src/core/CL/cl_kernels/reshape_layer.cl
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -27,7 +27,7 @@
  *
  * @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g. -DDATA_TYPE=short
  *
- * @param[in]  input_ptr                            Pointer to the first source tensor. Supported data types: U8/S8/U16/S16/U32/S32/F16/F32
+ * @param[in]  input_ptr                            Pointer to the first source tensor. Supported data types: All
  * @param[in]  input_stride_x                       Stride of the first source tensor in X dimension (in bytes)
  * @param[in]  input_step_x                         input_stride_x * number of elements along X processed per workitem(in bytes)
  * @param[in]  input_stride_y                       Stride of the first source tensor in Y dimension (in bytes)
diff --git a/src/core/CL/cl_kernels/slice_ops.cl b/src/core/CL/cl_kernels/slice_ops.cl
index 2163c699dd..5dc0f2d998 100644
--- a/src/core/CL/cl_kernels/slice_ops.cl
+++ b/src/core/CL/cl_kernels/slice_ops.cl
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -32,7 +32,7 @@
  * @attention Absolute start coordinates for each dimension should be given as preprocessor -DSTART_index=value e.g. -DSTART_0=2
  * @attention Strides for each dimension should be given as preprocessor -DSTRIDE_index=value e.g. -DSTRIDE_1=1
  *
- * @param[in]  input_ptr                            Pointer to the source tensor. Supported data types: U8/S8/QASYMM8/U16/S16/QASYMM16/QSYMM16/F16/U32/S32/F32
+ * @param[in]  input_ptr                            Pointer to the source tensor. Supported data types: All
  * @param[in]  input_stride_x                       Stride of the source tensor in X dimension (in bytes)
  * @param[in]  input_step_x                         input_stride_x * number of elements along X processed per workitem(in bytes)
  * @param[in]  input_stride_y                       Stride of the source tensor in Y dimension (in bytes)
diff --git a/tests/validation/CL/DeconvolutionLayer.cpp b/tests/validation/CL/DeconvolutionLayer.cpp
index 4e90e68ebd..e7ba930ebe 100644
--- a/tests/validation/CL/DeconvolutionLayer.cpp
+++ b/tests/validation/CL/DeconvolutionLayer.cpp
@@ -333,6 +333,80 @@ FIXTURE_DATA_TEST_CASE(Run, CLDeconvolutionLayerQuantizedFixture1x1<uint8_t>, fr
 TEST_SUITE_END() // W1x1
 
 TEST_SUITE_END() // QASYMM8
+
+TEST_SUITE(QASYMM8_SIGNED)
+
+// QASYMM8_SIGNED: zero-point in range [-128, 127]
+// QASYMM8       : zero-point in range [0   , 255]
+
+TEST_SUITE(W4x4)
+FIXTURE_DATA_TEST_CASE(Run, CLDeconvolutionLayerQuantizedFixture4x4<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(data4x4, framework::dataset::make("DataType",
+                                                                                                                      DataType::QASYMM8_SIGNED)),
+                                                                                                                      data_layouts_dataset),
+                                                                                                                      framework::dataset::make("InputQuantizationInfo", { QuantizationInfo(1.f / 255.f, 10), QuantizationInfo(2.f / 255.f, 5) })),
+                                                                                                                      framework::dataset::make("OutputQuantizationInfo", { QuantizationInfo(3.f / 255.f, 5), QuantizationInfo(4.f / 255.f, 10) })),
+                                                                                                                      add_bias_dataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8, tolerance_num);
+}
+TEST_SUITE_END() // W4x4
+
+TEST_SUITE(W3x3)
+// DirectDeconvolution
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDeconvolutionLayerQuantizedFixture3x3<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(data3x3_precommit,
+                       framework::dataset::make("DataType",
+                                                DataType::QASYMM8_SIGNED)),
+                       data_layouts_dataset),
+                       framework::dataset::make("InputQuantizationInfo", { QuantizationInfo(1.f / 255.f, 10), QuantizationInfo(2.f / 255.f, 4) })),
+                       framework::dataset::make("OutputQuantizationInfo", { QuantizationInfo(3.f / 255.f, 10), QuantizationInfo(4.f / 255.f, 5) })),
+                       add_bias_dataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8, tolerance_num);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, CLDeconvolutionLayerQuantizedFixture3x3<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(data3x3,
+                       framework::dataset::make("DataType",
+                                                DataType::QASYMM8_SIGNED)),
+                       data_layouts_dataset),
+                       framework::dataset::make("InputQuantizationInfo", { QuantizationInfo(1.f / 255.f, -10), QuantizationInfo(2.f / 255.f, 127) })),
+                       framework::dataset::make("OutputQuantizationInfo", { QuantizationInfo(3.f / 255.f, 64), QuantizationInfo(4.f / 255.f, -128) })),
+                       add_bias_dataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8, tolerance_num);
+}
+TEST_SUITE_END() // W3x3
+
+TEST_SUITE(W2x2) // GEMMDeconvolution
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDeconvolutionLayerQuantizedFixture2x2<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(data2x2_precommit,
+                       framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
+                       data_layouts_dataset),
+                       framework::dataset::make("InputQuantizationInfo", { QuantizationInfo(1.f / 255.f, 127), QuantizationInfo(2.f / 255.f, -128) })),
+                       framework::dataset::make("OutputQuantizationInfo", { QuantizationInfo(3.f / 255.f, -10), QuantizationInfo(4.f / 255.f, 64) })),
+                       add_bias_dataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8, tolerance_num);
+}
+TEST_SUITE_END() // W2x2
+
+TEST_SUITE(W1x1) // DirectDeconvolution and GEMMDeconvolution
+FIXTURE_DATA_TEST_CASE(Run, CLDeconvolutionLayerQuantizedFixture1x1<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(data1x1, framework::dataset::make("DataType",
+                                                                                                                      DataType::QASYMM8_SIGNED)),
+                                                                                                                      data_layouts_dataset),
+                                                                                                                      framework::dataset::make("InputQuantizationInfo", { QuantizationInfo(1.f / 255.f, 0), QuantizationInfo(2.f / 255.f, 0) })),
+                                                                                                                      framework::dataset::make("OutputQuantizationInfo", { QuantizationInfo(3.f / 255.f, 0), QuantizationInfo(4.f / 255.f, 0) })),
+                                                                                                                      add_bias_dataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8, tolerance_num);
+}
+TEST_SUITE_END() // W1x1
+
+TEST_SUITE_END() // QASYMM8_SIGNED
+
 TEST_SUITE_END() // Quantized
 
 TEST_SUITE_END() // DeconvolutionLayer
diff --git a/tests/validation/fixtures/DeconvolutionLayerFixture.h b/tests/validation/fixtures/DeconvolutionLayerFixture.h
index b819e651ff..b9a478b1a9 100644
--- a/tests/validation/fixtures/DeconvolutionLayerFixture.h
+++ b/tests/validation/fixtures/DeconvolutionLayerFixture.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -46,7 +46,7 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
 class DeconvolutionLayerFixtureBase : public framework::Fixture
 {
 public:
-    using TBias = typename std::conditional<std::is_same<typename std::decay<T>::type, uint8_t>::value, int32_t, T>::type;
+    using TBias = typename std::conditional < std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value, int32_t, T >::type;
 
 public:
     template <typename...>
@@ -76,6 +76,13 @@ protected:
                 library->fill(tensor, distribution, i);
                 break;
             }
+            case DataType::QASYMM8_SIGNED:
+            {
+                std::pair<int, int> bounds = get_quantized_qasymm8_signed_bounds(tensor.quantization_info(), -1.0f, 1.0f);
+                std::uniform_int_distribution<int8_t> distribution(bounds.first, bounds.second);
+                library->fill(tensor, distribution, i);
+                break;
+            }
             case DataType::S32:
             {
                 std::uniform_int_distribution<int32_t> distribution(-100, 100);
diff --git a/tests/validation/reference/DeconvolutionLayer.cpp b/tests/validation/reference/DeconvolutionLayer.cpp
index 0e0ea57e7d..5750f51e3f 100644
--- a/tests/validation/reference/DeconvolutionLayer.cpp
+++ b/tests/validation/reference/DeconvolutionLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -38,19 +38,19 @@ SimpleTensor<T> deconvolution_layer(const SimpleTensor<T> &src, const SimpleTens
                                     const PadStrideInfo &info, QuantizationInfo out_qinfo)
 {
     // Create reference
-    const unsigned int pad_left   = info.pad_left();
-    const unsigned int pad_right  = info.pad_right();
-    const unsigned int pad_top    = info.pad_top();
-    const unsigned int pad_bottom = info.pad_bottom();
-    const int stride_x            = info.stride().first;
-    const int stride_y            = info.stride().second;
-    const int weights_width       = weights.shape().x();
-    const int weights_height      = weights.shape().y();
-    const int weights_upper_dims  = weights.shape().total_size() / (weights_width * weights_height);
+    const unsigned int pad_left           = info.pad_left();
+    const unsigned int pad_right          = info.pad_right();
+    const unsigned int pad_top            = info.pad_top();
+    const unsigned int pad_bottom         = info.pad_bottom();
+    const int          stride_x           = info.stride().first;
+    const int          stride_y           = info.stride().second;
+    const int          weights_width      = weights.shape().x();
+    const int          weights_height     = weights.shape().y();
+    const int          weights_upper_dims = weights.shape().total_size() / (weights_width * weights_height);
 
-    ARM_COMPUTE_ERROR_ON(pad_left   > (weights.shape().x() - 1));
-    ARM_COMPUTE_ERROR_ON(pad_right  > (weights.shape().x() - 1));
-    ARM_COMPUTE_ERROR_ON(pad_top    > (weights.shape().y() - 1));
+    ARM_COMPUTE_ERROR_ON(pad_left > (weights.shape().x() - 1));
+    ARM_COMPUTE_ERROR_ON(pad_right > (weights.shape().x() - 1));
+    ARM_COMPUTE_ERROR_ON(pad_top > (weights.shape().y() - 1));
     ARM_COMPUTE_ERROR_ON(pad_bottom > (weights.shape().y() - 1));
 
     // Find the upsampled dimensions
@@ -67,14 +67,14 @@ SimpleTensor<T> deconvolution_layer(const SimpleTensor<T> &src, const SimpleTens
     unsigned int deconv_pad_right = pad_left > pad_right ? pad_left - pad_right : 0;
     deconv_pad_x -= deconv_pad_left + deconv_pad_right;
     ARM_COMPUTE_ERROR_ON((deconv_pad_x % 2) != 0);
-    deconv_pad_left  += deconv_pad_x / 2;
+    deconv_pad_left += deconv_pad_x / 2;
     deconv_pad_right += deconv_pad_x / 2;
 
     unsigned int deconv_pad_top    = pad_bottom > pad_top ? pad_bottom - pad_top : 0;
     unsigned int deconv_pad_bottom = pad_top > pad_bottom ? pad_top - pad_bottom : 0;
     deconv_pad_y -= deconv_pad_top + deconv_pad_bottom;
     ARM_COMPUTE_ERROR_ON((deconv_pad_y % 2) != 0);
-    deconv_pad_top    += deconv_pad_y / 2;
+    deconv_pad_top += deconv_pad_y / 2;
     deconv_pad_bottom += deconv_pad_y / 2;
 
     TensorShape scaled_shape = src.shape();
@@ -88,9 +88,9 @@ SimpleTensor<T> deconvolution_layer(const SimpleTensor<T> &src, const SimpleTens
     const int height_scaled = scaled.shape().y();
     const int num_2d_slices = src.shape().total_size() / (width_in * height_in);
 
-    if(src.data_type() == DataType::QASYMM8)
+    if(src.data_type() == DataType::QASYMM8 || src.data_type() == DataType::QASYMM8_SIGNED)
     {
-        const uint8_t quantized_zero = src.quantization_info().uniform().offset;
+        const auto quantized_zero = static_cast<T>(src.quantization_info().uniform().offset);
         std::fill_n(scaled.data(), scaled.num_elements(), quantized_zero);
     }
     else
@@ -138,6 +138,8 @@ SimpleTensor<T> deconvolution_layer(const SimpleTensor<T> &src, const SimpleTens
 
 template SimpleTensor<uint8_t> deconvolution_layer(const SimpleTensor<uint8_t> &src, const SimpleTensor<uint8_t> &weights, const SimpleTensor<int32_t> &bias, const TensorShape &output_shape,
                                                    const PadStrideInfo &info, QuantizationInfo out_quant_info);
+template SimpleTensor<int8_t> deconvolution_layer(const SimpleTensor<int8_t> &src, const SimpleTensor<int8_t> &weights, const SimpleTensor<int32_t> &bias, const TensorShape &output_shape,
+                                                  const PadStrideInfo &info, QuantizationInfo out_quant_info);
 template SimpleTensor<float> deconvolution_layer(const SimpleTensor<float> &src, const SimpleTensor<float> &weights, const SimpleTensor<float> &bias, const TensorShape &output_shape,
                                                  const PadStrideInfo &info, QuantizationInfo out_quant_info);
 template SimpleTensor<half> deconvolution_layer(const SimpleTensor<half> &src, const SimpleTensor<half> &weights, const SimpleTensor<half> &bias, const TensorShape &output_shape,
diff --git a/tests/validation/reference/DeconvolutionLayer.h b/tests/validation/reference/DeconvolutionLayer.h
index db394faa68..fff529a719 100644
--- a/tests/validation/reference/DeconvolutionLayer.h
+++ b/tests/validation/reference/DeconvolutionLayer.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -37,9 +37,11 @@ namespace reference
 {
 /** Deconvolution reference implementation.
  *
- * src              Input tensor. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. Data types supported: F32.
+ * src              Input tensor. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs.
+ *                  Data types supported: QASYMM8/QASYMM8_SIGNED/F32/F16.
  * weights          The 4d weights with dimensions [width, height, OFM, IFM]. Data type supported: Same as @p input.
- * bias             Optional, ignored if NULL. The biases have one dimension. Data type supported: Same as @p input.
+ * bias             Optional, ignored if NULL. The biases have one dimension.
+ *                  Data type supported: Same as @p input, except for input of QASYMM8 and QASYMM8_SIGNED type where biases should be of S32 type
  * output_shape     Output tensor shape. The output has the same number of dimensions as the @p input.
  * info             Contains padding and policies to be used in the deconvolution, this is decribed in @ref PadStrideInfo.
  * a                The number of zeros added to right and top edges of the input.
-- 
cgit v1.2.1