From 4cd4cdeedd8431556806bb6f3b6ff559b757e792 Mon Sep 17 00:00:00 2001
From: Michele Di Giorgio <michele.digiorgio@arm.com>
Date: Mon, 6 Jan 2020 14:07:44 +0000
Subject: COMPMID-2757: Add support for QASYMM8_SIGNED in
 CLDepthwiseConvolutionLayer

Change-Id: I1f292f98bc3a213ba5b26ac88aa78160c809cb87
Signed-off-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Reviewed-on: https://review.mlplatform.org/c/2540
Reviewed-by: Michalis Spyrou <michalis.spyrou@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
---
 .../CLDepthwiseConvolutionLayerNativeKernel.h      | 14 ++++-----
 .../CL/functions/CLDepthwiseConvolutionLayer.h     | 14 ++++-----
 .../cl_kernels/depthwise_convolution_quantized.cl  |  6 ++--
 .../CLDepthwiseConvolutionLayerNativeKernel.cpp    |  6 ++--
 tests/validation/CL/DepthwiseConvolutionLayer.cpp  | 35 ++++++++++++++++++++--
 .../fixtures/DepthwiseConvolutionLayerFixture.h    |  3 +-
 .../reference/DepthwiseConvolutionLayer.cpp        | 18 ++++++++---
 7 files changed, 69 insertions(+), 27 deletions(-)

diff --git a/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h b/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h
index 4c1bdaaf5d..7e19ed6285 100644
--- a/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h
+++ b/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -49,11 +49,11 @@ public:
     CLDepthwiseConvolutionLayerNativeKernel &operator=(CLDepthwiseConvolutionLayerNativeKernel &&) = default;
     /** Initialize the function's source, destination and parameters
      *
-     * @param[in]  input              Source tensor. Data type supported: QASYMM8/FP32/FP16. Data layout supported: NHWC
+     * @param[in]  input              Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/FP32/FP16. Data layout supported: NHWC
      * @param[in]  weights            Weights tensor. A 3D tensor with dimensions [IFM, N, M].
-     *                                Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
+     *                                Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
      * @param[in]  biases             Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
-     *                                Data type supported: Same as @p input, S32 when input is QASYMM8.
+     *                                Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
      * @param[out] output             Destination tensor. Data type supported: Same as @p input.
      * @param[in]  dwc_weights_info   Depthwise convolution layer weights info to retrieve the number of output elements processed by each thread
      * @param[in]  dwc_info           Depthwise convolution layer info
@@ -70,11 +70,11 @@ public:
                    const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr);
     /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayerNativeKernel
      *
-     * @param[in] input              Source tensor info. Data type supported: QASYMM8/FP32/FP16. Data layout supported: NHWC
+     * @param[in] input              Source tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/FP32/FP16. Data layout supported: NHWC
      * @param[in] weights            Weights tensor info. A 3D tensor with dimensions [IFM, N, M].
-     *                               Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
+     *                               Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
      * @param[in] biases             Biases tensor info. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
-     *                               Data type supported: Same as @p input, S32 when input is QASYMM8.
+     *                               Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
      * @param[in] output             Destination tensor info. Data type supported: Same as @p input.
      * @param[in] dwc_weights_info   Depthwise convolution layer weights info to retrieve the number of output elements processed by each thread
      * @param[in] dwc_info           Depthwise convolution layer info
diff --git a/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h
index 7ce7cce064..4668e82bab 100644
--- a/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -221,11 +221,11 @@ private:
         CLDepthwiseConvolutionLayerGeneric &operator=(CLDepthwiseConvolutionLayerGeneric &&) = default;
         /** Initialize the function's source, destination, weights and convolution information.
          *
-         * @param[in, out] input            Source tensor. Data type supported: QASYMM8/F32. (Written to only for border filling).
+         * @param[in, out] input            Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F32. (Written to only for border filling).
          * @param[in]      weights          Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM].
-         *                                  Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
+         *                                  Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
          * @param[in]      biases           Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
-         *                                  Data type supported: Same as @p input, S32 when input is QASYMM8.
+         *                                  Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
          * @param[out]     output           Destination tensor. Data type supported: same as @p input.
          * @param[in]      conv_info        Padding and stride information to use for the convolution.
          * @param[in]      depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
@@ -237,11 +237,11 @@ private:
 
         /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayerGeneric
          *
-         * @param[in] input            Source tensor info. Data type supported: QASYMM8/F32.
+         * @param[in] input            Source tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/F32.
          * @param[in] weights          Weights tensor info. These are 3D tensors with shape [kernel_x, kernel_y, IFM].
-         *                             Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
+         *                             Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
          * @param[in] biases           Biases tensor info. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
-         *                             Data type supported: Same as @p input, S32 when input is QASYMM8.
+         *                             Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
          * @param[in] output           Destination tensor. Data type supported: same as @p input.
          * @param[in] conv_info        Padding and stride information to use for the convolution.
          * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
diff --git a/src/core/CL/cl_kernels/depthwise_convolution_quantized.cl b/src/core/CL/cl_kernels/depthwise_convolution_quantized.cl
index dc8078acb8..3cfa707e24 100644
--- a/src/core/CL/cl_kernels/depthwise_convolution_quantized.cl
+++ b/src/core/CL/cl_kernels/depthwise_convolution_quantized.cl
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -1634,7 +1634,7 @@ __kernel void dwc_3x3_reshaped_quantized8_dot8_stride1_nhwc(
  * @note It is possible to select the activation function to apply using -DACTIVATION_TYPE e.g. -DACTIVATION_TYPE=relu
  * @note A, B variables required by some activation functions are set using -DA_VAL= and -DB_VAL= respectively
  *
- * @param[in] src_ptr                                          Pointer to the source tensor. Supported data types: QASYMM8
+ * @param[in] src_ptr                                          Pointer to the source tensor. Supported data types: QASYMM8/QASYMM8_SIGNED
  * @param[in] src_stride_x                                     Stride of the source tensor in X dimension (in bytes)
  * @param[in] src_step_x                                       src_stride_x * number of elements along X processed per workitem(in bytes)
  * @param[in] src_stride_y                                     Stride of the source tensor in Y dimension (in bytes)
@@ -1654,7 +1654,7 @@ __kernel void dwc_3x3_reshaped_quantized8_dot8_stride1_nhwc(
  * @param[in] dst_stride_w                                     Stride of the destination tensor in W dimension (in bytes)
  * @param[in] dst_step_w                                       dst_stride_w * number of elements along W processed per workitem(in bytes)
  * @param[in] dst_offset_first_element_in_bytes                The offset of the first element in the destination tensor
- * @param[in] weights_ptr                                      Pointer to the weights tensor. Supported data types: QASYMM8/QSYMM8_PER_CHANNEL
+ * @param[in] weights_ptr                                      Pointer to the weights tensor. Supported data types: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL
  * @param[in] weights_stride_x                                 Stride of the weights tensor in X dimension (in bytes)
  * @param[in] weights_step_x                                   weights_stride_x * number of elements along X processed per workitem(in bytes)
  * @param[in] weights_stride_y                                 Stride of the weights tensor in Y dimension (in bytes)
diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.cpp
index 2155306d62..334691df93 100644
--- a/src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.cpp
+++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -46,9 +46,10 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *weights,
                           const ITensorInfo *output_multipliers, const ITensorInfo *output_shifts)
 {
     ARM_COMPUTE_UNUSED(dwc_info);
+    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
     ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input);
     ARM_COMPUTE_RETURN_ERROR_ON_DATA_LAYOUT_NOT_IN(input, DataLayout::NHWC);
-    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32);
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32);
     ARM_COMPUTE_RETURN_ERROR_ON(depth_multiplier > 1 && dwc_weights_info.n0 != 1);
     ARM_COMPUTE_RETURN_ERROR_ON(conv_info.stride().first < 1);
     ARM_COMPUTE_RETURN_ERROR_ON(conv_info.stride().second < 1);
@@ -105,6 +106,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *weights,
     if(output->total_size() != 0)
     {
         ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output->tensor_shape(), output_shape);
+        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
     }
 
     if(is_data_type_quantized(input->data_type()))
diff --git a/tests/validation/CL/DepthwiseConvolutionLayer.cpp b/tests/validation/CL/DepthwiseConvolutionLayer.cpp
index e2cdf5403a..2e8febf517 100644
--- a/tests/validation/CL/DepthwiseConvolutionLayer.cpp
+++ b/tests/validation/CL/DepthwiseConvolutionLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -565,7 +565,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerQuantizedFixture<uin
                                                                framework::dataset::make("DataType", DataType::QASYMM8)),
                                                        framework::dataset::make("SrcQuantizationInfo", { QuantizationInfo(0.3f, 10), QuantizationInfo(2.2f, 10) })),
                                                framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.5f, 4) })),
-                                       framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+                                       framework::dataset::make("DataLayout", { DataLayout::NHWC })), // NCHW is tested with int8
                                ActivationFunctionsDataset))
 {
     validate(CLAccessor(_target), _reference, tolerance_qasymm8);
@@ -588,7 +588,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerQuantizedFixture<uin
                                                                framework::dataset::make("DataType", DataType::QASYMM8)),
                                                        framework::dataset::make("SrcQuantizationInfo", { QuantizationInfo(0.5f, 10), QuantizationInfo(2.2f, 10) })),
                                                framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.8, 1) })),
-                                       framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+                                       framework::dataset::make("DataLayout", { DataLayout::NHWC })), // NCHW is tested with int8
                                ActivationFunctionsDataset))
 {
     validate(CLAccessor(_target), _reference, tolerance_qasymm8);
@@ -656,6 +656,35 @@ TEST_SUITE_END() // Dilation
 TEST_SUITE_END() // W3x3
 TEST_SUITE_END() // QASYMM8
 
+TEST_SUITE(QASYMM8_SIGNED)
+TEST_SUITE(Generic)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT,
+                       combine(combine(combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(),
+                                                                       depth_multipliers),
+                                                               framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
+                                                       framework::dataset::make("SrcQuantizationInfo", { QuantizationInfo(0.3f, 10), QuantizationInfo(2.2f, 10) })),
+                                               framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.5f, 4) })),
+                                       framework::dataset::make("DataLayout", { DataLayout::NCHW })),
+                               ActivationFunctionsDataset))
+{
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+TEST_SUITE(Dilation)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT,
+                       combine(combine(combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset(),
+                                                                       depth_multipliers),
+                                                               framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
+                                                       framework::dataset::make("SrcQuantizationInfo", { QuantizationInfo(0.5f, 10), QuantizationInfo(2.2f, 10) })),
+                                               framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.8, 1) })),
+                                       framework::dataset::make("DataLayout", { DataLayout::NCHW })),
+                               ActivationFunctionsDataset))
+{
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+TEST_SUITE_END() // Dilation
+TEST_SUITE_END() // Generic
+TEST_SUITE_END() // QASYMM8_SIGNED
+
 TEST_SUITE(QSYMM8_PER_CHANNEL)
 TEST_SUITE(Generic)
 FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerQuantizedPerChannelFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
diff --git a/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h b/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h
index f909885245..7016e9fb68 100644
--- a/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h
+++ b/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -90,6 +90,7 @@ protected:
                 library->fill(tensor, distribution, i);
                 break;
             }
+            case DataType::QASYMM8_SIGNED:
             case DataType::QSYMM8_PER_CHANNEL:
             {
                 std::uniform_int_distribution<int8_t> distribution(-10, 10);
diff --git a/tests/validation/reference/DepthwiseConvolutionLayer.cpp b/tests/validation/reference/DepthwiseConvolutionLayer.cpp
index 4245140373..7bba98a0c6 100644
--- a/tests/validation/reference/DepthwiseConvolutionLayer.cpp
+++ b/tests/validation/reference/DepthwiseConvolutionLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -130,8 +130,8 @@ SimpleTensor<T> depthwise_convolution_fp(const SimpleTensor<T> &src, const Simpl
  * - Third dimention is number of channels
  * - Depths of input tensor and filter are equals
  * - Padding, stride and output shape "match"
- * - QASYMM8 input, output
- * - QASYMM8 or QSYMM8_PER_CHANNEL filter
+ * - QASYMM8/QASYMM8_SIGNED input, output
+ * - QASYMM8/QASYMM8_SIGNED or QSYMM8_PER_CHANNEL filter
  *
  */
 template <typename T, typename TW, typename TB>
@@ -179,6 +179,9 @@ SimpleTensor<T> depthwise_convolution_quantized(const SimpleTensor<T> &src, cons
 
     const bool is_quantized_per_channel = is_data_type_quantized_per_channel(weights.data_type());
 
+    const int min = std::numeric_limits<T>::lowest();
+    const int max = std::numeric_limits<T>::max();
+
     int out_pos = 0;
     for(int r = 0; r < num_batches; ++r)
     {
@@ -217,7 +220,7 @@ SimpleTensor<T> depthwise_convolution_quantized(const SimpleTensor<T> &src, cons
                         }
                         val += bias_val;
                         // Quantize down
-                        val = quantize_down_scale_by_fixedpoint(val, output_multiplier, output_shift, output_offset, 0, 255);
+                        val = quantize_down_scale_by_fixedpoint(val, output_multiplier, output_shift, output_offset, min, max);
 
                         // Store the result
                         dst[out_pos++] = val;
@@ -258,6 +261,13 @@ SimpleTensor<uint8_t> depthwise_convolution(const SimpleTensor<uint8_t> &src, co
 {
     return depthwise_convolution_quantized<uint8_t, int8_t, int32_t>(src, weights, biases, dst_shape, conv_info, depth_multiplier, dilation, out_quant_info);
 }
+
+template <>
+SimpleTensor<int8_t> depthwise_convolution(const SimpleTensor<int8_t> &src, const SimpleTensor<int8_t> &weights, const SimpleTensor<int32_t> &biases, const TensorShape &dst_shape,
+                                           const PadStrideInfo &conv_info, unsigned int depth_multiplier, const Size2D &dilation, const QuantizationInfo &out_quant_info)
+{
+    return depthwise_convolution_quantized<int8_t, int8_t, int32_t>(src, weights, biases, dst_shape, conv_info, depth_multiplier, dilation, out_quant_info);
+}
 } // namespace reference
 } // namespace validation
 } // namespace test
-- 
cgit v1.2.1