From 681f2d4263c5e762ea4c7b3d0ba7a087823d36fc Mon Sep 17 00:00:00 2001
From: Sheri Zhang <sheri.zhang@arm.com>
Date: Thu, 20 Feb 2020 11:23:08 +0000
Subject: COMPMID-2758: Add support for QASYMM8_SIGNED in
 CLDirectConvolutionLayer

Signed-off-by: Sheri Zhang <sheri.zhang@arm.com>
Change-Id: I0c153f7d880005aeced38cc64b7571578a5ea7f3
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/2753
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Sang-Hoon Park <sang-hoon.park@arm.com>
---
 .../CL/kernels/CLDirectConvolutionLayerKernel.h    |  9 +--
 .../CL/functions/CLDirectConvolutionLayer.h        | 11 ++--
 .../CL/cl_kernels/direct_convolution_quantized.cl  | 71 ++++++++++++----------
 .../CL/kernels/CLDirectConvolutionLayerKernel.cpp  |  2 +-
 tests/validation/CL/DirectConvolutionLayer.cpp     | 36 ++++++++++-
 .../fixtures/DirectConvolutionLayerFixture.h       | 11 +++-
 6 files changed, 95 insertions(+), 45 deletions(-)

diff --git a/arm_compute/core/CL/kernels/CLDirectConvolutionLayerKernel.h b/arm_compute/core/CL/kernels/CLDirectConvolutionLayerKernel.h
index b37d3a4047..5bf9a5d57f 100644
--- a/arm_compute/core/CL/kernels/CLDirectConvolutionLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLDirectConvolutionLayerKernel.h
@@ -57,12 +57,12 @@ public:
      *        9x9 convolution with stride_x = 1/2, stride_y = 1/2, data_layout=NHWC
      *
      * @param[in]  input     The input tensor to convolve. 3 lower dimensions represent a single input [width, height, IFM],
-     *                       while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8/F16/F32.
+     *                       while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
      * @param[in]  weights   Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
      *                       The 3rd dimension must be the same as the input's volume 3rd dimension.
      *                       Data type supported:Same as @p input.
      * @param[in]  biases    Biases tensor. Biases are 1D tensor with dimension [OFM].
-     *                       Data type supported: Should match @p input data type, except for input of QASYMM8 type where biases should be of S32 type
+     *                       Data type supported: Should match @p input data type, except for input of QASYMM8 and QASYMM8_SIGNED type where biases should be of S32 type
      * @param[out] output    Output tensor.
      *                       The 3rd dimensions must be equal to the 4th dimension of the @p kernels tensor. Data types supported: Same as @p input.
      * @param[in]  conv_info Contains padding and stride information described in @ref PadStrideInfo.
@@ -71,11 +71,12 @@ public:
     /** Static function to check if given info will lead to a valid configuration of @ref CLDirectConvolutionLayerKernel
      *
      * @param[in] input     The input tensor to convolve. 3 lower dimensions represent a single input [width, height, IFM],
-     *                      while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8/F16/F32.
+     *                      while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
      * @param[in] weights   Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
      *                      The 3rd dimension must be the same as the input's volume 3rd dimension.
      *                      Data type supported:Same as @p input.
-     * @param[in] biases    Biases tensor. Biases are 1D tensor with dimension [OFM]. Data type supported: Same as @p input.
+     * @param[in] biases    Biases tensor. Biases are 1D tensor with dimension [OFM].
+     *                      Data type supported: Should match @p input data type, except for input of QASYMM8 and QASYMM8_SIGNED type where biases should be of S32 type.
      * @param[in] output    Output tensor.
      *                      The 3rd dimensions must be equal to the 4th dimension of the @p kernels tensor. Data types supported: Same as @p input.
      * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
diff --git a/arm_compute/runtime/CL/functions/CLDirectConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLDirectConvolutionLayer.h
index 10bcb75e43..045b1c0c99 100644
--- a/arm_compute/runtime/CL/functions/CLDirectConvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLDirectConvolutionLayer.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -47,10 +47,10 @@ public:
      *
      * @param[in]  input     Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
      *                       while every optional dimension from 4 and above represent a batch of inputs.
-     *                       Data types supported: QASYMM8/F16/F32.
+     *                       Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
      * @param[in]  weights   Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported:Same as @p input.
      * @param[in]  biases    Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
-     *                       Data type supported: Should match @p input data type, except for input of QASYMM8 type where biases should be of S32 type.
+     *                       Data type supported: Should match @p input data type, except for input of QASYMM8 and QASYMM8_SIGNED type where biases should be of S32 type.
      * @param[out] output    Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
      *                       Data types supported: Same as @p input.
      * @param[in]  conv_info Contains padding and stride information described in @ref PadStrideInfo.
@@ -61,9 +61,10 @@ public:
      *
      * @param[in] input     Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
      *                      while every optional dimension from 4 and above represent a batch of inputs.
-     *                      Data types supported: QASYMM8/F16/F32.
+     *                      Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
      * @param[in] weights   Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported:Same as @p input.
-     * @param[in] biases    Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. Data type supported:Same as @p input.
+     * @param[in] biases    Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
+     *                      Data type supported: Should match @p input data type, except for input of QASYMM8 and QASYMM8_SIGNED type where biases should be of S32 type.
      * @param[in] output    Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
      *                      Data types supported: Same as @p input.
      * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
diff --git a/src/core/CL/cl_kernels/direct_convolution_quantized.cl b/src/core/CL/cl_kernels/direct_convolution_quantized.cl
index 37fd9a0778..0a8c5faecf 100644
--- a/src/core/CL/cl_kernels/direct_convolution_quantized.cl
+++ b/src/core/CL/cl_kernels/direct_convolution_quantized.cl
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -23,10 +23,14 @@
  */
 #include "helpers_asymm.h"
 
+#undef CONVERT_SAT_STR
 #undef CONVERT_SAT
 
 #if defined(DATA_TYPE) && defined(STRIDE_X) && defined(WEIGHTS_DEPTH) && defined(OUTPUT_MULTIPLIER) && defined(OUTPUT_SHIFT)
 
+#define CONVERT_SAT_STR(x, type) (convert_##type##8_sat((x)))
+#define CONVERT_SAT(x, type) CONVERT_SAT_STR(x, type)
+
 #if KERNEL_SIZE == 9
 
 #if STRIDE_X == 1
@@ -155,7 +159,7 @@
  *
  * @return extracted input pixels.
  */
-inline uchar8 extract_input_stride1(__global const uchar *input_pixel)
+inline VEC_DATA_TYPE(DATA_TYPE, 8) extract_input_stride1(__global const DATA_TYPE *input_pixel)
 {
     return vload8(0, input_pixel);
 }
@@ -166,9 +170,10 @@ inline uchar8 extract_input_stride1(__global const uchar *input_pixel)
  *
  * @return extracted input pixels.
  */
-inline uchar8 extract_input_stride2(__global const uchar *input_pixel)
+inline VEC_DATA_TYPE(DATA_TYPE, 8) extract_input_stride2(__global const DATA_TYPE *input_pixel)
 {
-    uchar16 temp = vload16(0, input_pixel);
+    VEC_DATA_TYPE(DATA_TYPE, 16)
+    temp = vload16(0, input_pixel);
     return temp.s02468ace;
 }
 
@@ -178,11 +183,13 @@ inline uchar8 extract_input_stride2(__global const uchar *input_pixel)
  *
  * @return extracted input pixels.
  */
-inline uchar8 extract_input_stride3(__global const uchar *input_pixel)
+inline VEC_DATA_TYPE(DATA_TYPE, 8) extract_input_stride3(__global const DATA_TYPE *input_pixel)
 {
-    uchar16 temp1 = vload16(0, input_pixel);
-    uchar16 temp2 = vload16(0, input_pixel + 12);
-    return (uchar8)(temp1.s0369, temp2.s0369);
+    VEC_DATA_TYPE(DATA_TYPE, 16)
+    temp1 = vload16(0, input_pixel);
+    VEC_DATA_TYPE(DATA_TYPE, 16)
+    temp2 = vload16(0, input_pixel + 12);
+    return (VEC_DATA_TYPE(DATA_TYPE, 8))(temp1.s0369, temp2.s0369);
 }
 
 #else /* KERNEL_SIZE not equals 1, 3 , 5, 9 */
@@ -197,7 +204,7 @@ inline uchar8 extract_input_stride3(__global const uchar *input_pixel)
  * @note The output quantization multiplier must be passed at compile time using -DOUTPUT_MULTIPLIER e.g. -DOUTPUT_MULTIPLIER=1234
  * @note The output quantization shift must be passed at compile time using -DOUTPUT_SHIFT e.g. -DOUTPUT_SHIFT=4
  *
- * @param[in]  src_ptr                               Pointer to the source tensor. Supported data types: QASYMM8
+ * @param[in]  src_ptr                               Pointer to the source tensor. Supported data types: QASYMM8/QASYMM8_SIGNED
  * @param[in]  src_stride_x                          Stride of the source tensor in X dimension (in bytes)
  * @param[in]  src_step_x                            src_stride_x * number of elements along X processed per workitem(in bytes)
  * @param[in]  src_stride_y                          Stride of the source tensor in Y dimension (in bytes)
@@ -213,7 +220,7 @@ inline uchar8 extract_input_stride3(__global const uchar *input_pixel)
  * @param[in]  dst_stride_z                          Stride of the destination tensor in Z dimension (in bytes)
  * @param[in]  dst_step_z                            dst_stride_z * number of elements along Z processed per workitem(in bytes)
  * @param[in]  dst_offset_first_element_in_bytes     The offset of the first element in the destination tensor
- * @param[in]  weights_ptr                           Pointer to the weights tensor. Supported data types: same as @p weights_ptr
+ * @param[in]  weights_ptr                           Pointer to the weights tensor. Supported data types: same as @p src_ptr
  * @param[in]  weights_stride_x                      Stride of the weights tensor in X dimension (in bytes)
  * @param[in]  weights_step_x                        weights_stride_x * number of elements along X processed per workitem(in bytes)
  * @param[in]  weights_stride_y                      Stride of the weights tensor in Y dimension (in bytes)
@@ -248,8 +255,8 @@ __kernel void direct_convolution_quantized(
 
     int8 pixels0 = 0;
 
-    __global uchar *weights_addr = (__global uchar *)tensor3D_offset(&weights, 0, 0, 0);
-    __global uchar *src_addr     = (__global uchar *)offset(&src, 0, 0);
+    __global DATA_TYPE *weights_addr = (__global DATA_TYPE *)tensor3D_offset(&weights, 0, 0, 0);
+    __global DATA_TYPE *src_addr     = (__global DATA_TYPE *)offset(&src, 0, 0);
 
     const int kernel_index = get_global_id(2);
     weights_addr += kernel_index * weights_stride_w;
@@ -257,28 +264,28 @@ __kernel void direct_convolution_quantized(
     for(volatile int d = 0; d < WEIGHTS_DEPTH; ++d)
     {
 #if KERNEL_SIZE == 9
-        CONVOLUTION1x9(pixels0, (__global uchar *)(src_addr + 0 * src_stride_y), (__global uchar *)(weights_addr + 0 * weights_stride_y));
-        CONVOLUTION1x9(pixels0, (__global uchar *)(src_addr + 1 * src_stride_y), (__global uchar *)(weights_addr + 1 * weights_stride_y));
-        CONVOLUTION1x9(pixels0, (__global uchar *)(src_addr + 2 * src_stride_y), (__global uchar *)(weights_addr + 2 * weights_stride_y));
-        CONVOLUTION1x9(pixels0, (__global uchar *)(src_addr + 3 * src_stride_y), (__global uchar *)(weights_addr + 3 * weights_stride_y));
-        CONVOLUTION1x9(pixels0, (__global uchar *)(src_addr + 4 * src_stride_y), (__global uchar *)(weights_addr + 4 * weights_stride_y));
-        CONVOLUTION1x9(pixels0, (__global uchar *)(src_addr + 5 * src_stride_y), (__global uchar *)(weights_addr + 5 * weights_stride_y));
-        CONVOLUTION1x9(pixels0, (__global uchar *)(src_addr + 6 * src_stride_y), (__global uchar *)(weights_addr + 6 * weights_stride_y));
-        CONVOLUTION1x9(pixels0, (__global uchar *)(src_addr + 7 * src_stride_y), (__global uchar *)(weights_addr + 7 * weights_stride_y));
-        CONVOLUTION1x9(pixels0, (__global uchar *)(src_addr + 8 * src_stride_y), (__global uchar *)(weights_addr + 8 * weights_stride_y));
+        CONVOLUTION1x9(pixels0, (__global DATA_TYPE *)(src_addr + 0 * src_stride_y), (__global DATA_TYPE *)(weights_addr + 0 * weights_stride_y));
+        CONVOLUTION1x9(pixels0, (__global DATA_TYPE *)(src_addr + 1 * src_stride_y), (__global DATA_TYPE *)(weights_addr + 1 * weights_stride_y));
+        CONVOLUTION1x9(pixels0, (__global DATA_TYPE *)(src_addr + 2 * src_stride_y), (__global DATA_TYPE *)(weights_addr + 2 * weights_stride_y));
+        CONVOLUTION1x9(pixels0, (__global DATA_TYPE *)(src_addr + 3 * src_stride_y), (__global DATA_TYPE *)(weights_addr + 3 * weights_stride_y));
+        CONVOLUTION1x9(pixels0, (__global DATA_TYPE *)(src_addr + 4 * src_stride_y), (__global DATA_TYPE *)(weights_addr + 4 * weights_stride_y));
+        CONVOLUTION1x9(pixels0, (__global DATA_TYPE *)(src_addr + 5 * src_stride_y), (__global DATA_TYPE *)(weights_addr + 5 * weights_stride_y));
+        CONVOLUTION1x9(pixels0, (__global DATA_TYPE *)(src_addr + 6 * src_stride_y), (__global DATA_TYPE *)(weights_addr + 6 * weights_stride_y));
+        CONVOLUTION1x9(pixels0, (__global DATA_TYPE *)(src_addr + 7 * src_stride_y), (__global DATA_TYPE *)(weights_addr + 7 * weights_stride_y));
+        CONVOLUTION1x9(pixels0, (__global DATA_TYPE *)(src_addr + 8 * src_stride_y), (__global DATA_TYPE *)(weights_addr + 8 * weights_stride_y));
 #elif KERNEL_SIZE == 5
-        CONVOLUTION1x5(pixels0, (__global uchar *)src_addr, (__global uchar *)weights_addr);
-        CONVOLUTION1x5(pixels0, (__global uchar *)(src_addr + 1 * src_stride_y), (__global uchar *)(weights_addr + 1 * weights_stride_y));
-        CONVOLUTION1x5(pixels0, (__global uchar *)(src_addr + 2 * src_stride_y), (__global uchar *)(weights_addr + 2 * weights_stride_y));
-        CONVOLUTION1x5(pixels0, (__global uchar *)(src_addr + 3 * src_stride_y), (__global uchar *)(weights_addr + 3 * weights_stride_y));
-        CONVOLUTION1x5(pixels0, (__global uchar *)(src_addr + 4 * src_stride_y), (__global uchar *)(weights_addr + 4 * weights_stride_y));
+        CONVOLUTION1x5(pixels0, (__global DATA_TYPE *)src_addr, (__global DATA_TYPE *)weights_addr);
+        CONVOLUTION1x5(pixels0, (__global DATA_TYPE *)(src_addr + 1 * src_stride_y), (__global DATA_TYPE *)(weights_addr + 1 * weights_stride_y));
+        CONVOLUTION1x5(pixels0, (__global DATA_TYPE *)(src_addr + 2 * src_stride_y), (__global DATA_TYPE *)(weights_addr + 2 * weights_stride_y));
+        CONVOLUTION1x5(pixels0, (__global DATA_TYPE *)(src_addr + 3 * src_stride_y), (__global DATA_TYPE *)(weights_addr + 3 * weights_stride_y));
+        CONVOLUTION1x5(pixels0, (__global DATA_TYPE *)(src_addr + 4 * src_stride_y), (__global DATA_TYPE *)(weights_addr + 4 * weights_stride_y));
 #elif KERNEL_SIZE == 3
-        CONVOLUTION1x3(pixels0, (__global uchar *)(src_addr + 0 * src_stride_y), (__global uchar *)(weights_addr + 0 * weights_stride_y));
-        CONVOLUTION1x3(pixels0, (__global uchar *)(src_addr + 1 * src_stride_y), (__global uchar *)(weights_addr + 1 * weights_stride_y));
-        CONVOLUTION1x3(pixels0, (__global uchar *)(src_addr + 2 * src_stride_y), (__global uchar *)(weights_addr + 2 * weights_stride_y));
+        CONVOLUTION1x3(pixels0, (__global DATA_TYPE *)(src_addr + 0 * src_stride_y), (__global DATA_TYPE *)(weights_addr + 0 * weights_stride_y));
+        CONVOLUTION1x3(pixels0, (__global DATA_TYPE *)(src_addr + 1 * src_stride_y), (__global DATA_TYPE *)(weights_addr + 1 * weights_stride_y));
+        CONVOLUTION1x3(pixels0, (__global DATA_TYPE *)(src_addr + 2 * src_stride_y), (__global DATA_TYPE *)(weights_addr + 2 * weights_stride_y));
 #elif KERNEL_SIZE == 1
-        int weight       = convert_int(*(__global uchar *)weights_addr);
-        int8 input_pixel = convert_int8(INPUT_PIXEL((__global uchar *)src_addr));
+        int weight       = convert_int(*(__global DATA_TYPE *)weights_addr);
+        int8 input_pixel = convert_int8(INPUT_PIXEL((__global DATA_TYPE *)src_addr));
         pixels0 += (input_pixel + input_offset) * ((int8)weight + weight_offset);
 #endif /* (KERNEL_SIZE == 1) || (KERNEL_SIZE == 3) || (KERNEL_SIZE == 5) */
 
@@ -299,6 +306,6 @@ __kernel void direct_convolution_quantized(
 #endif // OUTPUT_SHIFT < 0
     pixels0 = pixels0 + output_offset;
 
-    vstore8(convert_uchar8_sat(pixels0), 0, (__global uchar *)dst.ptr);
+    vstore8(CONVERT_SAT(pixels0, DATA_TYPE), 0, (__global DATA_TYPE *)dst.ptr);
 }
 #endif // defined(DATA_TYPE) && defined(STRIDE_X) && defined(WEIGHTS_DEPTH) && defined(OUTPUT_MULTIPLIER) && defined(OUTPUT_SHIFT)
diff --git a/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp b/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp
index 312890343f..21ce8b83be 100644
--- a/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp
+++ b/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp
@@ -45,7 +45,7 @@ namespace
 Status validate_arguments(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info)
 {
     ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input);
-    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32);
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8_SIGNED, DataType::QASYMM8, DataType::F16, DataType::F32);
     ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
 
     const DataLayout data_layout = input->data_layout();
diff --git a/tests/validation/CL/DirectConvolutionLayer.cpp b/tests/validation/CL/DirectConvolutionLayer.cpp
index 15b6c838fa..f5cb843f42 100644
--- a/tests/validation/CL/DirectConvolutionLayer.cpp
+++ b/tests/validation/CL/DirectConvolutionLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -295,6 +295,40 @@ FIXTURE_DATA_TEST_CASE(Run, CLDirectConvolutionValidationWithTensorShapesQuantiz
     validate(CLAccessor(_target), _reference, tolerance_qasymm8);
 }
 TEST_SUITE_END() // QASYMM8_CustomDataset
+
+TEST_SUITE(QASYMM8_SIGNED)
+
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDirectConvolutionLayerQuantizedFixture<int8_t>, framework::DatasetMode::ALL, combine(combine(combine(data_precommit, framework::dataset::make("DataType",
+                                                                                                                        DataType::QASYMM8_SIGNED)),
+                                                                                                                        framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255, 10), QuantizationInfo(1.1f, -10) })),
+                                                                                                                        QuantizedActivationFunctionsDataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmall9x9, CLDirectConvolutionLayerQuantizedFixture<int8_t>, framework::DatasetMode::ALL, combine(combine(combine(data_precommit_9x9,
+                       framework::dataset::make("DataType",
+                                                DataType::QASYMM8_SIGNED)),
+                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255, 10), QuantizationInfo(1.1f, 10) })),
+                       QuantizedActivationFunctionsDataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+
+FIXTURE_DATA_TEST_CASE(RunCustomDataset, CLDirectConvolutionValidationWithTensorShapesQuantizedFixture<int8_t>, framework::DatasetMode::NIGHTLY,
+                       combine(combine(combine(datasets::DirectConvolutionLayerDataset(),
+                                               framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
+                                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255, 127), QuantizationInfo(1.1f, 10) })),
+                               QuantizedActivationFunctionsDataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+
+TEST_SUITE_END() // QASYMM8_SIGNED
+
 TEST_SUITE_END() // Quantized
 
 TEST_SUITE_END() // DirectConvolutionLayer
diff --git a/tests/validation/fixtures/DirectConvolutionLayerFixture.h b/tests/validation/fixtures/DirectConvolutionLayerFixture.h
index e2f9554164..fc36547c53 100644
--- a/tests/validation/fixtures/DirectConvolutionLayerFixture.h
+++ b/tests/validation/fixtures/DirectConvolutionLayerFixture.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -49,7 +49,7 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
 class DirectConvolutionValidationGenericFixture : public framework::Fixture
 {
 public:
-    using TBias = typename std::conditional<std::is_same<typename std::decay<T>::type, uint8_t>::value, int32_t, T>::type;
+    using TBias = typename std::conditional < std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value, int32_t, T >::type;
 
 public:
     template <typename...>
@@ -103,6 +103,13 @@ protected:
                 library->fill(tensor, distribution, i);
                 break;
             }
+            case DataType::QASYMM8_SIGNED:
+            {
+                std::pair<int, int> bounds = get_quantized_qasymm8_signed_bounds(tensor.quantization_info(), -1.0f, 1.0f);
+                std::uniform_int_distribution<int8_t> distribution(bounds.first, bounds.second);
+                library->fill(tensor, distribution, i);
+                break;
+            }
             case DataType::F16:
             case DataType::F32:
             {
-- 
cgit v1.2.1