author    Michele Di Giorgio <michele.digiorgio@arm.com>  2019-06-18 10:23:22 +0100
committer Georgios Pinitas <georgios.pinitas@arm.com>     2019-06-25 09:37:00 +0000
commit    6997fc951e48a1bf8f7591f3b2c4c8d721331b96 (patch)
tree      1cc2b28f5b2a5dbb8d7eb32755df4e8f28a1901d
parent    944170e1591ff23c9e6ede2201f0f6aba0f3439b (diff)
download  ComputeLibrary-6997fc951e48a1bf8f7591f3b2c4c8d721331b96.tar.gz
COMPMID-2412: Add QSYMM16 support for ElementwiseAddition for CL
Arithmetic addition uses the same code as other element-wise operations. Hence, adding QSYMM16 support for addition automatically adds the same support for:
- arithmetic subtraction
- element-wise min
- element-wise max
- squared difference

Change-Id: If986102844f62e29dd23c03f9245910db43f9043
Signed-off-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Reviewed-on: https://review.mlplatform.org/c/1384
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Manuel Bottini <manuel.bottini@arm.com>
Reviewed-by: Giuseppe Rossini <giuseppe.rossini@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
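For readers who want to try the new path, here is a minimal usage sketch of CLArithmeticAddition with QSYMM16 tensors. It is not part of this patch; the shape, the scales and the default_init() setup are illustrative assumptions.

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"
    #include "arm_compute/runtime/CL/functions/CLElementwiseOperations.h"

    using namespace arm_compute;

    int main()
    {
        CLScheduler::get().default_init(); // assumption: the default CL context is sufficient here

        // QSYMM16 uses a zero offset by definition; the scale below is a made-up value.
        const TensorShape shape(16U, 16U);
        const TensorInfo  info(shape, 1, DataType::QSYMM16, QuantizationInfo(1.f / 32768.f, 0));

        CLTensor in1, in2, out;
        in1.allocator()->init(info);
        in2.allocator()->init(info);
        out.allocator()->init(info);

        CLArithmeticAddition add;
        add.configure(&in1, &in2, &out, ConvertPolicy::SATURATE);

        in1.allocator()->allocate();
        in2.allocator()->allocate();
        out.allocator()->allocate();

        // Fill in1/in2 with quantized data before running in real code.
        add.run();
        return 0;
    }

Because addition shares its kernel with the other element-wise operations, the same pattern applies unchanged to CLArithmeticSubtraction, CLElementwiseMin, CLElementwiseMax and CLElementwiseSquaredDiff.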
-rw-r--r--  arm_compute/core/CL/kernels/CLElementwiseOperationKernel.h |   8
-rw-r--r--  arm_compute/core/Utils.h                                   |  21
-rw-r--r--  arm_compute/runtime/CL/functions/CLElementwiseOperations.h |  56
-rw-r--r--  src/core/CL/CLHelpers.cpp                                  |   4
-rw-r--r--  src/core/CL/cl_kernels/elementwise_operation_quantized.cl  |  18
-rw-r--r--  src/core/CL/kernels/CLElementwiseOperationKernel.cpp       |  39
-rw-r--r--  tests/validation/CL/ArithmeticAddition.cpp                 | 100
-rw-r--r--  tests/validation/CL/ArithmeticSubtraction.cpp              | 104
-rw-r--r--  tests/validation/CL/ElementwiseMax.cpp                     |  85
-rw-r--r--  tests/validation/CL/ElementwiseMin.cpp                     |  85
-rw-r--r--  tests/validation/CL/ElementwiseSquaredDiff.cpp             |  88
-rw-r--r--  tests/validation/reference/ElementwiseOperations.cpp       |  35
12 files changed, 454 insertions, 189 deletions
diff --git a/arm_compute/core/CL/kernels/CLElementwiseOperationKernel.h b/arm_compute/core/CL/kernels/CLElementwiseOperationKernel.h
index ed42c9b99a..8c5cdb5690 100644
--- a/arm_compute/core/CL/kernels/CLElementwiseOperationKernel.h
+++ b/arm_compute/core/CL/kernels/CLElementwiseOperationKernel.h
@@ -115,7 +115,7 @@ public:
/** Static function to check if given info will lead to a valid configuration of @ref CLSaturatedArithmeticOperationKernel
*
* @param[in] op Arithmetic operation to be executed.
- * @param[in] input1 First tensor input. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
+ * @param[in] input1 First tensor input. Data types supported: U8/S8/QASYMM8/U16/S16/QSYMM16/F16/U32/S32/F32.
* @param[in] input2 Second tensor input. Data types supported: Same as @p input1.
* @param[in] output Output tensor. Data types supported: Same as @p input1.
* @param[in] policy Policy to use to handle overflow.
@@ -125,7 +125,7 @@ public:
/** Static function to check if given info will lead to a valid configuration of @ref CLSaturatedArithmeticOperationKernel
*
* @param[in] op Arithmetic operation to be executed.
- * @param[in] input1 First tensor input info. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
+ * @param[in] input1 First tensor input info. Data types supported: U8/S8/QASYMM8/U16/S16/QSYMM16/F16/U32/S32/F32.
* @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
* @param[in] output Output tensor info. Data types supported: Same as @p input1.
* @param[in] policy Policy to use to handle overflow.
@@ -158,7 +158,7 @@ public:
/** Static function to check if given info will lead to a valid configuration of @ref CLArithmeticOperationKernel
*
* @param[in] op Arithmetic operation to be executed.
- * @param[in] input1 First tensor input. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
+ * @param[in] input1 First tensor input. Data types supported: U8/S8/QASYMM8/U16/S16/QSYMM16/F16/U32/S32/F32.
* @param[in] input2 Second tensor input. Data types supported: Same as @p input1.
* @param[in] output Output tensor. Data types supported: Same as @p input1.
*/
@@ -167,7 +167,7 @@ public:
/** Static function to check if given info will lead to a valid configuration of @ref CLArithmeticOperationKernel
*
* @param[in] op Arithmetic operation to be executed.
- * @param[in] input1 First tensor input info. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
+ * @param[in] input1 First tensor input info. Data types supported: U8/S8/QASYMM8/U16/S16/QSYMM16/F16/U32/S32/F32.
* @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
* @param[in] output Output tensor info. Data types supported: Same as @p input1.
*
diff --git a/arm_compute/core/Utils.h b/arm_compute/core/Utils.h
index b711451453..a37559d269 100644
--- a/arm_compute/core/Utils.h
+++ b/arm_compute/core/Utils.h
@@ -1022,7 +1022,7 @@ inline bool is_data_type_quantized(DataType dt)
*
* @param[in] dt Input data type.
*
- * @return True if data type is of symmetric quantized type, else false.
+ * @return True if data type is of asymmetric quantized type, else false.
*/
inline bool is_data_type_quantized_asymmetric(DataType dt)
{
@@ -1035,6 +1035,25 @@ inline bool is_data_type_quantized_asymmetric(DataType dt)
}
}
+/** Check if a given data type is of symmetric quantized type
+ *
+ * @param[in] dt Input data type.
+ *
+ * @return True if data type is of symmetric quantized type, else false.
+ */
+inline bool is_data_type_quantized_symmetric(DataType dt)
+{
+ switch(dt)
+ {
+ case DataType::QSYMM8:
+ case DataType::QSYMM8_PER_CHANNEL:
+ case DataType::QSYMM16:
+ return true;
+ default:
+ return false;
+ }
+}
+
/** Create a string with the float in full precision.
*
* @param val Floating point value
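As a quick illustration of how the new helper partitions the quantized types alongside the existing asymmetric check, a small self-contained sketch (the assertions simply mirror the switch statements above):

    #include "arm_compute/core/Utils.h"
    #include <cassert>

    using namespace arm_compute;

    int main()
    {
        // The two helpers split the quantized types by offset semantics.
        assert(is_data_type_quantized_symmetric(DataType::QSYMM16));
        assert(is_data_type_quantized_symmetric(DataType::QSYMM8));
        assert(!is_data_type_quantized_symmetric(DataType::QASYMM8));
        assert(is_data_type_quantized_asymmetric(DataType::QASYMM8));
        // Either kind still counts as quantized.
        assert(is_data_type_quantized(DataType::QSYMM16));
        return 0;
    }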
diff --git a/arm_compute/runtime/CL/functions/CLElementwiseOperations.h b/arm_compute/runtime/CL/functions/CLElementwiseOperations.h
index 27215e81c1..4cbec62371 100644
--- a/arm_compute/runtime/CL/functions/CLElementwiseOperations.h
+++ b/arm_compute/runtime/CL/functions/CLElementwiseOperations.h
@@ -33,7 +33,7 @@ class ICLTensor;
/** Basic function to run @ref CLSaturatedArithmeticOperationKernel for addition
*
- * @note The tensor data type for the inputs must be U8/QASYMM8/S16/S32/U32/F16/F32.
+ * @note The tensor data type for the inputs must be U8/QASYMM8/S16/QSYMM16/S32/U32/F16/F32.
* @note The function performs an arithmetic addition between two tensors.
*/
class CLArithmeticAddition : public ICLSimpleFunction
@@ -41,19 +41,19 @@ class CLArithmeticAddition : public ICLSimpleFunction
public:
/** Initialise the kernel's inputs, output and conversion policy.
*
- * @param[in, out] input1 First tensor input. Data types supported: U8/QASYMM8/S16/S32/U32/F16/F32.
+ * @param[in, out] input1 First tensor input. Data types supported: U8/QASYMM8/S16/QSYMM16/S32/U32/F16/F32.
* The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
- * @param[in, out] input2 Second tensor input. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), S16/F16/F32.
+ * @param[in, out] input2 Second tensor input. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), QSYMM16 (only if @p input1 is QSYMM16), S16/F16/F32.
* The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
- * @param[out] output Output tensor. Data types supported: U8 (Only if both inputs are U8), QASYMM8 (only if both inputs are QASYMM8), S16/F16/F32.
+ * @param[out] output Output tensor. Data types supported: U8 (Only if both inputs are U8), QASYMM8 (only if both inputs are QASYMM8), QSYMM16 (only if both inputs are QSYMM16), S16/F16/F32.
* @param[in] policy Policy to use to handle overflow.
*/
void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, ConvertPolicy policy);
/** Static function to check if given info will lead to a valid configuration of @ref CLSaturatedArithmeticOperationKernel for addition
*
- * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/S16/S32/U32/F16/F32.
- * @param[in] input2 Second tensor input info. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), S16/F16/F32.
- * @param[in] output Output tensor info. Data types supported: U8 (Only if both inputs are U8), QASYMM8 ( only if both inputs are QASYMM8), S16/F16/F32.
+ * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/S16/QSYMM16/S32/U32/F16/F32.
+ * @param[in] input2 Second tensor input info. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), QSYMM16 (only if @p input1 is QSYMM16), S16/F16/F32.
+ * @param[in] output Output tensor info. Data types supported: U8 (Only if both inputs are U8), QASYMM8 (only if both inputs are QASYMM8), QSYMM16 (only if both inputs are QSYMM16), S16/F16/F32.
* @param[in] policy Policy to use to handle overflow.
*
* @return a status
@@ -121,7 +121,7 @@ public:
/** Basic function to run @ref CLArithmeticOperationKernel for max
*
- * @note The tensor data type for the inputs must be U8/QASYMM8/S16/S32/U32/F16/F32.
+ * @note The tensor data type for the inputs must be U8/QASYMM8/S16/QSYMM16/S32/U32/F16/F32.
* @note The function performs a max operation between two tensors.
*/
class CLElementwiseMax : public ICLSimpleFunction
@@ -129,18 +129,18 @@ class CLElementwiseMax : public ICLSimpleFunction
public:
/** Initialise the kernel's inputs, output and conversion policy.
*
- * @param[in, out] input1 First tensor input. Data types supported: U8/QASYMM8/S16/S32/U32/F16/F32.
+ * @param[in, out] input1 First tensor input. Data types supported: U8/QASYMM8/S16/QSYMM16/S32/U32/F16/F32.
* The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
- * @param[in, out] input2 Second tensor input. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), S16/F16/F32.
+ * @param[in, out] input2 Second tensor input. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), S16, QSYMM16 (only if @p input1 is QSYMM16), F16/F32.
* The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
- * @param[out] output Output tensor. Data types supported: U8 (Only if both inputs are U8), QASYMM8 (only if both inputs are QASYMM8), S16/F16/F32.
+ * @param[out] output Output tensor. Data types supported: U8 (Only if both inputs are U8), QASYMM8 (only if both inputs are QASYMM8), S16, QSYMM16 (only if both inputs are QSYMM16), F16/F32.
*/
void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output);
/** Static function to check if given info will lead to a valid configuration of @ref CLArithmeticOperationKernel for max
*
- * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/S16/S32/U32/F16/F32.
- * @param[in] input2 Second tensor input info. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), S16/F16/F32.
- * @param[in] output Output tensor info. Data types supported: U8 (Only if both inputs are U8), QASYMM8 ( only if both inputs are QASYMM8), S16/F16/F32.
+ * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/S16/QSYMM16/S32/U32/F16/F32.
+ * @param[in] input2 Second tensor input info. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), S16, QSYMM16 (only if @p input1 is QSYMM16), F16/F32.
+ * @param[in] output Output tensor info. Data types supported: U8 (Only if both inputs are U8), QASYMM8 (only if both inputs are QASYMM8), S16, QSYMM16 (only if both inputs are QSYMM16), F16/F32.
*
* @return a status
*/
@@ -149,7 +149,7 @@ public:
/** Basic function to run @ref CLArithmeticOperationKernel for min
*
- * @note The tensor data type for the inputs must be U8/QASYMM8/S16/S32/U32/F16/F32.
+ * @note The tensor data type for the inputs must be U8/QASYMM8/S16/QSYMM16/S32/U32/F16/F32.
 * @note The function performs a min operation between two tensors.
*/
class CLElementwiseMin : public ICLSimpleFunction
@@ -157,18 +157,18 @@ class CLElementwiseMin : public ICLSimpleFunction
public:
/** Initialise the kernel's inputs, output and conversion policy.
*
- * @param[in, out] input1 First tensor input. Data types supported: U8/QASYMM8/S16/S32/U32/F16/F32.
+ * @param[in, out] input1 First tensor input. Data types supported: U8/QASYMM8/S16/QSYMM16/S32/U32/F16/F32.
* The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
- * @param[in, out] input2 Second tensor input. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), S16/F16/F32.
+ * @param[in, out] input2 Second tensor input. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), S16, QSYMM16 (only if @p input1 is QSYMM16), F16/F32.
* The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
- * @param[out] output Output tensor. Data types supported: U8 (Only if both inputs are U8), QASYMM8 (only if both inputs are QASYMM8), S16/F16/F32.
+ * @param[out] output Output tensor. Data types supported: U8 (Only if both inputs are U8), QASYMM8 (only if both inputs are QASYMM8), S16, QSYMM16 (only if both inputs are QSYMM16), F16/F32.
*/
void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output);
/** Static function to check if given info will lead to a valid configuration of @ref CLArithmeticOperationKernel for min
*
- * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/S16/S32/U32/F16/F32.
- * @param[in] input2 Second tensor input info. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), S16/F16/F32.
- * @param[in] output Output tensor info. Data types supported: U8 (Only if both inputs are U8), QASYMM8 ( only if both inputs are QASYMM8), S16/F16/F32.
+ * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/S16/QSYMM16/S32/U32/F16/F32.
+ * @param[in] input2 Second tensor input info. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), S16, QSYMM16 (only if @p input1 is QSYMM16), F16/F32.
+ * @param[in] output Output tensor info. Data types supported: U8 (Only if both inputs are U8), QASYMM8 (only if both inputs are QASYMM8), S16, QSYMM16 (only if both inputs are QSYMM16), F16/F32.
*
* @return a status
*/
@@ -177,7 +177,7 @@ public:
/** Basic function to run @ref CLArithmeticOperationKernel for squared difference
*
- * @note The tensor data type for the inputs must be QASYMM8/U8/S16/F16/F32.
+ * @note The tensor data type for the inputs must be QASYMM8/U8/S16/QSYMM16/F16/F32.
 * @note The function performs a squared difference operation between two tensors (i.e., out[i] = (in1[i] - in2[i])^2).
*/
class CLElementwiseSquaredDiff : public ICLSimpleFunction
@@ -185,18 +185,18 @@ class CLElementwiseSquaredDiff : public ICLSimpleFunction
public:
/** Initialise the kernel's inputs, output and conversion policy.
*
- * @param[in, out] input1 First tensor input. Data types supported: U8/QASYMM8/S16/F16/F32.
+ * @param[in, out] input1 First tensor input. Data types supported: U8/QASYMM8/S16/QSYMM16/F16/F32.
* The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
- * @param[in, out] input2 Second tensor input. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), S16/F16/F32.
+ * @param[in, out] input2 Second tensor input. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), S16, QSYMM16 (only if @p input1 is QSYMM16), F16/F32.
* The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
- * @param[out] output Output tensor. Data types supported: U8 (Only if both inputs are U8), QASYMM8 (only if both inputs are QASYMM8), S16/F16/F32.
+ * @param[out] output Output tensor. Data types supported: U8 (Only if both inputs are U8), QASYMM8 (only if both inputs are QASYMM8), S16, QSYMM16 (only if both inputs are QSYMM16), F16/F32.
*/
void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output);
/** Static function to check if given info will lead to a valid configuration of @ref CLArithmeticOperationKernel for squared difference
*
- * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/S16/F16/F32.
- * @param[in] input2 Second tensor input info. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), S16/F16/F32.
- * @param[in] output Output tensor info. Data types supported: U8 (Only if both inputs are U8), QASYMM8 ( only if both inputs are QASYMM8), S16/F16/F32.
+ * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/S16/QSYMM16/F16/F32.
+ * @param[in] input2 Second tensor input info. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), S16, QSYMM16 (only if @p input1 is QSYMM16), F16/F32.
+ * @param[in] output Output tensor info. Data types supported: U8 (Only if both inputs are U8), QASYMM8 (only if both inputs are QASYMM8), S16, QSYMM16 (only if both inputs are QSYMM16), F16/F32.
*
* @return a status
*/
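The headers above document a validate() companion for every configure(). A hedged sketch of using it to pre-check a QSYMM16 addition before any resources are committed (the shape and scales are made-up values):

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/CL/functions/CLElementwiseOperations.h"
    #include <iostream>

    using namespace arm_compute;

    int main()
    {
        const TensorShape shape(8U, 8U);
        const TensorInfo in1(shape, 1, DataType::QSYMM16, QuantizationInfo(1.f / 32768.f, 0));
        const TensorInfo in2(shape, 1, DataType::QSYMM16, QuantizationInfo(2.f / 32768.f, 0));
        const TensorInfo out(shape, 1, DataType::QSYMM16, QuantizationInfo(5.f / 32768.f, 0));

        const Status s = CLArithmeticAddition::validate(&in1, &in2, &out, ConvertPolicy::SATURATE);
        if(!bool(s))
        {
            std::cerr << s.error_description() << std::endl; // report instead of asserting at configure() time
        }
        return bool(s) ? 0 : 1;
    }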
diff --git a/src/core/CL/CLHelpers.cpp b/src/core/CL/CLHelpers.cpp
index 2e6ceb4433..e80349e486 100644
--- a/src/core/CL/CLHelpers.cpp
+++ b/src/core/CL/CLHelpers.cpp
@@ -45,6 +45,7 @@ std::string get_cl_type_from_data_type(const DataType &dt)
case DataType::U16:
return "ushort";
case DataType::S16:
+ case DataType::QSYMM16:
return "short";
case DataType::U32:
return "uint";
@@ -78,6 +79,7 @@ std::string get_cl_select_type_from_data_type(const DataType &dt)
return "ushort";
case DataType::F16:
case DataType::S16:
+ case DataType::QSYMM16:
return "short";
case DataType::U32:
return "uint";
@@ -105,6 +107,7 @@ std::string get_data_size_from_data_type(const DataType &dt)
return "8";
case DataType::U16:
case DataType::S16:
+ case DataType::QSYMM16:
case DataType::F16:
return "16";
case DataType::U32:
@@ -246,6 +249,7 @@ size_t preferred_vector_width(const cl::Device &device, const DataType dt)
return device.getInfo<CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR>();
case DataType::U16:
case DataType::S16:
+ case DataType::QSYMM16:
return device.getInfo<CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT>();
case DataType::U32:
case DataType::S32:
diff --git a/src/core/CL/cl_kernels/elementwise_operation_quantized.cl b/src/core/CL/cl_kernels/elementwise_operation_quantized.cl
index 1b45da164f..a23ae2b005 100644
--- a/src/core/CL/cl_kernels/elementwise_operation_quantized.cl
+++ b/src/core/CL/cl_kernels/elementwise_operation_quantized.cl
@@ -37,11 +37,11 @@
#define OP_FUN_NAME_STR(op) elementwise_operation_##op##_quantized
#define OP_FUN_NAME(op) OP_FUN_NAME_STR(op)
-#if defined(OP) && defined(VEC_SIZE) && defined(OFFSET_IN1) && defined(OFFSET_IN2) && defined(OFFSET_OUT) && defined(SCALE_IN1) && defined(SCALE_IN2) && defined(SCALE_OUT)
+#if defined(OP) && defined(VEC_SIZE) && defined(OFFSET_IN1) && defined(OFFSET_IN2) && defined(OFFSET_OUT) && defined(SCALE_IN1) && defined(SCALE_IN2) && defined(SCALE_OUT) && defined(DATA_TYPE_OUT)
#define VEC_FLOAT VEC_DATA_TYPE(float, VEC_SIZE)
#define VEC_INT VEC_DATA_TYPE(int, VEC_SIZE)
-#define VEC_UCHAR VEC_DATA_TYPE(uchar, VEC_SIZE)
+#define VEC_TYPE VEC_DATA_TYPE(DATA_TYPE_OUT, VEC_SIZE)
/** This function executes an element-wise operation among two tensors.
*
@@ -54,8 +54,10 @@
* @attention To perform saturating operation -DSATURATE has to be passed to the compiler otherwise wrapping policy will be used.
* @attention Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. -DVEC_SIZE=16
* @attention The element-wise operation to be executed has to be passed at compile time using -DOP (e.g., -DOP=ADD)
+ * @attention For QSYMM16 operations OFFSET_IN1, OFFSET_IN2 and OFFSET_OUT must be set to zero
+ * @attention The data type must be passed at compile time using -DDATA_TYPE_OUT, e.g. -DDATA_TYPE_OUT=uchar
*
- * @param[in] in1_ptr Pointer to the source tensor. Supported data types: QASYMM8
+ * @param[in] in1_ptr Pointer to the source tensor. Supported data types: QASYMM8/QSYMM16
* @param[in] in1_stride_x Stride of the source tensor in X dimension (in bytes)
* @param[in] in1_step_x in1_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] in1_stride_y Stride of the source tensor in Y dimension (in bytes)
@@ -90,8 +92,8 @@ __kernel void OP_FUN_NAME(OP)(
Tensor3D in2 = CONVERT_TO_TENSOR3D_STRUCT(in2);
Tensor3D out = CONVERT_TO_TENSOR3D_STRUCT(out);
- VEC_INT in_a = CONVERT(VLOAD(VEC_SIZE)(0, (__global uchar *)in1.ptr), VEC_INT);
- VEC_INT in_b = CONVERT(VLOAD(VEC_SIZE)(0, (__global uchar *)in2.ptr), VEC_INT);
+ VEC_INT in_a = CONVERT(VLOAD(VEC_SIZE)(0, (__global DATA_TYPE_OUT *)in1.ptr), VEC_INT);
+ VEC_INT in_b = CONVERT(VLOAD(VEC_SIZE)(0, (__global DATA_TYPE_OUT *)in2.ptr), VEC_INT);
in_a = SUB(in_a, (VEC_INT)((int)OFFSET_IN1));
in_b = SUB(in_b, (VEC_INT)((int)OFFSET_IN2));
@@ -99,10 +101,10 @@ __kernel void OP_FUN_NAME(OP)(
const VEC_FLOAT in1f32 = CONVERT(in_a, VEC_FLOAT) * (VEC_FLOAT)((float)SCALE_IN1);
const VEC_FLOAT in2f32 = CONVERT(in_b, VEC_FLOAT) * (VEC_FLOAT)((float)SCALE_IN2);
const VEC_FLOAT qresf32 = OP(in1f32, in2f32) / ((VEC_FLOAT)(float)SCALE_OUT) + ((VEC_FLOAT)((float)OFFSET_OUT));
- const VEC_UCHAR res = CONVERT_SAT(CONVERT_DOWN(qresf32, VEC_INT), VEC_UCHAR);
+ const VEC_TYPE res = CONVERT_SAT(CONVERT_DOWN(qresf32, VEC_INT), VEC_TYPE);
// Store result
VSTORE(VEC_SIZE)
- (res, 0, (__global uchar *)out.ptr);
+ (res, 0, (__global DATA_TYPE_OUT *)out.ptr);
}
-#endif /* defined(OFFSET_IN1) && defined(OFFSET_IN2) && defined(OFFSET_OUT) && defined(SCALE_IN1) && defined(SCALE_IN2) && defined(SCALE_OUT) */
+#endif /* defined(OFFSET_IN1) && defined(OFFSET_IN2) && defined(OFFSET_OUT) && defined(SCALE_IN1) && defined(SCALE_IN2) && defined(SCALE_OUT) && defined(DATA_TYPE_OUT) */
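The kernel dequantizes both inputs to float, applies OP, then requantizes with saturation. A scalar C++ model of that arithmetic for ADD follows; it is a sketch only, the round-to-nearest step is an assumption, and for QSYMM16 all offsets are zero by construction:

    #include <algorithm>
    #include <cmath>
    #include <cstdint>

    // Scalar model of elementwise_operation_add_quantized for one QSYMM16 element.
    // Non-zero offsets only occur for QASYMM8 and are omitted here.
    int16_t qsymm16_add(int16_t a, int16_t b, float scale_in1, float scale_in2, float scale_out)
    {
        const float in1f = a * scale_in1;             // dequantize (offset == 0)
        const float in2f = b * scale_in2;
        const float resf = (in1f + in2f) / scale_out; // requantize (offset == 0)
        const int   resi = static_cast<int>(std::lround(resf));
        return static_cast<int16_t>(std::min(32767, std::max(-32768, resi))); // saturate to int16
    }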
diff --git a/src/core/CL/kernels/CLElementwiseOperationKernel.cpp b/src/core/CL/kernels/CLElementwiseOperationKernel.cpp
index 1d9c71555a..4c191de0bd 100644
--- a/src/core/CL/kernels/CLElementwiseOperationKernel.cpp
+++ b/src/core/CL/kernels/CLElementwiseOperationKernel.cpp
@@ -92,14 +92,22 @@ Status validate_arguments_with_float_only_supported_rules(const ITensorInfo &inp
Status validate_arguments_with_arithmetic_rules(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output)
{
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(&input1);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input1, 1, DataType::U8, DataType::QASYMM8, DataType::S16, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input1, 1, DataType::U8, DataType::QASYMM8, DataType::S16, DataType::QSYMM16, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(&input2);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input2, 1, DataType::U8, DataType::QASYMM8, DataType::S16, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input2, 1, DataType::U8, DataType::QASYMM8, DataType::S16, DataType::QSYMM16, DataType::F16, DataType::F32);
- const bool is_qasymm = is_data_type_quantized_asymmetric(input1.data_type()) || is_data_type_quantized_asymmetric(input2.data_type());
- if(is_qasymm)
+ const bool is_quantized = is_data_type_quantized(input1.data_type()) || is_data_type_quantized(input2.data_type());
+ if(is_quantized)
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(&input1, &input2);
+
+ if(is_data_type_quantized_symmetric(input1.data_type()))
+ {
+ const int32_t in1_offset = input1.quantization_info().uniform().offset;
+ const int32_t in2_offset = input2.quantization_info().uniform().offset;
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(in1_offset != 0, "For quantized symmetric, offset must be zero");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(in2_offset != 0, "For quantized symmetric, offset must be zero");
+ }
}
const TensorShape out_shape = TensorShape::broadcast_shape(input1.tensor_shape(), input2.tensor_shape());
@@ -110,14 +118,21 @@ Status validate_arguments_with_arithmetic_rules(const ITensorInfo &input1, const
if(output.total_size() > 0)
{
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(&output);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&output, 1, DataType::U8, DataType::QASYMM8, DataType::S16, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&output, 1, DataType::U8, DataType::QASYMM8, DataType::S16, DataType::QSYMM16, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_MSG((output.data_type() == DataType::U8) && ((input1.data_type() != DataType::U8) || (input2.data_type() != DataType::U8)),
"Output can only be U8 if both inputs are U8");
ARM_COMPUTE_RETURN_ERROR_ON_MSG(detail::have_different_dimensions(out_shape, output.tensor_shape(), 0),
"Wrong shape for output");
- if(is_qasymm)
+
+ if(is_quantized)
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(&input1, &output);
+
+ if(is_data_type_quantized_symmetric(output.data_type()))
+ {
+ const int32_t offset = output.quantization_info().uniform().offset;
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(offset != 0, "For quantized symmetric, offset must be zero");
+ }
}
}
return Status{};
@@ -132,7 +147,7 @@ CLBuildOptions generate_build_options_with_arithmetic_rules(const ITensorInfo &i
build_opts.add_option("-DDATA_TYPE_OUT=" + get_cl_type_from_data_type(output.data_type()));
build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration));
build_opts.add_option("-DOP=" + operation_string);
- if(is_data_type_quantized_asymmetric(input1.data_type()))
+ if(is_data_type_quantized(input1.data_type()))
{
const UniformQuantizationInfo iq1info = input1.quantization_info().uniform();
const UniformQuantizationInfo iq2info = input2.quantization_info().uniform();
@@ -188,6 +203,14 @@ std::pair<Status, Window> validate_and_configure_window_for_arithmetic_operators
{
set_format_if_unknown(output, Format::F32);
}
+ else if(input1.data_type() == DataType::QASYMM8 || input2.data_type() == DataType::QASYMM8)
+ {
+ set_data_type_if_unknown(output, DataType::QASYMM8);
+ }
+ else if(input1.data_type() == DataType::QSYMM16 || input2.data_type() == DataType::QSYMM16)
+ {
+ set_data_type_if_unknown(output, DataType::QSYMM16);
+ }
return configure_window_arithmetic_common(valid_region, input1, input2, output);
}
@@ -221,7 +244,7 @@ void CLElementwiseOperationKernel::configure_common(const ICLTensor *input1, con
_output = output;
std::string kernel_name = "elementwise_operation_" + name();
- if(is_data_type_quantized_asymmetric(input1->info()->data_type()))
+ if(is_data_type_quantized(input1->info()->data_type()))
{
kernel_name += "_quantized";
}
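Given the new checks above, a QSYMM16 tensor carrying a non-zero quantization offset should now be reported as an invalid configuration. A hedged sketch exercising that path (shape, scale and offset values are made up):

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/CL/functions/CLElementwiseOperations.h"

    using namespace arm_compute;

    int main()
    {
        const TensorShape shape(4U, 4U);
        // A non-zero offset on a QSYMM16 tensor violates the new zero-offset check.
        const TensorInfo bad(shape, 1, DataType::QSYMM16, QuantizationInfo(1.f / 32768.f, 3));
        const TensorInfo ok(shape, 1, DataType::QSYMM16, QuantizationInfo(1.f / 32768.f, 0));

        const Status s = CLArithmeticAddition::validate(&bad, &ok, &ok, ConvertPolicy::SATURATE);
        // Expect an error along the lines of "For quantized symmetric, offset must be zero".
        return bool(s) ? 1 : 0; // failure is the expected outcome here
    }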
diff --git a/tests/validation/CL/ArithmeticAddition.cpp b/tests/validation/CL/ArithmeticAddition.cpp
index bed04af9e5..e9b4b16b0c 100644
--- a/tests/validation/CL/ArithmeticAddition.cpp
+++ b/tests/validation/CL/ArithmeticAddition.cpp
@@ -50,6 +50,9 @@ const auto ArithmeticAdditionU8Dataset = combine(combine(framework::dataset::mak
const auto ArithmeticAdditionQASYMM8Dataset = combine(combine(framework::dataset::make("DataType", DataType::QASYMM8), framework::dataset::make("DataType", DataType::QASYMM8)),
framework::dataset::make("DataType",
DataType::QASYMM8));
+const auto ArithmeticAdditionQSYMM16Dataset = combine(combine(framework::dataset::make("DataType", DataType::QSYMM16), framework::dataset::make("DataType", DataType::QSYMM16)),
+ framework::dataset::make("DataType",
+ DataType::QSYMM16));
const auto ArithmeticAdditionS16Dataset = combine(combine(framework::dataset::make("DataType", { DataType::U8, DataType::S16 }), framework::dataset::make("DataType", DataType::S16)),
framework::dataset::make("DataType", DataType::S16));
const auto ArithmeticAdditionFP16Dataset = combine(combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::F16)),
@@ -93,6 +96,7 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
template <typename T>
using CLArithmeticAdditionFixture = ArithmeticAdditionValidationFixture<CLTensor, CLAccessor, CLArithmeticAddition, T>;
+TEST_SUITE(Integer)
TEST_SUITE(U8)
DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
shape, policy)
@@ -123,7 +127,48 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticAdditionFixture<uint8_t>, framework
// Validate output
validate(CLAccessor(_target), _reference);
}
-TEST_SUITE_END()
+TEST_SUITE_END() // U8
+
+TEST_SUITE(S16)
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", { DataType::U8, DataType::S16 })),
+ framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+ shape, data_type, policy)
+{
+ // Create tensors
+ CLTensor ref_src1 = create_tensor<CLTensor>(shape, data_type);
+ CLTensor ref_src2 = create_tensor<CLTensor>(shape, DataType::S16);
+ CLTensor dst = create_tensor<CLTensor>(shape, DataType::S16);
+
+ // Create and Configure function
+ CLArithmeticAddition add;
+ add.configure(&ref_src1, &ref_src2, &dst, policy);
+
+ // Validate valid region
+ const ValidRegion valid_region = shape_to_valid_region(shape);
+ validate(dst.info()->valid_region(), valid_region);
+
+ // Validate padding
+ const PaddingSize padding = PaddingCalculator(shape.x(), num_elems_processed_per_iteration).required_padding();
+ validate(ref_src1.info()->padding(), padding);
+ validate(ref_src2.info()->padding(), padding);
+ validate(dst.info()->padding(), padding);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticAdditionFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), ArithmeticAdditionS16Dataset),
+ framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, CLArithmeticAdditionFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(), ArithmeticAdditionS16Dataset),
+ framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference);
+}
+TEST_SUITE_END() // S16
+TEST_SUITE_END() // Integer
template <typename T>
using CLArithmeticAdditionQuantizedFixture = ArithmeticAdditionValidationQuantizedFixture<CLTensor, CLAccessor, CLArithmeticAddition, T>;
@@ -156,27 +201,22 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(datasets::Sma
FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticAdditionQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(datasets::SmallShapes(),
ArithmeticAdditionQASYMM8Dataset),
framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })),
- framework::dataset::make("QuantizationInfo", { QuantizationInfo(5.f / 255.f, 20) })),
- framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 10) })),
- framework::dataset::make("QuantizationInfo", { QuantizationInfo(1.f / 255.f, 5) }))
-
- )
+ framework::dataset::make("Src0QInfo", { QuantizationInfo(5.f / 255.f, 20) })),
+ framework::dataset::make("Src1QInfo", { QuantizationInfo(2.f / 255.f, 10) })),
+ framework::dataset::make("OutQInfo", { QuantizationInfo(1.f / 255.f, 5) })))
{
// Validate output
validate(CLAccessor(_target), _reference);
}
-TEST_SUITE_END()
-TEST_SUITE_END()
-
-TEST_SUITE(S16)
-DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", { DataType::U8, DataType::S16 })),
- framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
- shape, data_type, policy)
+TEST_SUITE_END() // QASYMM8
+TEST_SUITE(QSYMM16)
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })),
+ shape, policy)
{
// Create tensors
- CLTensor ref_src1 = create_tensor<CLTensor>(shape, data_type);
- CLTensor ref_src2 = create_tensor<CLTensor>(shape, DataType::S16);
- CLTensor dst = create_tensor<CLTensor>(shape, DataType::S16);
+ CLTensor ref_src1 = create_tensor<CLTensor>(shape, DataType::QSYMM16);
+ CLTensor ref_src2 = create_tensor<CLTensor>(shape, DataType::QSYMM16);
+ CLTensor dst = create_tensor<CLTensor>(shape, DataType::QSYMM16);
// Create and Configure function
CLArithmeticAddition add;
@@ -193,20 +233,18 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(datas
validate(dst.info()->padding(), padding);
}
-FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticAdditionFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), ArithmeticAdditionS16Dataset),
- framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })))
-{
- // Validate output
- validate(CLAccessor(_target), _reference);
-}
-
-FIXTURE_DATA_TEST_CASE(RunLarge, CLArithmeticAdditionFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(), ArithmeticAdditionS16Dataset),
- framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticAdditionQuantizedFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(datasets::SmallShapes(),
+ ArithmeticAdditionQSYMM16Dataset),
+ framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })),
+ framework::dataset::make("Src0QInfo", { QuantizationInfo(1.f / 32768.f, 0), QuantizationInfo(5.f / 32768.f, 0) })),
+ framework::dataset::make("Src1QInfo", { QuantizationInfo(2.f / 32768.f, 0), QuantizationInfo(5.f / 32768.f, 0) })),
+ framework::dataset::make("OutQInfo", { QuantizationInfo(5.f / 32768.f, 0) })))
{
// Validate output
validate(CLAccessor(_target), _reference);
}
-TEST_SUITE_END()
+TEST_SUITE_END() // QSYMM16
+TEST_SUITE_END() // Quantized
TEST_SUITE(Float)
TEST_SUITE(FP16)
@@ -216,7 +254,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticAdditionFixture<half>, framework::D
// Validate output
validate(CLAccessor(_target), _reference);
}
-TEST_SUITE_END()
+TEST_SUITE_END() // FP16
TEST_SUITE(FP32)
DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
@@ -274,11 +312,11 @@ FIXTURE_DATA_TEST_CASE(RunLargeBroadcast, CLArithmeticAdditionBroadcastFixture<f
// Validate output
validate(CLAccessor(_target), _reference);
}
-TEST_SUITE_END()
-TEST_SUITE_END()
+TEST_SUITE_END() // FP32
+TEST_SUITE_END() // Float
-TEST_SUITE_END()
-TEST_SUITE_END()
+TEST_SUITE_END() // ArithmeticAddition
+TEST_SUITE_END() // CL
} // namespace validation
} // namespace test
} // namespace arm_compute
diff --git a/tests/validation/CL/ArithmeticSubtraction.cpp b/tests/validation/CL/ArithmeticSubtraction.cpp
index 796486bf02..b6a8139c0d 100644
--- a/tests/validation/CL/ArithmeticSubtraction.cpp
+++ b/tests/validation/CL/ArithmeticSubtraction.cpp
@@ -51,6 +51,9 @@ const auto ArithmeticSubtractionU8Dataset = combine(combine(framework::dataset::
const auto ArithmeticSubtractionQASYMM8Dataset = combine(combine(framework::dataset::make("DataType", DataType::QASYMM8), framework::dataset::make("DataType", DataType::QASYMM8)),
framework::dataset::make("DataType",
DataType::QASYMM8));
+const auto ArithmeticSubtractionQSYMM16Dataset = combine(combine(framework::dataset::make("DataType", DataType::QSYMM16), framework::dataset::make("DataType", DataType::QSYMM16)),
+ framework::dataset::make("DataType",
+ DataType::QSYMM16));
const auto ArithmeticSubtractionS16Dataset = combine(combine(framework::dataset::make("DataType", { DataType::U8, DataType::S16 }), framework::dataset::make("DataType", DataType::S16)),
framework::dataset::make("DataType", DataType::S16));
const auto ArithmeticSubtractionFP16Dataset = combine(combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::F16)),
@@ -94,6 +97,7 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
template <typename T>
using CLArithmeticSubtractionFixture = ArithmeticSubtractionValidationFixture<CLTensor, CLAccessor, CLArithmeticSubtraction, T>;
+TEST_SUITE(Integer)
TEST_SUITE(U8)
DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
shape, policy)
@@ -124,7 +128,48 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticSubtractionFixture<uint8_t>, framew
// Validate output
validate(CLAccessor(_target), _reference);
}
-TEST_SUITE_END()
+TEST_SUITE_END() // U8
+
+TEST_SUITE(S16)
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", { DataType::U8, DataType::S16 })),
+ framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+ shape, data_type, policy)
+{
+ // Create tensors
+ CLTensor ref_src1 = create_tensor<CLTensor>(shape, data_type);
+ CLTensor ref_src2 = create_tensor<CLTensor>(shape, DataType::S16);
+ CLTensor dst = create_tensor<CLTensor>(shape, DataType::S16);
+
+ // Create and Configure function
+ CLArithmeticSubtraction add;
+ add.configure(&ref_src1, &ref_src2, &dst, policy);
+
+ // Validate valid region
+ const ValidRegion valid_region = shape_to_valid_region(shape);
+ validate(dst.info()->valid_region(), valid_region);
+
+ // Validate padding
+ const PaddingSize padding = PaddingCalculator(shape.x(), num_elems_processed_per_iteration).required_padding();
+ validate(ref_src1.info()->padding(), padding);
+ validate(ref_src2.info()->padding(), padding);
+ validate(dst.info()->padding(), padding);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticSubtractionFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), ArithmeticSubtractionS16Dataset),
+ framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, CLArithmeticSubtractionFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(), ArithmeticSubtractionS16Dataset),
+ framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference);
+}
+TEST_SUITE_END() // S16
+TEST_SUITE_END() // Integer
template <typename T>
using CLArithmeticSubtractionQuantizedFixture = ArithmeticSubtractionValidationQuantizedFixture<CLTensor, CLAccessor, CLArithmeticSubtraction, T>;
@@ -157,31 +202,26 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(datasets::Sma
FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticSubtractionQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(datasets::SmallShapes(),
ArithmeticSubtractionQASYMM8Dataset),
framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })),
- framework::dataset::make("QuantizationInfo", { QuantizationInfo(5.f / 255.f, 20) })),
- framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 10) })),
- framework::dataset::make("QuantizationInfo", { QuantizationInfo(1.f / 255.f, 5) }))
-
- )
+ framework::dataset::make("Src0QInfo", { QuantizationInfo(5.f / 255.f, 20) })),
+ framework::dataset::make("Src1QInfo", { QuantizationInfo(2.f / 255.f, 10) })),
+ framework::dataset::make("OutQInfo", { QuantizationInfo(1.f / 255.f, 5) })))
{
// Validate output
validate(CLAccessor(_target), _reference);
}
-TEST_SUITE_END()
-TEST_SUITE_END()
-
-TEST_SUITE(S16)
-DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", { DataType::U8, DataType::S16 })),
- framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
- shape, data_type, policy)
+TEST_SUITE_END() // QASYMM8
+TEST_SUITE(QSYMM16)
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })),
+ shape, policy)
{
// Create tensors
- CLTensor ref_src1 = create_tensor<CLTensor>(shape, data_type);
- CLTensor ref_src2 = create_tensor<CLTensor>(shape, DataType::S16);
- CLTensor dst = create_tensor<CLTensor>(shape, DataType::S16);
+ CLTensor ref_src1 = create_tensor<CLTensor>(shape, DataType::QSYMM16);
+ CLTensor ref_src2 = create_tensor<CLTensor>(shape, DataType::QSYMM16);
+ CLTensor dst = create_tensor<CLTensor>(shape, DataType::QSYMM16);
// Create and Configure function
- CLArithmeticSubtraction add;
- add.configure(&ref_src1, &ref_src2, &dst, policy);
+ CLArithmeticSubtraction sub;
+ sub.configure(&ref_src1, &ref_src2, &dst, policy);
// Validate valid region
const ValidRegion valid_region = shape_to_valid_region(shape);
@@ -194,20 +234,18 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(datas
validate(dst.info()->padding(), padding);
}
-FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticSubtractionFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), ArithmeticSubtractionS16Dataset),
- framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })))
-{
- // Validate output
- validate(CLAccessor(_target), _reference);
-}
-
-FIXTURE_DATA_TEST_CASE(RunLarge, CLArithmeticSubtractionFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(), ArithmeticSubtractionS16Dataset),
- framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticSubtractionQuantizedFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(datasets::SmallShapes(),
+ ArithmeticSubtractionQSYMM16Dataset),
+ framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })),
+ framework::dataset::make("Src0QInfo", { QuantizationInfo(1.f / 32768.f, 0), QuantizationInfo(5.f / 32768.f, 0) })),
+ framework::dataset::make("Src1QInfo", { QuantizationInfo(2.f / 32768.f, 0), QuantizationInfo(5.f / 32768.f, 0) })),
+ framework::dataset::make("OutQInfo", { QuantizationInfo(5.f / 32768.f, 0) })))
{
// Validate output
validate(CLAccessor(_target), _reference);
}
-TEST_SUITE_END()
+TEST_SUITE_END() // QSYMM16
+TEST_SUITE_END() // Quantized
TEST_SUITE(Float)
TEST_SUITE(FP16)
@@ -217,7 +255,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticSubtractionFixture<half>, framework
// Validate output
validate(CLAccessor(_target), _reference);
}
-TEST_SUITE_END()
+TEST_SUITE_END() // FP16
TEST_SUITE(FP32)
DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
@@ -275,11 +313,11 @@ FIXTURE_DATA_TEST_CASE(RunLargeBroadcast, CLArithmeticSubtractionBroadcastFixtur
// Validate output
validate(CLAccessor(_target), _reference);
}
-TEST_SUITE_END()
-TEST_SUITE_END()
+TEST_SUITE_END() // FP32
+TEST_SUITE_END() // Float
-TEST_SUITE_END()
-TEST_SUITE_END()
+TEST_SUITE_END() // ArithmeticSubtraction
+TEST_SUITE_END() // CL
} // namespace validation
} // namespace test
} // namespace arm_compute
diff --git a/tests/validation/CL/ElementwiseMax.cpp b/tests/validation/CL/ElementwiseMax.cpp
index 773a91905e..255acbe645 100644
--- a/tests/validation/CL/ElementwiseMax.cpp
+++ b/tests/validation/CL/ElementwiseMax.cpp
@@ -53,6 +53,9 @@ const auto ElementwiseMaxU8Dataset = combine(combine(framework::dataset::make("D
const auto ElementwiseMaxQASYMM8Dataset = combine(combine(framework::dataset::make("DataType", DataType::QASYMM8), framework::dataset::make("DataType", DataType::QASYMM8)),
framework::dataset::make("DataType",
DataType::QASYMM8));
+const auto ElementwiseMaxQSYMM16Dataset = combine(combine(framework::dataset::make("DataType", DataType::QSYMM16), framework::dataset::make("DataType", DataType::QSYMM16)),
+ framework::dataset::make("DataType",
+ DataType::QSYMM16));
const auto ElementwiseMaxS16Dataset = combine(combine(framework::dataset::make("DataType", { DataType::U8, DataType::S16 }), framework::dataset::make("DataType", DataType::S16)),
framework::dataset::make("DataType", DataType::S16));
const auto ElementwiseMaxFP16Dataset = combine(combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::F16)),
@@ -96,6 +99,7 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
template <typename T>
using CLElementwiseMaxFixture = ElementwiseMaxValidationFixture<CLTensor, CLAccessor, CLElementwiseMax, T>;
+TEST_SUITE(Integer)
TEST_SUITE(U8)
DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, datasets::SmallShapes(),
shape)
@@ -106,8 +110,8 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, datasets::SmallShapes
CLTensor dst = create_tensor<CLTensor>(shape, DataType::U8);
// Create and Configure function
- CLElementwiseMax add;
- add.configure(&ref_src1, &ref_src2, &dst);
+ CLElementwiseMax max;
+ max.configure(&ref_src1, &ref_src2, &dst);
// Validate valid region
const ValidRegion valid_region = shape_to_valid_region(shape);
@@ -127,6 +131,38 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMaxFixture<uint8_t>, framework::Da
}
TEST_SUITE_END()
+TEST_SUITE(S16)
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), framework::dataset::make("DataType", { DataType::U8, DataType::S16 })),
+ shape, data_type)
+{
+ // Create tensors
+ CLTensor ref_src1 = create_tensor<CLTensor>(shape, data_type);
+ CLTensor ref_src2 = create_tensor<CLTensor>(shape, DataType::S16);
+ CLTensor dst = create_tensor<CLTensor>(shape, DataType::S16);
+
+ // Create and Configure function
+ CLElementwiseMax max;
+ max.configure(&ref_src1, &ref_src2, &dst);
+
+ // Validate valid region
+ const ValidRegion valid_region = shape_to_valid_region(shape);
+ validate(dst.info()->valid_region(), valid_region);
+
+ // Validate padding
+ const PaddingSize padding = PaddingCalculator(shape.x(), num_elems_processed_per_iteration).required_padding();
+ validate(ref_src1.info()->padding(), padding);
+ validate(ref_src2.info()->padding(), padding);
+ validate(dst.info()->padding(), padding);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMaxFixture<int16_t>, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), ElementwiseMaxS16Dataset))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference);
+}
+TEST_SUITE_END()
+TEST_SUITE_END()
+
template <typename T>
using CLElementwiseMaxQuantizedFixture = ElementwiseMaxValidationQuantizedFixture<CLTensor, CLAccessor, CLElementwiseMax, T>;
@@ -141,8 +177,8 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, datasets::SmallShapes
CLTensor dst = create_tensor<CLTensor>(shape, DataType::QASYMM8);
// Create and Configure function
- CLElementwiseMax add;
- add.configure(&ref_src1, &ref_src2, &dst);
+ CLElementwiseMax max;
+ max.configure(&ref_src1, &ref_src2, &dst);
// Validate valid region
const ValidRegion valid_region = shape_to_valid_region(shape);
@@ -157,30 +193,26 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, datasets::SmallShapes
FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMaxQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(),
ElementwiseMaxQASYMM8Dataset),
- framework::dataset::make("QuantizationInfo", { QuantizationInfo(5.f / 255.f, 20) })),
- framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 10) })),
- framework::dataset::make("QuantizationInfo", { QuantizationInfo(1.f / 255.f, 5) }))
-
- )
+ framework::dataset::make("Src0QInfo", { QuantizationInfo(5.f / 255.f, 20) })),
+ framework::dataset::make("Src1QInfo", { QuantizationInfo(2.f / 255.f, 10) })),
+ framework::dataset::make("OutQInfo", { QuantizationInfo(1.f / 255.f, 5) })))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_fp32, 0.01);
}
TEST_SUITE_END()
-TEST_SUITE_END()
-
-TEST_SUITE(S16)
-DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), framework::dataset::make("DataType", { DataType::U8, DataType::S16 })),
- shape, data_type)
+TEST_SUITE(QSYMM16)
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, datasets::SmallShapes(),
+ shape)
{
// Create tensors
- CLTensor ref_src1 = create_tensor<CLTensor>(shape, data_type);
- CLTensor ref_src2 = create_tensor<CLTensor>(shape, DataType::S16);
- CLTensor dst = create_tensor<CLTensor>(shape, DataType::S16);
+ CLTensor ref_src1 = create_tensor<CLTensor>(shape, DataType::QSYMM16);
+ CLTensor ref_src2 = create_tensor<CLTensor>(shape, DataType::QSYMM16);
+ CLTensor dst = create_tensor<CLTensor>(shape, DataType::QSYMM16);
// Create and Configure function
- CLElementwiseMax add;
- add.configure(&ref_src1, &ref_src2, &dst);
+ CLElementwiseMax max;
+ max.configure(&ref_src1, &ref_src2, &dst);
// Validate valid region
const ValidRegion valid_region = shape_to_valid_region(shape);
@@ -193,12 +225,17 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(datasets::Sma
validate(dst.info()->padding(), padding);
}
-FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMaxFixture<int16_t>, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), ElementwiseMaxS16Dataset))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMaxQuantizedFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(),
+ ElementwiseMaxQSYMM16Dataset),
+ framework::dataset::make("Src0QInfo", { QuantizationInfo(1.f / 32768.f, 0), QuantizationInfo(5.f / 32768.f, 0) })),
+ framework::dataset::make("Src1QInfo", { QuantizationInfo(2.f / 32768.f, 0), QuantizationInfo(5.f / 32768.f, 0) })),
+ framework::dataset::make("OutQInfo", { QuantizationInfo(5.f / 32768.f, 0) })))
{
// Validate output
validate(CLAccessor(_target), _reference);
}
TEST_SUITE_END()
+TEST_SUITE_END()
TEST_SUITE(Float)
TEST_SUITE(FP16)
@@ -219,8 +256,8 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, datasets::SmallShapes
CLTensor dst = create_tensor<CLTensor>(shape, DataType::F32);
// Create and Configure function
- CLElementwiseMax add;
- add.configure(&ref_src1, &ref_src2, &dst);
+ CLElementwiseMax max;
+ max.configure(&ref_src1, &ref_src2, &dst);
// Validate valid region
const ValidRegion valid_region = shape_to_valid_region(shape);
@@ -251,8 +288,8 @@ FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, CLElementwiseMaxBroadcastFixture<float
TEST_SUITE_END()
TEST_SUITE_END()
-TEST_SUITE_END()
-TEST_SUITE_END()
+TEST_SUITE_END() // ElementwiseMax
+TEST_SUITE_END() // CL
} // namespace validation
} // namespace test
} // namespace arm_compute
diff --git a/tests/validation/CL/ElementwiseMin.cpp b/tests/validation/CL/ElementwiseMin.cpp
index b6486fccc7..01d18967f8 100644
--- a/tests/validation/CL/ElementwiseMin.cpp
+++ b/tests/validation/CL/ElementwiseMin.cpp
@@ -53,6 +53,9 @@ const auto ElementwiseMinU8Dataset = combine(combine(framework::dataset::make("D
const auto ElementwiseMinQASYMM8Dataset = combine(combine(framework::dataset::make("DataType", DataType::QASYMM8), framework::dataset::make("DataType", DataType::QASYMM8)),
framework::dataset::make("DataType",
DataType::QASYMM8));
+const auto ElementwiseMinQSYMM16Dataset = combine(combine(framework::dataset::make("DataType", DataType::QSYMM16), framework::dataset::make("DataType", DataType::QSYMM16)),
+ framework::dataset::make("DataType",
+ DataType::QSYMM16));
const auto ElementwiseMinS16Dataset = combine(combine(framework::dataset::make("DataType", { DataType::U8, DataType::S16 }), framework::dataset::make("DataType", DataType::S16)),
framework::dataset::make("DataType", DataType::S16));
const auto ElementwiseMinFP16Dataset = combine(combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::F16)),
@@ -96,6 +99,7 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
template <typename T>
using CLElementwiseMinFixture = ElementwiseMinValidationFixture<CLTensor, CLAccessor, CLElementwiseMin, T>;
+TEST_SUITE(Integer)
TEST_SUITE(U8)
DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, datasets::SmallShapes(),
shape)
@@ -106,8 +110,8 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, datasets::SmallShapes
CLTensor dst = create_tensor<CLTensor>(shape, DataType::U8);
// Create and Configure function
- CLElementwiseMin add;
- add.configure(&ref_src1, &ref_src2, &dst);
+ CLElementwiseMin min;
+ min.configure(&ref_src1, &ref_src2, &dst);
// Validate valid region
const ValidRegion valid_region = shape_to_valid_region(shape);
@@ -127,6 +131,38 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMinFixture<uint8_t>, framework::Da
}
TEST_SUITE_END()
+TEST_SUITE(S16)
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), framework::dataset::make("DataType", { DataType::U8, DataType::S16 })),
+ shape, data_type)
+{
+ // Create tensors
+ CLTensor ref_src1 = create_tensor<CLTensor>(shape, data_type);
+ CLTensor ref_src2 = create_tensor<CLTensor>(shape, DataType::S16);
+ CLTensor dst = create_tensor<CLTensor>(shape, DataType::S16);
+
+ // Create and Configure function
+ CLElementwiseMin min;
+ min.configure(&ref_src1, &ref_src2, &dst);
+
+ // Validate valid region
+ const ValidRegion valid_region = shape_to_valid_region(shape);
+ validate(dst.info()->valid_region(), valid_region);
+
+ // Validate padding
+ const PaddingSize padding = PaddingCalculator(shape.x(), num_elems_processed_per_iteration).required_padding();
+ validate(ref_src1.info()->padding(), padding);
+ validate(ref_src2.info()->padding(), padding);
+ validate(dst.info()->padding(), padding);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMinFixture<int16_t>, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), ElementwiseMinS16Dataset))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference);
+}
+TEST_SUITE_END()
+TEST_SUITE_END()
+
template <typename T>
using CLElementwiseMinQuantizedFixture = ElementwiseMinValidationQuantizedFixture<CLTensor, CLAccessor, CLElementwiseMin, T>;
@@ -141,8 +177,8 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, datasets::SmallShapes
CLTensor dst = create_tensor<CLTensor>(shape, DataType::QASYMM8);
// Create and Configure function
- CLElementwiseMin add;
- add.configure(&ref_src1, &ref_src2, &dst);
+ CLElementwiseMin min;
+ min.configure(&ref_src1, &ref_src2, &dst);
// Validate valid region
const ValidRegion valid_region = shape_to_valid_region(shape);
@@ -157,30 +193,26 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, datasets::SmallShapes
FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMinQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(),
ElementwiseMinQASYMM8Dataset),
- framework::dataset::make("QuantizationInfo", { QuantizationInfo(5.f / 255.f, 20) })),
- framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 10) })),
- framework::dataset::make("QuantizationInfo", { QuantizationInfo(1.f / 255.f, 5) }))
-
- )
+ framework::dataset::make("Src0QInfo", { QuantizationInfo(5.f / 255.f, 20) })),
+ framework::dataset::make("Src1QInfo", { QuantizationInfo(2.f / 255.f, 10) })),
+ framework::dataset::make("OutQInfo", { QuantizationInfo(1.f / 255.f, 5) })))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_fp32, 0.01);
}
TEST_SUITE_END()
-TEST_SUITE_END()
-
-TEST_SUITE(S16)
-DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), framework::dataset::make("DataType", { DataType::U8, DataType::S16 })),
- shape, data_type)
+TEST_SUITE(QSYMM16)
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, datasets::SmallShapes(),
+ shape)
{
// Create tensors
- CLTensor ref_src1 = create_tensor<CLTensor>(shape, data_type);
- CLTensor ref_src2 = create_tensor<CLTensor>(shape, DataType::S16);
- CLTensor dst = create_tensor<CLTensor>(shape, DataType::S16);
+ CLTensor ref_src1 = create_tensor<CLTensor>(shape, DataType::QSYMM16);
+ CLTensor ref_src2 = create_tensor<CLTensor>(shape, DataType::QSYMM16);
+ CLTensor dst = create_tensor<CLTensor>(shape, DataType::QSYMM16);
// Create and Configure function
- CLElementwiseMin add;
- add.configure(&ref_src1, &ref_src2, &dst);
+ CLElementwiseMin min;
+ min.configure(&ref_src1, &ref_src2, &dst);
// Validate valid region
const ValidRegion valid_region = shape_to_valid_region(shape);
@@ -193,12 +225,17 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(datasets::Sma
validate(dst.info()->padding(), padding);
}
-FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMinFixture<int16_t>, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), ElementwiseMinS16Dataset))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMinQuantizedFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(),
+ ElementwiseMinQSYMM16Dataset),
+ framework::dataset::make("SrcQInfo0", { QuantizationInfo(1.f / 32768.f, 0), QuantizationInfo(5.f / 32768.f, 0) })),
+ framework::dataset::make("SrcQInfo1", { QuantizationInfo(2.f / 32768.f, 0), QuantizationInfo(5.f / 32768.f, 0) })),
+ framework::dataset::make("OutQInfo", { QuantizationInfo(5.f / 32768.f, 0) })))
{
// Validate output
validate(CLAccessor(_target), _reference);
}
TEST_SUITE_END()
+TEST_SUITE_END()
TEST_SUITE(Float)
TEST_SUITE(FP16)
@@ -219,8 +256,8 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, datasets::SmallShapes
CLTensor dst = create_tensor<CLTensor>(shape, DataType::F32);
// Create and Configure function
- CLElementwiseMin add;
- add.configure(&ref_src1, &ref_src2, &dst);
+ CLElementwiseMin min;
+ min.configure(&ref_src1, &ref_src2, &dst);
// Validate valid region
const ValidRegion valid_region = shape_to_valid_region(shape);
@@ -250,8 +287,8 @@ FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, CLElementwiseMinBroadcastFixture<float
TEST_SUITE_END()
TEST_SUITE_END()
-TEST_SUITE_END()
-TEST_SUITE_END()
+TEST_SUITE_END() // ElementwiseMin
+TEST_SUITE_END() // CL
} // namespace validation
} // namespace test
} // namespace arm_compute
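
A note on the QSYMM16 quantization parameters appearing in the datasets above: QSYMM16 is 16-bit symmetric quantization, a single scale with the zero point fixed at 0, which is why every QuantizationInfo entry here uses an offset of 0 and scales such as 1.f / 32768.f. A minimal standalone sketch of the mapping (the helper names are illustrative, not arm_compute's API, and the exact saturation bounds may differ):

    #include <algorithm>
    #include <cmath>
    #include <cstdint>

    // Symmetric 16-bit quantization: real = scale * q, zero point fixed at 0.
    inline int16_t quantize_symm16(float value, float scale)
    {
        const long q = std::lround(value / scale); // round to nearest
        return static_cast<int16_t>(std::max(-32768L, std::min(32767L, q)));
    }

    inline float dequantize_symm16(int16_t q, float scale)
    {
        return static_cast<float>(q) * scale;
    }

With an output scale of 5.f / 32768.f, as in the OutQInfo entries, the representable output range is roughly [-5, 5], which keeps the min/max results of the quantized test inputs in range.
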
diff --git a/tests/validation/CL/ElementwiseSquaredDiff.cpp b/tests/validation/CL/ElementwiseSquaredDiff.cpp
index 35fecf127c..edc150109e 100644
--- a/tests/validation/CL/ElementwiseSquaredDiff.cpp
+++ b/tests/validation/CL/ElementwiseSquaredDiff.cpp
@@ -45,6 +45,7 @@ namespace
{
RelativeTolerance<float> tolerance_fp32(0.000001f);
RelativeTolerance<float> tolerance_fp16(0.001f);
+AbsoluteTolerance<float> tolerance_qsymm16(1);
constexpr unsigned int num_elems_processed_per_iteration = 16;
/** Input data sets **/
@@ -54,6 +55,9 @@ const auto ElementwiseSquaredDiffU8Dataset = combine(combine(framework::dataset:
const auto ElementwiseSquaredDiffQASYMM8Dataset = combine(combine(framework::dataset::make("DataType", DataType::QASYMM8), framework::dataset::make("DataType", DataType::QASYMM8)),
framework::dataset::make("DataType",
DataType::QASYMM8));
+const auto ElementwiseSquaredDiffQSYMM16Dataset = combine(combine(framework::dataset::make("DataType", DataType::QSYMM16), framework::dataset::make("DataType", DataType::QSYMM16)),
+ framework::dataset::make("DataType",
+ DataType::QSYMM16));
const auto ElementwiseSquaredDiffS16Dataset = combine(combine(framework::dataset::make("DataType", { DataType::U8, DataType::S16 }), framework::dataset::make("DataType", DataType::S16)),
framework::dataset::make("DataType", DataType::S16));
const auto ElementwiseSquaredDiffFP16Dataset = combine(combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::F16)),
@@ -97,6 +101,7 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
template <typename T>
using CLElementwiseSquaredDiffFixture = ElementwiseSquaredDiffValidationFixture<CLTensor, CLAccessor, CLElementwiseSquaredDiff, T>;
+TEST_SUITE(Integer)
TEST_SUITE(U8)
DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, datasets::SmallShapes(),
shape)
@@ -107,8 +112,8 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, datasets::SmallShapes
CLTensor dst = create_tensor<CLTensor>(shape, DataType::U8);
// Create and Configure function
- CLElementwiseSquaredDiff add;
- add.configure(&ref_src1, &ref_src2, &dst);
+ CLElementwiseSquaredDiff sqdiff;
+ sqdiff.configure(&ref_src1, &ref_src2, &dst);
// Validate valid region
const ValidRegion valid_region = shape_to_valid_region(shape);
@@ -128,6 +133,38 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseSquaredDiffFixture<uint8_t>, frame
}
TEST_SUITE_END()
+TEST_SUITE(S16)
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), framework::dataset::make("DataType", { DataType::U8, DataType::S16 })),
+ shape, data_type)
+{
+ // Create tensors
+ CLTensor ref_src1 = create_tensor<CLTensor>(shape, data_type);
+ CLTensor ref_src2 = create_tensor<CLTensor>(shape, DataType::S16);
+ CLTensor dst = create_tensor<CLTensor>(shape, DataType::S16);
+
+ // Create and Configure function
+ CLElementwiseSquaredDiff sqdiff;
+ sqdiff.configure(&ref_src1, &ref_src2, &dst);
+
+ // Validate valid region
+ const ValidRegion valid_region = shape_to_valid_region(shape);
+ validate(dst.info()->valid_region(), valid_region);
+
+ // Validate padding
+ const PaddingSize padding = PaddingCalculator(shape.x(), num_elems_processed_per_iteration).required_padding();
+ validate(ref_src1.info()->padding(), padding);
+ validate(ref_src2.info()->padding(), padding);
+ validate(dst.info()->padding(), padding);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseSquaredDiffFixture<int16_t>, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), ElementwiseSquaredDiffS16Dataset))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference);
+}
+TEST_SUITE_END()
+TEST_SUITE_END()
+
template <typename T>
using CLElementwiseSquaredDiffQuantizedFixture = ElementwiseSquaredDiffValidationQuantizedFixture<CLTensor, CLAccessor, CLElementwiseSquaredDiff, T>;
@@ -142,8 +179,8 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, datasets::SmallShapes
CLTensor dst = create_tensor<CLTensor>(shape, DataType::QASYMM8);
// Create and Configure function
- CLElementwiseSquaredDiff add;
- add.configure(&ref_src1, &ref_src2, &dst);
+ CLElementwiseSquaredDiff sqdiff;
+ sqdiff.configure(&ref_src1, &ref_src2, &dst);
// Validate valid region
const ValidRegion valid_region = shape_to_valid_region(shape);
@@ -158,30 +195,26 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, datasets::SmallShapes
FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseSquaredDiffQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(),
ElementwiseSquaredDiffQASYMM8Dataset),
- framework::dataset::make("QuantizationInfo", { QuantizationInfo(5.f / 255.f, 20) })),
- framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 10) })),
- framework::dataset::make("QuantizationInfo", { QuantizationInfo(1.f / 255.f, 5) }))
-
- )
+ framework::dataset::make("Src0QInfo", { QuantizationInfo(5.f / 255.f, 20) })),
+ framework::dataset::make("Src1QInfo", { QuantizationInfo(2.f / 255.f, 10) })),
+ framework::dataset::make("OutQInfo", { QuantizationInfo(1.f / 255.f, 5) })))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_fp32, 0.01);
}
TEST_SUITE_END()
-TEST_SUITE_END()
-
-TEST_SUITE(S16)
-DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), framework::dataset::make("DataType", { DataType::U8, DataType::S16 })),
- shape, data_type)
+TEST_SUITE(QSYMM16)
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, datasets::SmallShapes(),
+ shape)
{
// Create tensors
- CLTensor ref_src1 = create_tensor<CLTensor>(shape, data_type);
- CLTensor ref_src2 = create_tensor<CLTensor>(shape, DataType::S16);
- CLTensor dst = create_tensor<CLTensor>(shape, DataType::S16);
+ CLTensor ref_src1 = create_tensor<CLTensor>(shape, DataType::QSYMM16);
+ CLTensor ref_src2 = create_tensor<CLTensor>(shape, DataType::QSYMM16);
+ CLTensor dst = create_tensor<CLTensor>(shape, DataType::QSYMM16);
// Create and Configure function
- CLElementwiseSquaredDiff add;
- add.configure(&ref_src1, &ref_src2, &dst);
+ CLElementwiseSquaredDiff sqdiff;
+ sqdiff.configure(&ref_src1, &ref_src2, &dst);
// Validate valid region
const ValidRegion valid_region = shape_to_valid_region(shape);
@@ -194,12 +227,17 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(datasets::Sma
validate(dst.info()->padding(), padding);
}
-FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseSquaredDiffFixture<int16_t>, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), ElementwiseSquaredDiffS16Dataset))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseSquaredDiffQuantizedFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(),
+ ElementwiseSquaredDiffQSYMM16Dataset),
+ framework::dataset::make("Src0QInfo", { QuantizationInfo(1.f / 32768.f, 0), QuantizationInfo(5.f / 32768.f, 0) })),
+ framework::dataset::make("Src1QInfo", { QuantizationInfo(2.f / 32768.f, 0), QuantizationInfo(5.f / 32768.f, 0) })),
+ framework::dataset::make("OutQInfo", { QuantizationInfo(5.f / 32768.f, 0) })))
{
// Validate output
- validate(CLAccessor(_target), _reference);
+ validate(CLAccessor(_target), _reference, tolerance_qsymm16);
}
TEST_SUITE_END()
+TEST_SUITE_END()
TEST_SUITE(Float)
TEST_SUITE(FP16)
@@ -220,8 +258,8 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, datasets::SmallShapes
CLTensor dst = create_tensor<CLTensor>(shape, DataType::F32);
// Create and Configure function
- CLElementwiseSquaredDiff add;
- add.configure(&ref_src1, &ref_src2, &dst);
+ CLElementwiseSquaredDiff sqdiff;
+ sqdiff.configure(&ref_src1, &ref_src2, &dst);
// Validate valid region
const ValidRegion valid_region = shape_to_valid_region(shape);
@@ -251,8 +289,8 @@ FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, CLElementwiseSquaredDiffBroadcastFixtu
TEST_SUITE_END()
TEST_SUITE_END()
-TEST_SUITE_END()
-TEST_SUITE_END()
+TEST_SUITE_END() // ElementwiseSquaredDiff
+TEST_SUITE_END() // CL
} // namespace validation
} // namespace test
} // namespace arm_compute
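
The reference-library change below follows the same pattern the reference already uses for QASYMM8: dequantize both int16_t inputs to float, run the float reference implementation, then requantize with the destination tensor's QuantizationInfo. A rough self-contained equivalent for squared difference (illustrative code, not arm_compute's; broadcasting and the S16 fall-through handled by BroadcastUnroll are omitted):

    #include <algorithm>
    #include <cmath>
    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // Dequantize -> float op -> requantize, mirroring the QSYMM16 branch below.
    std::vector<int16_t> squared_diff_qsymm16(const std::vector<int16_t> &src1, float scale1,
                                              const std::vector<int16_t> &src2, float scale2,
                                              float out_scale)
    {
        std::vector<int16_t> dst(src1.size());
        for(std::size_t i = 0; i < src1.size(); ++i)
        {
            const float a    = src1[i] * scale1; // convert_from_symmetric
            const float b    = src2[i] * scale2;
            const float diff = a - b;
            const long  q    = std::lround((diff * diff) / out_scale); // convert_to_symmetric
            dst[i] = static_cast<int16_t>(std::max(-32768L, std::min(32767L, q)));
        }
        return dst;
    }

The single unit of rounding error this round trip can introduce is presumably why tolerance_qsymm16 above is an AbsoluteTolerance of 1.
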
diff --git a/tests/validation/reference/ElementwiseOperations.cpp b/tests/validation/reference/ElementwiseOperations.cpp
index 44eb417969..d5a37a0fae 100644
--- a/tests/validation/reference/ElementwiseOperations.cpp
+++ b/tests/validation/reference/ElementwiseOperations.cpp
@@ -184,10 +184,39 @@ SimpleTensor<uint8_t> arithmetic_operation(ArithmeticOperation op, const SimpleT
}
}
+template <>
+SimpleTensor<int16_t> arithmetic_operation(ArithmeticOperation op, const SimpleTensor<int16_t> &src1, const SimpleTensor<int16_t> &src2, SimpleTensor<int16_t> &dst, ConvertPolicy convert_policy)
+{
+ if(dst.data_type() == DataType::QSYMM16)
+ {
+ SimpleTensor<float> src1_tmp = convert_from_symmetric<int16_t>(src1);
+ SimpleTensor<float> src2_tmp = convert_from_symmetric<int16_t>(src2);
+ SimpleTensor<float> dst_tmp(TensorShape::broadcast_shape(src1.shape(), src2.shape()), dst.data_type());
+
+ Coordinates id_src1{};
+ Coordinates id_src2{};
+ Coordinates id_dst{};
+
+ BroadcastUnroll<Coordinates::num_max_dimensions>::unroll(op, src1_tmp, src2_tmp, dst_tmp, convert_policy, id_src1, id_src2, id_dst);
+
+ dst = convert_to_symmetric<int16_t>(dst_tmp, dst.quantization_info());
+ return dst;
+ }
+ else
+ {
+ // DataType::S16
+ Coordinates id_src1{};
+ Coordinates id_src2{};
+ Coordinates id_dst{};
+
+ BroadcastUnroll<Coordinates::num_max_dimensions>::unroll(op, src1, src2, dst, convert_policy, id_src1, id_src2, id_dst);
+
+ return dst;
+ }
+}
+
template SimpleTensor<int32_t> arithmetic_operation(ArithmeticOperation op, const SimpleTensor<int32_t> &src1, const SimpleTensor<int32_t> &src2, SimpleTensor<int32_t> &dst,
ConvertPolicy convert_policy);
-template SimpleTensor<int16_t> arithmetic_operation(ArithmeticOperation op, const SimpleTensor<int16_t> &src1, const SimpleTensor<int16_t> &src2, SimpleTensor<int16_t> &dst,
- ConvertPolicy convert_policy);
template SimpleTensor<int8_t> arithmetic_operation(ArithmeticOperation op, const SimpleTensor<int8_t> &src1, const SimpleTensor<int8_t> &src2, SimpleTensor<int8_t> &dst,
ConvertPolicy convert_policy);
template SimpleTensor<half> arithmetic_operation(ArithmeticOperation op, const SimpleTensor<half> &src1, const SimpleTensor<half> &src2, SimpleTensor<half> &dst, ConvertPolicy convert_policy);
@@ -196,7 +225,7 @@ template SimpleTensor<float> arithmetic_operation(ArithmeticOperation op, const
template <typename T>
SimpleTensor<T> arithmetic_operation(ArithmeticOperation op, const SimpleTensor<T> &src1, const SimpleTensor<T> &src2, DataType dst_data_type, ConvertPolicy convert_policy)
{
- ARM_COMPUTE_ERROR_ON_MSG(dst_data_type == DataType::QASYMM8, "For QASYMM8, the quantized output tensor should be passed directly.");
+ ARM_COMPUTE_ERROR_ON_MSG(is_data_type_quantized(dst_data_type), "For quantized data types, the quantized output tensor should be passed directly.");
SimpleTensor<T> dst(TensorShape::broadcast_shape(src1.shape(), src2.shape()), dst_data_type);
arithmetic_operation<T>(op, src1, src2, dst, convert_policy);