author    Michele Di Giorgio <michele.digiorgio@arm.com>    2019-06-18 10:23:22 +0100
committer Georgios Pinitas <georgios.pinitas@arm.com>       2019-06-25 09:37:00 +0000
commit    6997fc951e48a1bf8f7591f3b2c4c8d721331b96 (patch)
tree      1cc2b28f5b2a5dbb8d7eb32755df4e8f28a1901d /src
parent    944170e1591ff23c9e6ede2201f0f6aba0f3439b (diff)
COMPMID-2412: Add QSYMM16 support for ElementwiseAddition for CL
Arithmetic addition uses the same code as other element-wise operations. Hence, adding QSYMM16 support for addition automatically adds the same support for:
- arithmetic subtraction
- element-wise min
- element-wise max
- squared difference

Change-Id: If986102844f62e29dd23c03f9245910db43f9043
Signed-off-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Reviewed-on: https://review.mlplatform.org/c/1384
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Manuel Bottini <manuel.bottini@arm.com>
Reviewed-by: Giuseppe Rossini <giuseppe.rossini@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
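For context, a minimal sketch of how the new type could be exercised through the runtime API. This assumes the CLArithmeticAddition interface of this library version; shapes and quantization scales are illustrative only, and the same kernel also backs subtraction, min, max and squared difference.

#include "arm_compute/runtime/CL/CLFunctions.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"

using namespace arm_compute;

int main()
{
    CLScheduler::get().default_init();

    // Symmetric 16-bit quantization carries only a scale; the offset is implicitly zero.
    const TensorShape shape(16U, 16U);
    CLTensor in1, in2, out;
    in1.allocator()->init(TensorInfo(shape, 1, DataType::QSYMM16, QuantizationInfo(1.f / 32768.f)));
    in2.allocator()->init(TensorInfo(shape, 1, DataType::QSYMM16, QuantizationInfo(2.f / 32768.f)));
    out.allocator()->init(TensorInfo(shape, 1, DataType::QSYMM16, QuantizationInfo(5.f / 32768.f)));

    // Element-wise addition on QSYMM16 tensors, now accepted by the validation below.
    CLArithmeticAddition add;
    add.configure(&in1, &in2, &out, ConvertPolicy::SATURATE);

    in1.allocator()->allocate();
    in2.allocator()->allocate();
    out.allocator()->allocate();

    // ... map the tensors and fill in1/in2 with QSYMM16 data ...

    add.run();
    CLScheduler::get().sync();
    return 0;
}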
Diffstat (limited to 'src')
-rw-r--r--  src/core/CL/CLHelpers.cpp                                  |  4
-rw-r--r--  src/core/CL/cl_kernels/elementwise_operation_quantized.cl  | 18
-rw-r--r--  src/core/CL/kernels/CLElementwiseOperationKernel.cpp       | 39
3 files changed, 45 insertions(+), 16 deletions(-)
diff --git a/src/core/CL/CLHelpers.cpp b/src/core/CL/CLHelpers.cpp
index 2e6ceb4433..e80349e486 100644
--- a/src/core/CL/CLHelpers.cpp
+++ b/src/core/CL/CLHelpers.cpp
@@ -45,6 +45,7 @@ std::string get_cl_type_from_data_type(const DataType &dt)
case DataType::U16:
return "ushort";
case DataType::S16:
+ case DataType::QSYMM16:
return "short";
case DataType::U32:
return "uint";
@@ -78,6 +79,7 @@ std::string get_cl_select_type_from_data_type(const DataType &dt)
return "ushort";
case DataType::F16:
case DataType::S16:
+ case DataType::QSYMM16:
return "short";
case DataType::U32:
return "uint";
@@ -105,6 +107,7 @@ std::string get_data_size_from_data_type(const DataType &dt)
return "8";
case DataType::U16:
case DataType::S16:
+ case DataType::QSYMM16:
case DataType::F16:
return "16";
case DataType::U32:
@@ -246,6 +249,7 @@ size_t preferred_vector_width(const cl::Device &device, const DataType dt)
return device.getInfo<CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR>();
case DataType::U16:
case DataType::S16:
+ case DataType::QSYMM16:
return device.getInfo<CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT>();
case DataType::U32:
case DataType::S32:
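The CLHelpers changes above simply map QSYMM16 onto the same OpenCL C type and vector-width hints already used for S16, so the existing 16-bit code paths are reused unchanged. A hypothetical sanity check, assuming these helpers are declared in arm_compute/core/CL/CLHelpers.h:

#include "arm_compute/core/CL/CLHelpers.h"
#include <cassert>

int main()
{
    using namespace arm_compute;
    // QSYMM16 data now resolves to the OpenCL 'short' type and is 16 bits wide, like S16.
    assert(get_cl_type_from_data_type(DataType::QSYMM16) == "short");
    assert(get_data_size_from_data_type(DataType::QSYMM16) == "16");
    return 0;
}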
diff --git a/src/core/CL/cl_kernels/elementwise_operation_quantized.cl b/src/core/CL/cl_kernels/elementwise_operation_quantized.cl
index 1b45da164f..a23ae2b005 100644
--- a/src/core/CL/cl_kernels/elementwise_operation_quantized.cl
+++ b/src/core/CL/cl_kernels/elementwise_operation_quantized.cl
@@ -37,11 +37,11 @@
#define OP_FUN_NAME_STR(op) elementwise_operation_##op##_quantized
#define OP_FUN_NAME(op) OP_FUN_NAME_STR(op)
-#if defined(OP) && defined(VEC_SIZE) && defined(OFFSET_IN1) && defined(OFFSET_IN2) && defined(OFFSET_OUT) && defined(SCALE_IN1) && defined(SCALE_IN2) && defined(SCALE_OUT)
+#if defined(OP) && defined(VEC_SIZE) && defined(OFFSET_IN1) && defined(OFFSET_IN2) && defined(OFFSET_OUT) && defined(SCALE_IN1) && defined(SCALE_IN2) && defined(SCALE_OUT) && defined(DATA_TYPE_OUT)
#define VEC_FLOAT VEC_DATA_TYPE(float, VEC_SIZE)
#define VEC_INT VEC_DATA_TYPE(int, VEC_SIZE)
-#define VEC_UCHAR VEC_DATA_TYPE(uchar, VEC_SIZE)
+#define VEC_TYPE VEC_DATA_TYPE(DATA_TYPE_OUT, VEC_SIZE)
/** This function executes an element-wise operation among two tensors.
*
@@ -54,8 +54,10 @@
* @attention To perform saturating operation -DSATURATE has to be passed to the compiler otherwise wrapping policy will be used.
* @attention Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. -DVEC_SIZE=16
* @attention The element-wise operation to be executed has to be passed at compile time using -DOP (e.g., -DOP=ADD)
+ * @attention For QSYMM16 operations OFFSET_IN1, OFFSET_IN2 and OFFSET_OUT must be set to zero
+ * @attention The data type must be passed at compile time using -DDATA_TYPE_OUT, e.g. -DDATA_TYPE_OUT=uchar
*
- * @param[in] in1_ptr Pointer to the source tensor. Supported data types: QASYMM8
+ * @param[in] in1_ptr Pointer to the source tensor. Supported data types: QASYMM8/QSYMM16
* @param[in] in1_stride_x Stride of the source tensor in X dimension (in bytes)
* @param[in] in1_step_x in1_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] in1_stride_y Stride of the source tensor in Y dimension (in bytes)
@@ -90,8 +92,8 @@ __kernel void OP_FUN_NAME(OP)(
Tensor3D in2 = CONVERT_TO_TENSOR3D_STRUCT(in2);
Tensor3D out = CONVERT_TO_TENSOR3D_STRUCT(out);
- VEC_INT in_a = CONVERT(VLOAD(VEC_SIZE)(0, (__global uchar *)in1.ptr), VEC_INT);
- VEC_INT in_b = CONVERT(VLOAD(VEC_SIZE)(0, (__global uchar *)in2.ptr), VEC_INT);
+ VEC_INT in_a = CONVERT(VLOAD(VEC_SIZE)(0, (__global DATA_TYPE_OUT *)in1.ptr), VEC_INT);
+ VEC_INT in_b = CONVERT(VLOAD(VEC_SIZE)(0, (__global DATA_TYPE_OUT *)in2.ptr), VEC_INT);
in_a = SUB(in_a, (VEC_INT)((int)OFFSET_IN1));
in_b = SUB(in_b, (VEC_INT)((int)OFFSET_IN2));
@@ -99,10 +101,10 @@ __kernel void OP_FUN_NAME(OP)(
const VEC_FLOAT in1f32 = CONVERT(in_a, VEC_FLOAT) * (VEC_FLOAT)((float)SCALE_IN1);
const VEC_FLOAT in2f32 = CONVERT(in_b, VEC_FLOAT) * (VEC_FLOAT)((float)SCALE_IN2);
const VEC_FLOAT qresf32 = OP(in1f32, in2f32) / ((VEC_FLOAT)(float)SCALE_OUT) + ((VEC_FLOAT)((float)OFFSET_OUT));
- const VEC_UCHAR res = CONVERT_SAT(CONVERT_DOWN(qresf32, VEC_INT), VEC_UCHAR);
+ const VEC_TYPE res = CONVERT_SAT(CONVERT_DOWN(qresf32, VEC_INT), VEC_TYPE);
// Store result
VSTORE(VEC_SIZE)
- (res, 0, (__global uchar *)out.ptr);
+ (res, 0, (__global DATA_TYPE_OUT *)out.ptr);
}
-#endif /* defined(OFFSET_IN1) && defined(OFFSET_IN2) && defined(OFFSET_OUT) && defined(SCALE_IN1) && defined(SCALE_IN2) && defined(SCALE_OUT) */
+#endif /* defined(OFFSET_IN1) && defined(OFFSET_IN2) && defined(OFFSET_OUT) && defined(SCALE_IN1) && defined(SCALE_IN2) && defined(SCALE_OUT) && defined(DATA_TYPE_OUT) */
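Read element-wise, the quantized kernel dequantizes both inputs, applies OP in float, then requantizes with saturation. A scalar C++ model of one QSYMM16 addition is sketched below; offsets are zero by definition for QSYMM16, and rounding is simplified relative to the kernel's CONVERT_DOWN.

#include <algorithm>
#include <cmath>
#include <cstdint>

// One output element of the quantized ADD path above, modelled on the host.
int16_t add_qsymm16(int16_t a, int16_t b, float scale_in1, float scale_in2, float scale_out)
{
    const float af = static_cast<float>(a) * scale_in1; // dequantize input 1
    const float bf = static_cast<float>(b) * scale_in2; // dequantize input 2
    const float rf = (af + bf) / scale_out;              // requantize the result
    const long  r  = std::lround(rf);                    // convert to integer
    // Saturate to the int16_t range (CONVERT_SAT in the kernel).
    return static_cast<int16_t>(std::min<long>(std::max<long>(r, -32768), 32767));
}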
diff --git a/src/core/CL/kernels/CLElementwiseOperationKernel.cpp b/src/core/CL/kernels/CLElementwiseOperationKernel.cpp
index 1d9c71555a..4c191de0bd 100644
--- a/src/core/CL/kernels/CLElementwiseOperationKernel.cpp
+++ b/src/core/CL/kernels/CLElementwiseOperationKernel.cpp
@@ -92,14 +92,22 @@ Status validate_arguments_with_float_only_supported_rules(const ITensorInfo &inp
Status validate_arguments_with_arithmetic_rules(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output)
{
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(&input1);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input1, 1, DataType::U8, DataType::QASYMM8, DataType::S16, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input1, 1, DataType::U8, DataType::QASYMM8, DataType::S16, DataType::QSYMM16, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(&input2);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input2, 1, DataType::U8, DataType::QASYMM8, DataType::S16, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input2, 1, DataType::U8, DataType::QASYMM8, DataType::S16, DataType::QSYMM16, DataType::F16, DataType::F32);
- const bool is_qasymm = is_data_type_quantized_asymmetric(input1.data_type()) || is_data_type_quantized_asymmetric(input2.data_type());
- if(is_qasymm)
+ const bool is_quantized = is_data_type_quantized(input1.data_type()) || is_data_type_quantized(input2.data_type());
+ if(is_quantized)
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(&input1, &input2);
+
+ if(is_data_type_quantized_symmetric(input1.data_type()))
+ {
+ const int32_t in1_offset = input1.quantization_info().uniform().offset;
+ const int32_t in2_offset = input2.quantization_info().uniform().offset;
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(in1_offset != 0, "For quantized symmetric, offset must be zero");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(in2_offset != 0, "For quantized symmetric, offset must be zero");
+ }
}
const TensorShape out_shape = TensorShape::broadcast_shape(input1.tensor_shape(), input2.tensor_shape());
@@ -110,14 +118,21 @@ Status validate_arguments_with_arithmetic_rules(const ITensorInfo &input1, const
if(output.total_size() > 0)
{
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(&output);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&output, 1, DataType::U8, DataType::QASYMM8, DataType::S16, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&output, 1, DataType::U8, DataType::QASYMM8, DataType::S16, DataType::QSYMM16, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_MSG((output.data_type() == DataType::U8) && ((input1.data_type() != DataType::U8) || (input2.data_type() != DataType::U8)),
"Output can only be U8 if both inputs are U8");
ARM_COMPUTE_RETURN_ERROR_ON_MSG(detail::have_different_dimensions(out_shape, output.tensor_shape(), 0),
"Wrong shape for output");
- if(is_qasymm)
+
+ if(is_quantized)
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(&input1, &output);
+
+ if(is_data_type_quantized_symmetric(output.data_type()))
+ {
+ const int32_t offset = output.quantization_info().uniform().offset;
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(offset != 0, "For quantized symmetric, offset must be zero");
+ }
}
}
return Status{};
@@ -132,7 +147,7 @@ CLBuildOptions generate_build_options_with_arithmetic_rules(const ITensorInfo &i
build_opts.add_option("-DDATA_TYPE_OUT=" + get_cl_type_from_data_type(output.data_type()));
build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration));
build_opts.add_option("-DOP=" + operation_string);
- if(is_data_type_quantized_asymmetric(input1.data_type()))
+ if(is_data_type_quantized(input1.data_type()))
{
const UniformQuantizationInfo iq1info = input1.quantization_info().uniform();
const UniformQuantizationInfo iq2info = input2.quantization_info().uniform();
@@ -188,6 +203,14 @@ std::pair<Status, Window> validate_and_configure_window_for_arithmetic_operators
{
set_format_if_unknown(output, Format::F32);
}
+ else if(input1.data_type() == DataType::QASYMM8 || input2.data_type() == DataType::QASYMM8)
+ {
+ set_data_type_if_unknown(output, DataType::QASYMM8);
+ }
+ else if(input1.data_type() == DataType::QSYMM16 || input2.data_type() == DataType::QSYMM16)
+ {
+ set_data_type_if_unknown(output, DataType::QSYMM16);
+ }
return configure_window_arithmetic_common(valid_region, input1, input2, output);
}
@@ -221,7 +244,7 @@ void CLElementwiseOperationKernel::configure_common(const ICLTensor *input1, con
_output = output;
std::string kernel_name = "elementwise_operation_" + name();
- if(is_data_type_quantized_asymmetric(input1->info()->data_type()))
+ if(is_data_type_quantized(input1->info()->data_type()))
{
kernel_name += "_quantized";
}
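The new validation insists on a zero quantization offset because symmetric quantization, by definition, maps real values as x ≈ q * scale with no zero-point. A hypothetical illustration follows (these are not the library's own helpers):

#include <algorithm>
#include <cmath>
#include <cstdint>

// Symmetric 16-bit quantization: no zero-point, which is why OFFSET_IN1,
// OFFSET_IN2 and OFFSET_OUT must all be zero for QSYMM16 tensors.
int16_t quantize_symm16(float x, float scale)
{
    const long q = std::lround(x / scale);
    return static_cast<int16_t>(std::min<long>(std::max<long>(q, -32768), 32767));
}

float dequantize_symm16(int16_t q, float scale)
{
    return static_cast<float>(q) * scale;
}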