From 6997fc951e48a1bf8f7591f3b2c4c8d721331b96 Mon Sep 17 00:00:00 2001
From: Michele Di Giorgio <michele.digiorgio@arm.com>
Date: Tue, 18 Jun 2019 10:23:22 +0100
Subject: COMPMID-2412: Add QSYMM16 support for ElementwiseAddition for CL

Arithmetic addition uses the same code as other element-wise operations.
Hence, adding QSYMM16 support for addition automatically adds the same
support for:

- arithmetic subtraction
- element-wise min
- element-wise max
- squared difference

Change-Id: If986102844f62e29dd23c03f9245910db43f9043
Signed-off-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Reviewed-on: https://review.mlplatform.org/c/1384
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Manuel Bottini <manuel.bottini@arm.com>
Reviewed-by: Giuseppe Rossini <giuseppe.rossini@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
---
 .../CL/cl_kernels/elementwise_operation_quantized.cl   | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

(limited to 'src/core/CL/cl_kernels/elementwise_operation_quantized.cl')

diff --git a/src/core/CL/cl_kernels/elementwise_operation_quantized.cl b/src/core/CL/cl_kernels/elementwise_operation_quantized.cl
index 1b45da164f..a23ae2b005 100644
--- a/src/core/CL/cl_kernels/elementwise_operation_quantized.cl
+++ b/src/core/CL/cl_kernels/elementwise_operation_quantized.cl
@@ -37,11 +37,11 @@
 #define OP_FUN_NAME_STR(op) elementwise_operation_##op##_quantized
 #define OP_FUN_NAME(op) OP_FUN_NAME_STR(op)
 
-#if defined(OP) && defined(VEC_SIZE) && defined(OFFSET_IN1) && defined(OFFSET_IN2) && defined(OFFSET_OUT) && defined(SCALE_IN1) && defined(SCALE_IN2) && defined(SCALE_OUT)
+#if defined(OP) && defined(VEC_SIZE) && defined(OFFSET_IN1) && defined(OFFSET_IN2) && defined(OFFSET_OUT) && defined(SCALE_IN1) && defined(SCALE_IN2) && defined(SCALE_OUT) && defined(DATA_TYPE_OUT)
 
 #define VEC_FLOAT VEC_DATA_TYPE(float, VEC_SIZE)
 #define VEC_INT VEC_DATA_TYPE(int, VEC_SIZE)
-#define VEC_UCHAR VEC_DATA_TYPE(uchar, VEC_SIZE)
+#define VEC_TYPE VEC_DATA_TYPE(DATA_TYPE_OUT, VEC_SIZE)
 
 /** This function executes an element-wise operation among two tensors.
  *
@@ -54,8 +54,10 @@
  * @attention To perform saturating operation -DSATURATE has to be passed to the compiler otherwise wrapping policy will be used.
  * @attention Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. -DVEC_SIZE=16
  * @attention The element-wise operation to be executed has to be passed at compile time using -DOP (e.g., -DOP=ADD)
+ * @attention For QSYMM16 operations OFFSET_IN1, OFFSET_IN2 and OFFSET_OUT must be set to zero
+ * @attention The data type must be passed at compile time using -DDATA_TYPE_OUT, i.e. -DDATA_TYPE_OUT=uchar
  *
- * @param[in]  in1_ptr                           Pointer to the source tensor. Supported data types: QASYMM8
+ * @param[in]  in1_ptr                           Pointer to the source tensor. Supported data types: QASYMM8/QSYMM16
  * @param[in]  in1_stride_x                      Stride of the source tensor in X dimension (in bytes)
  * @param[in]  in1_step_x                        in1_stride_x * number of elements along X processed per workitem(in bytes)
  * @param[in]  in1_stride_y                      Stride of the source tensor in Y dimension (in bytes)
@@ -90,8 +92,8 @@ __kernel void OP_FUN_NAME(OP)(
     Tensor3D in2 = CONVERT_TO_TENSOR3D_STRUCT(in2);
     Tensor3D out = CONVERT_TO_TENSOR3D_STRUCT(out);
 
-    VEC_INT in_a = CONVERT(VLOAD(VEC_SIZE)(0, (__global uchar *)in1.ptr), VEC_INT);
-    VEC_INT in_b = CONVERT(VLOAD(VEC_SIZE)(0, (__global uchar *)in2.ptr), VEC_INT);
+    VEC_INT in_a = CONVERT(VLOAD(VEC_SIZE)(0, (__global DATA_TYPE_OUT *)in1.ptr), VEC_INT);
+    VEC_INT in_b = CONVERT(VLOAD(VEC_SIZE)(0, (__global DATA_TYPE_OUT *)in2.ptr), VEC_INT);
 
     in_a = SUB(in_a, (VEC_INT)((int)OFFSET_IN1));
     in_b = SUB(in_b, (VEC_INT)((int)OFFSET_IN2));
@@ -99,10 +101,10 @@ __kernel void OP_FUN_NAME(OP)(
     const VEC_FLOAT in1f32  = CONVERT(in_a, VEC_FLOAT) * (VEC_FLOAT)((float)SCALE_IN1);
     const VEC_FLOAT in2f32  = CONVERT(in_b, VEC_FLOAT) * (VEC_FLOAT)((float)SCALE_IN2);
     const VEC_FLOAT qresf32 = OP(in1f32, in2f32) / ((VEC_FLOAT)(float)SCALE_OUT) + ((VEC_FLOAT)((float)OFFSET_OUT));
-    const VEC_UCHAR res     = CONVERT_SAT(CONVERT_DOWN(qresf32, VEC_INT), VEC_UCHAR);
+    const VEC_TYPE  res     = CONVERT_SAT(CONVERT_DOWN(qresf32, VEC_INT), VEC_TYPE);
 
     // Store result
     VSTORE(VEC_SIZE)
-    (res, 0, (__global uchar *)out.ptr);
+    (res, 0, (__global DATA_TYPE_OUT *)out.ptr);
 }
-#endif /* defined(OFFSET_IN1) && defined(OFFSET_IN2) && defined(OFFSET_OUT) && defined(SCALE_IN1) && defined(SCALE_IN2) && defined(SCALE_OUT) */
+#endif /* defined(OFFSET_IN1) && defined(OFFSET_IN2) && defined(OFFSET_OUT) && defined(SCALE_IN1) && defined(SCALE_IN2) && defined(SCALE_OUT) && defined(DATA_TYPE_OUT) */
-- 
cgit v1.2.1