From 5b52fe3a4481769adcf42218a3747486cb4e9c14 Mon Sep 17 00:00:00 2001
From: Georgios Pinitas <georgios.pinitas@arm.com>
Date: Thu, 12 Jul 2018 12:42:35 +0100
Subject: COMPMID-1390: OCLGrind and benchmark tests fail for QASYMM8
 COMPMID-1392: OCLGrind failures in im2col1x1_stridex1_dchw COMPMID-1395:
 OCLGrind failures in output_stage_quantized

Change-Id: I35504bd1f701316df122be52d458c71bbd7e7909
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/139722
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Giorgio Arena <giorgio.arena@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
---
 .../direct_convolution_1x1_3x3_5x5_quantized.cl    | 24 ++++++++++++++++------
 1 file changed, 18 insertions(+), 6 deletions(-)

(limited to 'src/core/CL/cl_kernels')

diff --git a/src/core/CL/cl_kernels/direct_convolution_1x1_3x3_5x5_quantized.cl b/src/core/CL/cl_kernels/direct_convolution_1x1_3x3_5x5_quantized.cl
index ae87420774..83da76785b 100644
--- a/src/core/CL/cl_kernels/direct_convolution_1x1_3x3_5x5_quantized.cl
+++ b/src/core/CL/cl_kernels/direct_convolution_1x1_3x3_5x5_quantized.cl
@@ -248,6 +248,12 @@ __kernel void direct_convolution_1x1_3x3_5x5_quantized(
 }
 #endif // defined(DATA_TYPE) && defined(STRIDE_X) && defined(WEIGHTS_DEPTH)
 
+#if defined(VEC_SIZE)
+
+#define VEC_INT VEC_DATA_TYPE(int, VEC_SIZE)
+#define CONVERT_SAT_UCHAR_STR(x, size) (convert_uchar##size##_sat((x)))
+#define CONVERT_SAT_UCHAR(x, size) CONVERT_SAT_UCHAR_STR(x, size)
+
 /** This function computes the output stage of a depthwise convolution.
  *
  * @param[in] src_ptr                            Pointer to the source image. Supported data types: QASYMM8
@@ -274,7 +280,6 @@ __kernel void direct_convolution_1x1_3x3_5x5_quantized(
  * @param[in] output_multiplier                  Output scale multiplier
  * @param[in] output_shift                       Output scale divisor exponent
  */
-
 __kernel void output_stage_quantized(
     TENSOR3D_DECLARATION(src),
     TENSOR3D_DECLARATION(dst),
@@ -292,22 +297,29 @@ __kernel void output_stage_quantized(
 #endif //defined(HAS_BIAS)
 
     // Load input
-    int16 vals = vload16(0, (__global int *)(src.ptr));
+    VEC_INT vals = VLOAD(VEC_SIZE)(0, (__global int *)(src.ptr));
 
 #if defined(HAS_BIAS)
     // Load and add bias
 #if defined(NCHW)
     int bias_value = *((__global int *)(vector_offset(&bias, get_global_id(2))));
 #else  // defined(NCHW)
-    int16 bias_value = vload16(0, ((__global int *)(vector_offset(&bias, get_global_id(0) * 16))));
+    VEC_INT bias_value = VLOAD(VEC_SIZE)(0, ((__global int *)(vector_offset(&bias, get_global_id(0) * VEC_SIZE))));
 #endif // defined(NCHW)
 
-    vals += (int16)(bias_value);
+    vals += (VEC_INT)(bias_value);
 #endif //defined(HAS_BIAS)
 
-    vals = ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(vals, output_multiplier, output_shift, 16);
+    vals = ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(vals, output_multiplier, output_shift, VEC_SIZE);
     vals = vals + output_offset;
 
     // Store result in dst
-    vstore16(convert_uchar16_sat(vals), 0, (__global uchar *)dst.ptr);
+    VSTORE(VEC_SIZE)
+    (CONVERT_SAT_UCHAR(vals, VEC_SIZE), 0, (__global uchar *)dst.ptr);
 }
+
+#undef VEC_INT
+#undef CONVERT_SAT_UCHAR_STR
+#undef CONVERT_SAT_UCHAR
+
+#endif // defined(VEC_SIZE)
-- 
cgit v1.2.1