COMPMID-2609: Enable quantization with multiplier greater than 1 on OpenCL

Change-Id: I050f1f84e214e61f7cbb0197a672b68a4940edae
Signed-off-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Reviewed-on: https://review.mlplatform.org/c/2158
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Manuel Bottini <manuel.bottini@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Giorgio Arena <giorgio.arena@arm.com>
author: Michele Di Giorgio <michele.digiorgio@arm.com> 2019-10-23 10:53:10 +0100
committer: Michele Di Giorgio <michele.digiorgio@arm.com> 2019-11-28 10:02:15 +0000
commit: 14cbfb2921990d8bf125231e350e2ac8dcd95a8b (patch)
tree: 9bec073d72c44c480c8807601889481d9b89ee7e /src/core/CL/cl_kernels/direct_convolution_quantized.cl
parent: ed7b27dd7cbdae57b880029840ad0235523848e0 (diff)
download: ComputeLibrary-14cbfb2921990d8bf125231e350e2ac8dcd95a8b.tar.gz
1 files changed, 10 insertions, 8 deletions
diff --git a/src/core/CL/cl_kernels/direct_convolution_quantized.cl b/src/core/CL/cl_kernels/direct_convolution_quantized.cl
index 1182428cd5..37fd9a0778 100644
--- a/src/core/CL/cl_kernels/direct_convolution_quantized.cl
+++ b/src/core/CL/cl_kernels/direct_convolution_quantized.cl
@@ -25,7 +25,7 @@
 
 #undef CONVERT_SAT
 
-#if defined(DATA_TYPE) && defined(STRIDE_X) && defined(WEIGHTS_DEPTH)
+#if defined(DATA_TYPE) && defined(STRIDE_X) && defined(WEIGHTS_DEPTH) && defined(OUTPUT_MULTIPLIER) && defined(OUTPUT_SHIFT)
 
 #if KERNEL_SIZE == 9
 
@@ -194,6 +194,8 @@ inline uchar8 extract_input_stride3(__global const uchar *input_pixel)
  * @note The convolution stride x must be passed at compile time using -DSTRIDE_X e.g. -DSTRIDE_X=1
  * @note The third dimensions of the weights tensors must be passed at compile time using -DWEIGHTS_DEPTH
  * @note If biases are used then -DHAS_BIAS has to be passed at compile time
+ * @note The output quantization multiplier must be passed at compile time using -DOUTPUT_MULTIPLIER e.g. -DOUTPUT_MULTIPLIER=1234
+ * @note The output quantization shift must be passed at compile time using -DOUTPUT_SHIFT e.g. -DOUTPUT_SHIFT=4
  *
  * @param[in]  src_ptr                               Pointer to the source tensor. Supported data types: QASYMM8
  * @param[in]  src_stride_x                          Stride of the source tensor in X dimension (in bytes)
@@ -227,8 +229,6 @@ inline uchar8 extract_input_stride3(__global const uchar *input_pixel)
  * @param[in]  input_offset                          Input offset quantization parameter
  * @param[in]  weight_offset                         Weights offset quantization parameter
  * @param[in]  output_offset                         Output offset quantization parameter
- * @param[in]  output_multiplier                     Output integer multiplier quantization parameter
- * @param[in]  output_shift                          Output integer shift quantization parameter
  */
 __kernel void direct_convolution_quantized(
     TENSOR3D_DECLARATION(src),
@@ -240,9 +240,7 @@ __kernel void direct_convolution_quantized(
     unsigned int weights_stride_w,
     int          input_offset,
     int          weight_offset,
-    int          output_offset,
-    int          output_multiplier,
-    int          output_shift)
+    int          output_offset)
 {
     Image    src     = CONVERT_TO_IMAGE_STRUCT(src);
     Tensor3D weights = CONVERT_TO_TENSOR3D_STRUCT_NO_STEP(weights);
@@ -294,9 +292,13 @@ __kernel void direct_convolution_quantized(
     pixels0 += (int8)(*bias_addr);
 #endif /* defined(HAS_BIAS) */
 
-    pixels0 = ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(pixels0, output_multiplier, output_shift, 8);
+#if OUTPUT_SHIFT < 0
+    pixels0 = ASYMM_MULT_BY_QUANT_MULTIPLIER_GREATER_THAN_ONE(pixels0, OUTPUT_MULTIPLIER, OUTPUT_SHIFT, 8);
+#else  // OUTPUT_SHIFT < 0
+    pixels0 = ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(pixels0, OUTPUT_MULTIPLIER, OUTPUT_SHIFT, 8);
+#endif // OUTPUT_SHIFT < 0
     pixels0 = pixels0 + output_offset;
 
     vstore8(convert_uchar8_sat(pixels0), 0, (__global uchar *)dst.ptr);
 }
-#endif // defined(DATA_TYPE) && defined(STRIDE_X) && defined(WEIGHTS_DEPTH)
+#endif // defined(DATA_TYPE) && defined(STRIDE_X) && defined(WEIGHTS_DEPTH) && defined(OUTPUT_MULTIPLIER) && defined(OUTPUT_SHIFT)
author	Michele Di Giorgio <michele.digiorgio@arm.com>	2019-10-23 10:53:10 +0100
committer	Michele Di Giorgio <michele.digiorgio@arm.com>	2019-11-28 10:02:15 +0000
commit	14cbfb2921990d8bf125231e350e2ac8dcd95a8b (patch)
tree	9bec073d72c44c480c8807601889481d9b89ee7e /src/core/CL/cl_kernels/direct_convolution_quantized.cl
parent	ed7b27dd7cbdae57b880029840ad0235523848e0 (diff)
download	ComputeLibrary-14cbfb2921990d8bf125231e350e2ac8dcd95a8b.tar.gz