From 14cbfb2921990d8bf125231e350e2ac8dcd95a8b Mon Sep 17 00:00:00 2001 From: Michele Di Giorgio Date: Wed, 23 Oct 2019 10:53:10 +0100 Subject: COMPMID-2609: Enable quantization with multiplier greater than 1 on OpenCL Change-Id: I050f1f84e214e61f7cbb0197a672b68a4940edae Signed-off-by: Michele Di Giorgio Reviewed-on: https://review.mlplatform.org/c/2158 Comments-Addressed: Arm Jenkins Reviewed-by: Manuel Bottini Tested-by: Arm Jenkins Reviewed-by: Giorgio Arena --- src/core/CL/cl_kernels/direct_convolution_quantized.cl | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'src/core/CL/cl_kernels/direct_convolution_quantized.cl') diff --git a/src/core/CL/cl_kernels/direct_convolution_quantized.cl b/src/core/CL/cl_kernels/direct_convolution_quantized.cl index 1182428cd5..37fd9a0778 100644 --- a/src/core/CL/cl_kernels/direct_convolution_quantized.cl +++ b/src/core/CL/cl_kernels/direct_convolution_quantized.cl @@ -25,7 +25,7 @@ #undef CONVERT_SAT -#if defined(DATA_TYPE) && defined(STRIDE_X) && defined(WEIGHTS_DEPTH) +#if defined(DATA_TYPE) && defined(STRIDE_X) && defined(WEIGHTS_DEPTH) && defined(OUTPUT_MULTIPLIER) && defined(OUTPUT_SHIFT) #if KERNEL_SIZE == 9 @@ -194,6 +194,8 @@ inline uchar8 extract_input_stride3(__global const uchar *input_pixel) * @note The convolution stride x must be passed at compile time using -DSTRIDE_X e.g. -DSTRIDE_X=1 * @note The third dimensions of the weights tensors must be passed at compile time using -DWEIGHTS_DEPTH * @note If biases are used then -DHAS_BIAS has to be passed at compile time + * @note The output quantization multiplier must be passed at compile time using -DOUTPUT_MULTIPLIER e.g. -DOUTPUT_MULTIPLIER=1234 + * @note The output quantization shift must be passed at compile time using -DOUTPUT_SHIFT e.g. -DOUTPUT_SHIFT=4 * * @param[in] src_ptr Pointer to the source tensor. Supported data types: QASYMM8 * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes) @@ -227,8 +229,6 @@ inline uchar8 extract_input_stride3(__global const uchar *input_pixel) * @param[in] input_offset Input offset quantization parameter * @param[in] weight_offset Weights offset quantization parameter * @param[in] output_offset Output offset quantization parameter - * @param[in] output_multiplier Output integer multiplier quantization parameter - * @param[in] output_shift Output integer shift quantization parameter */ __kernel void direct_convolution_quantized( TENSOR3D_DECLARATION(src), @@ -240,9 +240,7 @@ __kernel void direct_convolution_quantized( unsigned int weights_stride_w, int input_offset, int weight_offset, - int output_offset, - int output_multiplier, - int output_shift) + int output_offset) { Image src = CONVERT_TO_IMAGE_STRUCT(src); Tensor3D weights = CONVERT_TO_TENSOR3D_STRUCT_NO_STEP(weights); @@ -294,9 +292,13 @@ __kernel void direct_convolution_quantized( pixels0 += (int8)(*bias_addr); #endif /* defined(HAS_BIAS) */ - pixels0 = ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(pixels0, output_multiplier, output_shift, 8); +#if OUTPUT_SHIFT < 0 + pixels0 = ASYMM_MULT_BY_QUANT_MULTIPLIER_GREATER_THAN_ONE(pixels0, OUTPUT_MULTIPLIER, OUTPUT_SHIFT, 8); +#else // OUTPUT_SHIFT < 0 + pixels0 = ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(pixels0, OUTPUT_MULTIPLIER, OUTPUT_SHIFT, 8); +#endif // OUTPUT_SHIFT < 0 pixels0 = pixels0 + output_offset; vstore8(convert_uchar8_sat(pixels0), 0, (__global uchar *)dst.ptr); } -#endif // defined(DATA_TYPE) && defined(STRIDE_X) && defined(WEIGHTS_DEPTH) +#endif // defined(DATA_TYPE) && defined(STRIDE_X) && defined(WEIGHTS_DEPTH) && defined(OUTPUT_MULTIPLIER) && defined(OUTPUT_SHIFT) -- cgit v1.2.1