From 14cbfb2921990d8bf125231e350e2ac8dcd95a8b Mon Sep 17 00:00:00 2001 From: Michele Di Giorgio Date: Wed, 23 Oct 2019 10:53:10 +0100 Subject: COMPMID-2609: Enable quantization with multiplier greater than 1 on OpenCL Change-Id: I050f1f84e214e61f7cbb0197a672b68a4940edae Signed-off-by: Michele Di Giorgio Reviewed-on: https://review.mlplatform.org/c/2158 Comments-Addressed: Arm Jenkins Reviewed-by: Manuel Bottini Tested-by: Arm Jenkins Reviewed-by: Giorgio Arena --- src/core/CL/cl_kernels/gemmlowp.cl | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) (limited to 'src/core/CL/cl_kernels/gemmlowp.cl') diff --git a/src/core/CL/cl_kernels/gemmlowp.cl b/src/core/CL/cl_kernels/gemmlowp.cl index 7a97fa6fa1..fa08b149e4 100644 --- a/src/core/CL/cl_kernels/gemmlowp.cl +++ b/src/core/CL/cl_kernels/gemmlowp.cl @@ -1673,9 +1673,17 @@ __kernel void gemmlowp_offset_contribution_quantize_down_fixedpoint(TENSOR3D_DEC int4 result_multipliers_values = vload4(0, (__global int *)result_multipliers_addr); int4 result_shifts_values = vload4(0, (__global int *)result_shifts_addr); - in_s32 = ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(in_s32, result_multipliers_values, result_shifts_values, 4); -#else // !defined(PER_CHANNEL_QUANTIZATION) + int4 in_s32_shift_lt0 = ASYMM_MULT_BY_QUANT_MULTIPLIER_GREATER_THAN_ONE(in_s32, result_multipliers_values, result_shifts_values, 4); + int4 in_s32_shift_gt0 = ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(in_s32, result_multipliers_values, result_shifts_values, 4); + in_s32 = select(in_s32_shift_lt0, in_s32_shift_gt0, result_shifts_values >= 0); +#else // defined(PER_CHANNEL_QUANTIZATION) + +#if RESULT_SHIFT < 0 + in_s32 = ASYMM_MULT_BY_QUANT_MULTIPLIER_GREATER_THAN_ONE(in_s32, RESULT_MULTIPLIER, RESULT_SHIFT, 4); +#else // RESULT_SHIFT >= 0 in_s32 = ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(in_s32, RESULT_MULTIPLIER, RESULT_SHIFT, 4); +#endif // RESULT_SHIFT < 0 + #endif // defined(PER_CHANNEL_QUANTIZATION) // Add the offset terms to GEMM's result @@ -1768,7 +1776,11 @@ __kernel void gemmlowp_output_stage_quantize_down(TENSOR3D_DECLARATION(src), // Multiply by result_mult_int and shift input_values *= RESULT_MULT_INT; +#if RESULT_SHIFT < 0 + input_values >>= -RESULT_SHIFT; +#else // RESULT_SHIFT >= 0 input_values >>= RESULT_SHIFT; +#endif // RESULT_SHIFT < 0 uchar4 res = convert_uchar4_sat(input_values); @@ -1850,7 +1862,11 @@ __kernel void gemmlowp_output_stage_quantize_down_fixedpoint(TENSOR3D_DECLARATIO #endif // defined(ADD_BIAS) // Multiply by result_mult_int and shift +#if RESULT_SHIFT < 0 + input_values = ASYMM_MULT_BY_QUANT_MULTIPLIER_GREATER_THAN_ONE(input_values, RESULT_FIXEDPOINT_MULTIPLIER, RESULT_SHIFT, 4); +#else // RESULT_SHIFT >= 0 input_values = ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(input_values, RESULT_FIXEDPOINT_MULTIPLIER, RESULT_SHIFT, 4); +#endif // RESULT_SHIFT < 0 // Add the offset terms to GEMM's result input_values += (int4)RESULT_OFFSET_AFTER_SHIFT; @@ -1937,7 +1953,7 @@ __kernel void gemmlowp_output_stage_quantize_down_fixedpoint_qsymm16(TENSOR3D_DE // Multiply by result_mult_int and shift #if RESULT_SHIFT < 0 - input_values = ASYMM_MULT(input_values * (1 << (-RESULT_SHIFT)), RESULT_FIXEDPOINT_MULTIPLIER, 4); + input_values = ASYMM_MULT_BY_QUANT_MULTIPLIER_GREATER_THAN_ONE(input_values, RESULT_FIXEDPOINT_MULTIPLIER, RESULT_SHIFT, 4); #else // RESULT_SHIFT >= 0 input_values = ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(input_values, RESULT_FIXEDPOINT_MULTIPLIER, RESULT_SHIFT, 4); #endif // RESULT_SHIFT < 0 -- cgit v1.2.1