aboutsummaryrefslogtreecommitdiff
path: root/src/core/CL/cl_kernels/gemmlowp.cl
diff options
context:
space:
mode:
author Michele Di Giorgio <michele.digiorgio@arm.com> 2019-10-23 10:53:10 +0100
committer Michele Di Giorgio <michele.digiorgio@arm.com> 2019-11-28 10:02:15 +0000
commit 14cbfb2921990d8bf125231e350e2ac8dcd95a8b (patch)
tree 9bec073d72c44c480c8807601889481d9b89ee7e /src/core/CL/cl_kernels/gemmlowp.cl
parent ed7b27dd7cbdae57b880029840ad0235523848e0 (diff)
download ComputeLibrary-14cbfb2921990d8bf125231e350e2ac8dcd95a8b.tar.gz
COMPMID-2609: Enable quantization with multiplier greater than 1 on OpenCL
Change-Id: I050f1f84e214e61f7cbb0197a672b68a4940edae
Signed-off-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Reviewed-on: https://review.mlplatform.org/c/2158
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Manuel Bottini <manuel.bottini@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Giorgio Arena <giorgio.arena@arm.com>
Diffstat (limited to 'src/core/CL/cl_kernels/gemmlowp.cl')
-rw-r--r-- src/core/CL/cl_kernels/gemmlowp.cl 22
1 file changed, 19 insertions, 3 deletions
diff --git a/src/core/CL/cl_kernels/gemmlowp.cl b/src/core/CL/cl_kernels/gemmlowp.cl
index 7a97fa6fa1..fa08b149e4 100644
--- a/src/core/CL/cl_kernels/gemmlowp.cl
+++ b/src/core/CL/cl_kernels/gemmlowp.cl
@@ -1673,9 +1673,17 @@ __kernel void gemmlowp_offset_contribution_quantize_down_fixedpoint(TENSOR3D_DEC
int4 result_multipliers_values = vload4(0, (__global int *)result_multipliers_addr);
int4 result_shifts_values = vload4(0, (__global int *)result_shifts_addr);
- in_s32 = ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(in_s32, result_multipliers_values, result_shifts_values, 4);
-#else // !defined(PER_CHANNEL_QUANTIZATION)
+ int4 in_s32_shift_lt0 = ASYMM_MULT_BY_QUANT_MULTIPLIER_GREATER_THAN_ONE(in_s32, result_multipliers_values, result_shifts_values, 4);
+ int4 in_s32_shift_gt0 = ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(in_s32, result_multipliers_values, result_shifts_values, 4);
+ in_s32 = select(in_s32_shift_lt0, in_s32_shift_gt0, result_shifts_values >= 0);
+#else // defined(PER_CHANNEL_QUANTIZATION)
+
+#if RESULT_SHIFT < 0
+ in_s32 = ASYMM_MULT_BY_QUANT_MULTIPLIER_GREATER_THAN_ONE(in_s32, RESULT_MULTIPLIER, RESULT_SHIFT, 4);
+#else // RESULT_SHIFT >= 0
in_s32 = ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(in_s32, RESULT_MULTIPLIER, RESULT_SHIFT, 4);
+#endif // RESULT_SHIFT < 0
+
#endif // defined(PER_CHANNEL_QUANTIZATION)
// Add the offset terms to GEMM's result
@@ -1768,7 +1776,11 @@ __kernel void gemmlowp_output_stage_quantize_down(TENSOR3D_DECLARATION(src),
// Multiply by result_mult_int and shift
input_values *= RESULT_MULT_INT;
+#if RESULT_SHIFT < 0
+ input_values >>= -RESULT_SHIFT;
+#else // RESULT_SHIFT >= 0
input_values >>= RESULT_SHIFT;
+#endif // RESULT_SHIFT < 0
uchar4 res = convert_uchar4_sat(input_values);
@@ -1850,7 +1862,11 @@ __kernel void gemmlowp_output_stage_quantize_down_fixedpoint(TENSOR3D_DECLARATIO
#endif // defined(ADD_BIAS)
// Multiply by result_mult_int and shift
+#if RESULT_SHIFT < 0
+ input_values = ASYMM_MULT_BY_QUANT_MULTIPLIER_GREATER_THAN_ONE(input_values, RESULT_FIXEDPOINT_MULTIPLIER, RESULT_SHIFT, 4);
+#else // RESULT_SHIFT >= 0
input_values = ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(input_values, RESULT_FIXEDPOINT_MULTIPLIER, RESULT_SHIFT, 4);
+#endif // RESULT_SHIFT < 0
// Add the offset terms to GEMM's result
input_values += (int4)RESULT_OFFSET_AFTER_SHIFT;
@@ -1937,7 +1953,7 @@ __kernel void gemmlowp_output_stage_quantize_down_fixedpoint_qsymm16(TENSOR3D_DE
// Multiply by result_mult_int and shift
#if RESULT_SHIFT < 0
- input_values = ASYMM_MULT(input_values * (1 << (-RESULT_SHIFT)), RESULT_FIXEDPOINT_MULTIPLIER, 4);
+ input_values = ASYMM_MULT_BY_QUANT_MULTIPLIER_GREATER_THAN_ONE(input_values, RESULT_FIXEDPOINT_MULTIPLIER, RESULT_SHIFT, 4);
#else // RESULT_SHIFT >= 0
input_values = ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(input_values, RESULT_FIXEDPOINT_MULTIPLIER, RESULT_SHIFT, 4);
#endif // RESULT_SHIFT < 0