From 45bcc3a1c287a208098ae99288273a5129ddd5eb Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Wed, 29 Nov 2017 11:06:49 +0000 Subject: COMPMID-661: QASYMM8 support for fully connected layer. Change-Id: I70e04d3a175ba366432ada98e9ca893c9f81b260 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/111094 Reviewed-by: Gian Marco Iodice Tested-by: BSG Visual Compute Jenkins server to access repositories on http://mpd-gerrit.cambridge.arm.com Reviewed-by: Anthony Barbier --- src/core/CL/cl_kernels/gemmlowp.cl | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) (limited to 'src/core/CL/cl_kernels/gemmlowp.cl') diff --git a/src/core/CL/cl_kernels/gemmlowp.cl b/src/core/CL/cl_kernels/gemmlowp.cl index 7cd0c0b8db..16f8fe9f7f 100644 --- a/src/core/CL/cl_kernels/gemmlowp.cl +++ b/src/core/CL/cl_kernels/gemmlowp.cl @@ -22,6 +22,7 @@ * SOFTWARE. */ #include "helpers.h" +#include "helpers_asymm.h" #if defined(COLS_B) /** This OpenCL kernel computes the matrix multiplication between matrix A (src0) and matrix B (src1) @@ -428,7 +429,7 @@ __kernel void gemmlowp_offset_contribution(TENSOR3D_DECLARATION(mm_result) Image sum_col = CONVERT_TO_IMAGE_STRUCT(sum_col); // Compute the offset contribution due to A_OFFSET - a_offset_s32 = vload16(0, (__global int *)sum_col.ptr + get_global_id(2) * sum_col_stride_y); + a_offset_s32 = vload16(0, (__global int *)(sum_col.ptr)); a_offset_s32 *= (int16)A_OFFSET; #endif // defined(A_OFFSET) @@ -507,23 +508,17 @@ __kernel void gemmlowp_output_stage_quantize_down(TENSOR3D_DECLARATION(src), int16 input_values = vload16(0, (__global int *)src.ptr); - // Add the offset terms to GEMM's result - input_values += (int16)RESULT_OFFSET; - - // Multiply by result_mult_int - input_values *= (int16)RESULT_MULT_INT; - #if defined(ADD_BIAS) // Add bias const int16 biases_values = vload16(0, (__global int *)biases.ptr); input_values += (int16)biases_values; #endif // defined(ADD_BIAS) - // Shift final result - input_values >>= RESULT_SHIFT; + // Multiply by result_mult_int and shift + input_values = ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(input_values, RESULT_MULT_INT, RESULT_SHIFT, 16); - // Saturate negative values - input_values = max(input_values, (int16)0); + // Add the offset terms to GEMM's result + input_values += (int16)RESULT_OFFSET; uchar16 res = convert_uchar16_sat(input_values); -- cgit v1.2.1