aboutsummaryrefslogtreecommitdiff
path: root/src/core/CL/cl_kernels/gemmlowp.cl
diff options
context:
space:
mode:
author Georgios Pinitas <georgios.pinitas@arm.com> 2017-11-29 11:06:49 +0000
committer Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:41:58 +0000
commit 45bcc3a1c287a208098ae99288273a5129ddd5eb (patch)
tree f4f957dbc76f8e8e9a4871b16652e1033bcd4c73 /src/core/CL/cl_kernels/gemmlowp.cl
parent 303be90ee1f03f75309b421297ba16428ea98ea5 (diff)
download ComputeLibrary-45bcc3a1c287a208098ae99288273a5129ddd5eb.tar.gz
COMPMID-661: QASYMM8 support for fully connected layer.
Change-Id: I70e04d3a175ba366432ada98e9ca893c9f81b260
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/111094
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Tested-by: BSG Visual Compute Jenkins server to access repositories on http://mpd-gerrit.cambridge.arm.com <bsgcomp@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Diffstat (limited to 'src/core/CL/cl_kernels/gemmlowp.cl')
-rw-r--r-- src/core/CL/cl_kernels/gemmlowp.cl 17
1 files changed, 6 insertions, 11 deletions
diff --git a/src/core/CL/cl_kernels/gemmlowp.cl b/src/core/CL/cl_kernels/gemmlowp.cl
index 7cd0c0b8db..16f8fe9f7f 100644
--- a/src/core/CL/cl_kernels/gemmlowp.cl
+++ b/src/core/CL/cl_kernels/gemmlowp.cl
@@ -22,6 +22,7 @@
* SOFTWARE.
*/
#include "helpers.h"
+#include "helpers_asymm.h"
#if defined(COLS_B)
/** This OpenCL kernel computes the matrix multiplication between matrix A (src0) and matrix B (src1)
@@ -428,7 +429,7 @@ __kernel void gemmlowp_offset_contribution(TENSOR3D_DECLARATION(mm_result)
Image sum_col = CONVERT_TO_IMAGE_STRUCT(sum_col);
// Compute the offset contribution due to A_OFFSET
- a_offset_s32 = vload16(0, (__global int *)sum_col.ptr + get_global_id(2) * sum_col_stride_y);
+ a_offset_s32 = vload16(0, (__global int *)(sum_col.ptr));
a_offset_s32 *= (int16)A_OFFSET;
#endif // defined(A_OFFSET)
@@ -507,23 +508,17 @@ __kernel void gemmlowp_output_stage_quantize_down(TENSOR3D_DECLARATION(src),
int16 input_values = vload16(0, (__global int *)src.ptr);
- // Add the offset terms to GEMM's result
- input_values += (int16)RESULT_OFFSET;
-
- // Multiply by result_mult_int
- input_values *= (int16)RESULT_MULT_INT;
-
#if defined(ADD_BIAS)
// Add bias
const int16 biases_values = vload16(0, (__global int *)biases.ptr);
input_values += (int16)biases_values;
#endif // defined(ADD_BIAS)
- // Shift final result
- input_values >>= RESULT_SHIFT;
+ // Multiply by result_mult_int and shift
+ input_values = ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(input_values, RESULT_MULT_INT, RESULT_SHIFT, 16);
- // Saturate negative values
- input_values = max(input_values, (int16)0);
+ // Add the offset terms to GEMM's result
+ input_values += (int16)RESULT_OFFSET;
uchar16 res = convert_uchar16_sat(input_values);