aboutsummaryrefslogtreecommitdiff
path: root/src/core/CL/cl_kernels
diff options
context:
space:
mode:
authorGeorgios Pinitas <georgios.pinitas@arm.com>2017-11-29 11:06:49 +0000
committerAnthony Barbier <anthony.barbier@arm.com>2018-11-02 16:41:58 +0000
commit45bcc3a1c287a208098ae99288273a5129ddd5eb (patch)
treef4f957dbc76f8e8e9a4871b16652e1033bcd4c73 /src/core/CL/cl_kernels
parent303be90ee1f03f75309b421297ba16428ea98ea5 (diff)
downloadComputeLibrary-45bcc3a1c287a208098ae99288273a5129ddd5eb.tar.gz
COMPMID-661: QASYMM8 support for fully connected layer.
Change-Id: I70e04d3a175ba366432ada98e9ca893c9f81b260 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/111094 Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com> Tested-by: BSG Visual Compute Jenkins server to access repositories on http://mpd-gerrit.cambridge.arm.com <bsgcomp@arm.com> Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Diffstat (limited to 'src/core/CL/cl_kernels')
-rw-r--r--src/core/CL/cl_kernels/convolution_layer.cl6
-rw-r--r--src/core/CL/cl_kernels/gemmlowp.cl17
2 files changed, 11 insertions, 12 deletions
diff --git a/src/core/CL/cl_kernels/convolution_layer.cl b/src/core/CL/cl_kernels/convolution_layer.cl
index e3018461e3..c7e3e644f4 100644
--- a/src/core/CL/cl_kernels/convolution_layer.cl
+++ b/src/core/CL/cl_kernels/convolution_layer.cl
@@ -151,10 +151,14 @@ __kernel void im2col_generic(
{
#if PAD_LEFT == 0 && PAD_TOP == 0 && PAD_RIGHT == 0 && PAD_BOTTOM == 0
*output_ptr = *((__global DATA_TYPE *)(input_ptr + x * src_stride_x + y * src_stride_y));
-#else // PAD_LEFT == 0 && PAD_TOP == 0 && PAD_RIGHT == 0 && PAD_BOTTOM == 0
+#else // PAD_LEFT == 0 && PAD_TOP == 0 && PAD_RIGHT == 0 && PAD_BOTTOM == 0
if(x < 0 || x >= SRC_WIDTH || y < 0 || y >= SRC_HEIGHT)
{
+#if defined(OFFSET)
+ *output_ptr = OFFSET;
+#else /* OFFSET */
*output_ptr = 0;
+#endif /* OFFSET */
}
else
{
diff --git a/src/core/CL/cl_kernels/gemmlowp.cl b/src/core/CL/cl_kernels/gemmlowp.cl
index 7cd0c0b8db..16f8fe9f7f 100644
--- a/src/core/CL/cl_kernels/gemmlowp.cl
+++ b/src/core/CL/cl_kernels/gemmlowp.cl
@@ -22,6 +22,7 @@
* SOFTWARE.
*/
#include "helpers.h"
+#include "helpers_asymm.h"
#if defined(COLS_B)
/** This OpenCL kernel computes the matrix multiplication between matrix A (src0) and matrix B (src1)
@@ -428,7 +429,7 @@ __kernel void gemmlowp_offset_contribution(TENSOR3D_DECLARATION(mm_result)
Image sum_col = CONVERT_TO_IMAGE_STRUCT(sum_col);
// Compute the offset contribution due to A_OFFSET
- a_offset_s32 = vload16(0, (__global int *)sum_col.ptr + get_global_id(2) * sum_col_stride_y);
+ a_offset_s32 = vload16(0, (__global int *)(sum_col.ptr));
a_offset_s32 *= (int16)A_OFFSET;
#endif // defined(A_OFFSET)
@@ -507,23 +508,17 @@ __kernel void gemmlowp_output_stage_quantize_down(TENSOR3D_DECLARATION(src),
int16 input_values = vload16(0, (__global int *)src.ptr);
- // Add the offset terms to GEMM's result
- input_values += (int16)RESULT_OFFSET;
-
- // Multiply by result_mult_int
- input_values *= (int16)RESULT_MULT_INT;
-
#if defined(ADD_BIAS)
// Add bias
const int16 biases_values = vload16(0, (__global int *)biases.ptr);
input_values += (int16)biases_values;
#endif // defined(ADD_BIAS)
- // Shift final result
- input_values >>= RESULT_SHIFT;
+ // Multiply by result_mult_int and shift
+ input_values = ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(input_values, RESULT_MULT_INT, RESULT_SHIFT, 16);
- // Saturate negative values
- input_values = max(input_values, (int16)0);
+ // Add the offset terms to GEMM's result
+ input_values += (int16)RESULT_OFFSET;
uchar16 res = convert_uchar16_sat(input_values);