From ed902bce67d7e6a1d918806bc172d17e2b415c4e Mon Sep 17 00:00:00 2001 From: Michele Di Giorgio Date: Thu, 22 Oct 2020 12:05:09 +0100 Subject: COMPMID-3878: Fix nightly failure due to missing conversion to output data type In gemmlowp_matrix_b_reduction kernel the accumulator data type might be set to uint if the input data type is unsigned quantized. However, the output of this kernel is always a signed integer, hence we need to convert the result before storing in memory. Change-Id: I9b936fbbcb8cd64319c42872648f5058f686b228 Signed-off-by: Michele Di Giorgio Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4233 Reviewed-by: Giorgio Arena Comments-Addressed: Arm Jenkins Tested-by: Arm Jenkins --- src/core/CL/cl_kernels/gemmlowp.cl | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/core/CL/cl_kernels/gemmlowp.cl b/src/core/CL/cl_kernels/gemmlowp.cl index feefaa7197..97150e05a2 100644 --- a/src/core/CL/cl_kernels/gemmlowp.cl +++ b/src/core/CL/cl_kernels/gemmlowp.cl @@ -1278,7 +1278,7 @@ __kernel void gemmlowp_matrix_b_reduction(TENSOR3D_DECLARATION(src), __global uchar *dst_addr = dst_ptr + dst_offset_first_element_in_bytes + x_offs * sizeof(int) + y * dst_stride_y; VEC_DATA_TYPE(ACC_DATA_TYPE, VEC_SIZE) - sum_col_32_0 = (VEC_DATA_TYPE(ACC_DATA_TYPE, VEC_SIZE))0; + sum_col_32 = (VEC_DATA_TYPE(ACC_DATA_TYPE, VEC_SIZE))0; int i = 0; // This for loop performs 4 accumulations @@ -1293,8 +1293,8 @@ __kernel void gemmlowp_matrix_b_reduction(TENSOR3D_DECLARATION(src), const VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) b3 = VLOAD(VEC_SIZE)(0, matrix_b + 3 * src_stride_y); - sum_col_32_0 += CONVERT(b0, VEC_DATA_TYPE(ACC_DATA_TYPE, VEC_SIZE)) + CONVERT(b1, VEC_DATA_TYPE(ACC_DATA_TYPE, VEC_SIZE)) + CONVERT(b2, VEC_DATA_TYPE(ACC_DATA_TYPE, VEC_SIZE)) + CONVERT(b3, - VEC_DATA_TYPE(ACC_DATA_TYPE, VEC_SIZE)); + sum_col_32 += CONVERT(b0, VEC_DATA_TYPE(ACC_DATA_TYPE, VEC_SIZE)) + CONVERT(b1, VEC_DATA_TYPE(ACC_DATA_TYPE, VEC_SIZE)) + CONVERT(b2, VEC_DATA_TYPE(ACC_DATA_TYPE, VEC_SIZE)) + CONVERT(b3, + VEC_DATA_TYPE(ACC_DATA_TYPE, VEC_SIZE)); matrix_b += 4 * src_stride_y; } @@ -1305,15 +1305,18 @@ __kernel void gemmlowp_matrix_b_reduction(TENSOR3D_DECLARATION(src), const VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) b0 = VLOAD(VEC_SIZE)(0, matrix_b); - sum_col_32_0 += CONVERT(b0, VEC_DATA_TYPE(ACC_DATA_TYPE, VEC_SIZE)); + sum_col_32 += CONVERT(b0, VEC_DATA_TYPE(ACC_DATA_TYPE, VEC_SIZE)); matrix_b += src_stride_y; } #if defined(SCALAR) - sum_col_32_0 *= (VEC_DATA_TYPE(ACC_DATA_TYPE, VEC_SIZE))SCALAR; + sum_col_32 *= (VEC_DATA_TYPE(ACC_DATA_TYPE, VEC_SIZE))SCALAR; #endif // defined(SCALAR) - STORE_VECTOR_SELECT(sum_col_32_, int, dst_addr, VEC_SIZE, VEC_SIZE_LEFTOVER, VEC_SIZE_LEFTOVER != 0 && get_global_id(0) == 0) + VEC_DATA_TYPE(int, VEC_SIZE) + res0 = CONVERT(sum_col_32, VEC_DATA_TYPE(int, VEC_SIZE)); + + STORE_VECTOR_SELECT(res, int, dst_addr, VEC_SIZE, VEC_SIZE_LEFTOVER, VEC_SIZE_LEFTOVER != 0 && get_global_id(0) == 0) } #endif // defined(COLS_B) && defined(ROWS_B) && defined(VEC_SIZE) && defined(VEC_SIZE_LEFTOVER) -- cgit v1.2.1