diff options
author | Michele Di Giorgio <michele.digiorgio@arm.com> | 2020-10-22 11:07:33 +0100 |
---|---|---|
committer | Michele Di Giorgio <michele.digiorgio@arm.com> | 2020-10-22 13:23:24 +0000 |
commit | 410bca42f560c87d4860dc5ae7374437ded2cd76 (patch) | |
tree | 4dfdbe43964c24d7451437bb518cde77fc29211b /src/core | |
parent | ed902bce67d7e6a1d918806bc172d17e2b415c4e (diff) | |
download | ComputeLibrary-410bca42f560c87d4860dc5ae7374437ded2cd76.tar.gz |
COMPMID-3599: Fix OpenCL gemmlowp_offset_contribution kernel
The kernel was not using the preprocessor arguments needed to avoid the
use of padding.
Change-Id: I6b5fdf4f3f14edbef60b9d5b60179d619700bc00
Signed-off-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4232
Reviewed-by: Giorgio Arena <giorgio.arena@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core')
-rw-r--r-- | src/core/CL/cl_kernels/gemmlowp.cl | 50 |
1 files changed, 25 insertions, 25 deletions
diff --git a/src/core/CL/cl_kernels/gemmlowp.cl b/src/core/CL/cl_kernels/gemmlowp.cl index 97150e05a2..950faeca0b 100644 --- a/src/core/CL/cl_kernels/gemmlowp.cl +++ b/src/core/CL/cl_kernels/gemmlowp.cl @@ -1486,44 +1486,44 @@ __kernel void gemmlowp_offset_contribution(TENSOR3D_DECLARATION(mm_result) const int z = get_global_id(2); // Compute offset contribution - int4 offset_term_s32 = offset_contribution( - x, y, z + VEC_INT offset_term_s32 = offset_contribution( + x, y, z #if defined(A_OFFSET) - , - sum_col_ptr, - sum_col_stride_x, - sum_col_step_x, - sum_col_stride_y, - sum_col_step_y, - sum_col_offset_first_element_in_bytes + , + sum_col_ptr, + sum_col_stride_x, + sum_col_step_x, + sum_col_stride_y, + sum_col_step_y, + sum_col_offset_first_element_in_bytes #endif // defined(A_OFFSET) #if defined(B_OFFSET) - , - sum_row_ptr, - sum_row_stride_x, - sum_row_step_x, - sum_row_stride_y, - sum_row_step_y, - sum_row_offset_first_element_in_bytes + , + sum_row_ptr, + sum_row_stride_x, + sum_row_step_x, + sum_row_stride_y, + sum_row_step_y, + sum_row_offset_first_element_in_bytes #endif // defined(B_OFFSET) #if defined(ADD_BIAS) - , - biases_ptr, - biases_stride_x, - biases_step_x, - biases_offset_first_element_in_bytes + , + biases_ptr, + biases_stride_x, + biases_step_x, + biases_offset_first_element_in_bytes #endif // defined(ADD_BIAS) - ); + ); __global uchar *mm_result_addr = mm_result_ptr + mm_result_offset_first_element_in_bytes + x * sizeof(int) + y * mm_result_stride_y + z * mm_result_stride_z; - int4 in_s32 = vload4(0, (__global int *)mm_result_addr); + VEC_INT in_s32_0 = VLOAD(VEC_SIZE)(0, (__global int *)mm_result_addr); // Add the offset terms to GEMM's result - in_s32 += offset_term_s32; + in_s32_0 += offset_term_s32; // Store the result with the offset contribution - vstore4(in_s32, 0, (__global int *)mm_result_addr); + STORE_VECTOR_SELECT(in_s32_, int, mm_result_addr, VEC_SIZE, VEC_SIZE_LEFTOVER, VEC_SIZE_LEFTOVER != 0 && get_global_id(0) == 0) } #if 
defined(RESULT_OFFSET) && defined(RESULT_MULTIPLIER) && defined(RESULT_SHIFT) && defined(OUTPUT_DATA_TYPE) |