diff options
author | Michele Di Giorgio <michele.digiorgio@arm.com> | 2021-08-26 14:47:55 +0100 |
---|---|---|
committer | Michele Di Giorgio <michele.digiorgio@arm.com> | 2021-09-01 22:11:21 +0000 |
commit | 50335fd3d0734157382741fcf1bfdaf630c60c4b (patch) | |
tree | aa6a8e65cbd2245b84d5ca9c4df7ff5f6735e982 /src/core/CL/cl_kernels/common/gemm.cl | |
parent | e2276cb18f9dffd484bc5d11f65f8526fb32d5bd (diff) | |
download | ComputeLibrary-50335fd3d0734157382741fcf1bfdaf630c60c4b.tar.gz |
Remove padding from ClGemmMatrixMultiplyReshapedKernel
Create new macros for loading values from memory while being aware of
boundaries of the tensor to not generate page faults.
Resolves: COMPMID-4447
Change-Id: If9a455291e395ebd9070ebe5e120b3064d8fab29
Signed-off-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/6168
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/CL/cl_kernels/common/gemm.cl')
-rw-r--r-- | src/core/CL/cl_kernels/common/gemm.cl | 40 |
1 files changed, 20 insertions, 20 deletions
diff --git a/src/core/CL/cl_kernels/common/gemm.cl b/src/core/CL/cl_kernels/common/gemm.cl index 10435d376f..ff153af542 100644 --- a/src/core/CL/cl_kernels/common/gemm.cl +++ b/src/core/CL/cl_kernels/common/gemm.cl @@ -2705,6 +2705,9 @@ __kernel void gemm_mm_reshaped_lhs_nt_rhs_t(IMAGE_DECLARATION(lhs), REPEAT_VAR_INIT_TO_CONST(M0, uint, zout, 0); + const bool cond_y = ((get_global_id(1) + 1) * M0 >= M); + const bool cond_x = ((get_global_id(0) + 1) * N0 >= N); + #if defined(REINTERPRET_OUTPUT_AS_3D) // The plane (zin) is calculated dividing M (y * M0) by HEIGHT_GEMM3D @@ -2730,7 +2733,7 @@ __kernel void gemm_mm_reshaped_lhs_nt_rhs_t(IMAGE_DECLARATION(lhs), #if defined(BROADCAST_BIAS) __global uchar *bias_addr = bias_ptr + bias_offset_first_element_in_bytes + (get_global_id(0) * (uint)N0 * sizeof(DATA_TYPE)); - LOAD_BLOCK(1, N0, DATA_TYPE, bias, bias_addr, 0, bias_stride_y, zero); + LOAD_BLOCK_BOUNDARY_AWARE(1, N0, DATA_TYPE, bias, bias_addr, 0, bias_stride_y, zero, 1, PARTIAL_STORE_N0, cond_y, cond_x); #ifndef UNIT_BETA SCALE_BLOCK(1, DATA_TYPE, bias, BETA); @@ -2748,7 +2751,7 @@ __kernel void gemm_mm_reshaped_lhs_nt_rhs_t(IMAGE_DECLARATION(lhs), __global uchar *bias_addr = bias_ptr + bias_offset_first_element_in_bytes + (get_global_id(0) * (uint)N0 * sizeof(DATA_TYPE)) + (get_global_id(1) * (uint)M0 * bias_stride_y) + get_global_id( 2) * bias_stride_z; - LOAD_BLOCK(M0, N0, DATA_TYPE, bias, bias_addr, 0, bias_stride_y, zero); + LOAD_BLOCK_BOUNDARY_AWARE(M0, N0, DATA_TYPE, bias, bias_addr, 0, bias_stride_y, zero, PARTIAL_STORE_M0, PARTIAL_STORE_N0, cond_y, cond_x); #ifndef UNIT_BETA SCALE_BLOCK(M0, DATA_TYPE, bias, BETA); @@ -2773,9 +2776,6 @@ __kernel void gemm_mm_reshaped_lhs_nt_rhs_t(IMAGE_DECLARATION(lhs), #endif // defined(MIXED_PRECISION) #endif // defined(ACTIVATION_TYPE) - const bool cond_y = ((get_global_id(1) + 1) * M0 >= M); - const bool cond_x = ((get_global_id(0) + 1) * N0 >= N); - // Store output block #if defined(MIXED_PRECISION) CONVERT_BLOCK(M0, N0, DATA_TYPE, c, c_lp); @@ -2975,6 +2975,9 @@ __kernel void gemm_mm_reshaped_lhs_nt_rhs_t_texture(IMAGE_DECLARATION(lhs), REPEAT_VAR_INIT_TO_CONST(M0, uint, zout, 0); + const bool cond_y = ((get_global_id(1) + 1) * M0 >= M); + const bool cond_x = ((get_global_id(0) + 1) * N0 >= N); + #if defined(REINTERPRET_OUTPUT_AS_3D) // The plane (zin) is calculated dividing M (y * M0) by HEIGHT_GEMM3D @@ -3000,7 +3003,7 @@ __kernel void gemm_mm_reshaped_lhs_nt_rhs_t_texture(IMAGE_DECLARATION(lhs), #if defined(BROADCAST_BIAS) __global uchar *bias_addr = bias_ptr + bias_offset_first_element_in_bytes + (get_global_id(0) * (uint)N0 * sizeof(DATA_TYPE)); - LOAD_BLOCK(1, N0, DATA_TYPE, bias, bias_addr, 0, bias_stride_y, zero); + LOAD_BLOCK_BOUNDARY_AWARE(1, N0, DATA_TYPE, bias, bias_addr, 0, bias_stride_y, zero, 1, PARTIAL_STORE_N0, cond_y, cond_x); #ifndef UNIT_BETA SCALE_BLOCK(1, DATA_TYPE, bias, BETA); @@ -3018,7 +3021,7 @@ __kernel void gemm_mm_reshaped_lhs_nt_rhs_t_texture(IMAGE_DECLARATION(lhs), __global uchar *bias_addr = bias_ptr + bias_offset_first_element_in_bytes + (get_global_id(0) * (uint)N0 * sizeof(DATA_TYPE)) + (get_global_id(1) * (uint)M0 * bias_stride_y) + get_global_id( 2) * bias_stride_z; - LOAD_BLOCK(M0, N0, DATA_TYPE, bias, bias_addr, 0, bias_stride_y, zero); + LOAD_BLOCK_BOUNDARY_AWARE(M0, N0, DATA_TYPE, bias, bias_addr, 0, bias_stride_y, zero, PARTIAL_STORE_M0, PARTIAL_STORE_N0, cond_y, cond_x); #ifndef UNIT_BETA SCALE_BLOCK(M0, DATA_TYPE, bias, BETA); @@ -3043,9 +3046,6 @@ __kernel void gemm_mm_reshaped_lhs_nt_rhs_t_texture(IMAGE_DECLARATION(lhs), #endif // defined(MIXED_PRECISION) #endif // defined(ACTIVATION_TYPE) - const bool cond_y = ((get_global_id(1) + 1) * M0 >= M); - const bool cond_x = ((get_global_id(0) + 1) * N0 >= N); - // Store output block #if defined(MIXED_PRECISION) CONVERT_BLOCK(M0, N0, DATA_TYPE, c, c_lp); @@ -3284,6 +3284,9 @@ __kernel void gemm_mm_reshaped_lhs_t_rhs_nt(IMAGE_DECLARATION(lhs), const uint y = get_global_id(1); const uint z = get_global_id(2); + const bool cond_y = ((get_global_id(1) + 1) * M0 >= M); + const bool cond_x = ((get_global_id(0) + 1) * N0 >= N); + #if defined(DUMMY_WORK_ITEMS) if((x * N0 >= N) || (y * M0 >= M)) { @@ -3495,7 +3498,7 @@ __kernel void gemm_mm_reshaped_lhs_t_rhs_nt(IMAGE_DECLARATION(lhs), #if defined(BROADCAST_BIAS) __global uchar *bias_addr = bias_ptr + bias_offset_first_element_in_bytes + (x * (uint)N0 * sizeof(DATA_TYPE)); - LOAD_BLOCK(1, N0, DATA_TYPE, bias, bias_addr, 0, bias_stride_y, zero); + LOAD_BLOCK_BOUNDARY_AWARE(1, N0, DATA_TYPE, bias, bias_addr, 0, bias_stride_y, zero, 1, PARTIAL_STORE_N0, cond_y, cond_x); #ifndef UNIT_BETA SCALE_BLOCK(1, DATA_TYPE, bias, BETA); @@ -3513,7 +3516,7 @@ __kernel void gemm_mm_reshaped_lhs_t_rhs_nt(IMAGE_DECLARATION(lhs), __global uchar *bias_addr = bias_ptr + bias_offset_first_element_in_bytes + (get_global_id(0) * (uint)N0 * sizeof(DATA_TYPE)) + (get_global_id(1) * (uint)M0 * bias_stride_y) + get_global_id( 2) * bias_stride_z; - LOAD_BLOCK(M0, N0, DATA_TYPE, bias, bias_addr, 0, bias_stride_y, zero); + LOAD_BLOCK_BOUNDARY_AWARE(M0, N0, DATA_TYPE, bias, bias_addr, 0, bias_stride_y, zero, PARTIAL_STORE_M0, PARTIAL_STORE_N0, cond_y, cond_x); #ifndef UNIT_BETA SCALE_BLOCK(M0, DATA_TYPE, bias, BETA); @@ -3537,9 +3540,6 @@ __kernel void gemm_mm_reshaped_lhs_t_rhs_nt(IMAGE_DECLARATION(lhs), #endif // defined(MIXED_PRECISION) #endif // defined(ACTIVATION_TYPE) - const bool cond_y = ((get_global_id(1) + 1) * M0 >= M); - const bool cond_x = ((get_global_id(0) + 1) * N0 >= N); - // Store output block #if defined(MIXED_PRECISION) CONVERT_BLOCK(M0, N0, DATA_TYPE, c, c_lp); @@ -3838,6 +3838,9 @@ __kernel void gemm_mm_reshaped_lhs_t_rhs_nt_texture(IMAGE_DECLARATION(lhs), REPEAT_VAR_INIT_TO_CONST(M0, uint, zout, 0); + const bool cond_y = ((get_global_id(1) + 1) * M0 >= M); + const bool cond_x = ((get_global_id(0) + 1) * N0 >= N); + #if defined(REINTERPRET_OUTPUT_AS_3D) // The plane (zin) is calculated dividing M (y * M0) by HEIGHT_GEMM3D @@ -3863,7 +3866,7 @@ __kernel void gemm_mm_reshaped_lhs_t_rhs_nt_texture(IMAGE_DECLARATION(lhs), #if defined(BROADCAST_BIAS) __global uchar *bias_addr = bias_ptr + bias_offset_first_element_in_bytes + (x * (uint)N0 * sizeof(DATA_TYPE)); - LOAD_BLOCK(1, N0, DATA_TYPE, bias, bias_addr, 0, bias_stride_y, zero); + LOAD_BLOCK_BOUNDARY_AWARE(1, N0, DATA_TYPE, bias, bias_addr, 0, bias_stride_y, zero, 1, PARTIAL_STORE_N0, cond_y, cond_x); #ifndef UNIT_BETA SCALE_BLOCK(1, DATA_TYPE, bias, BETA); @@ -3880,7 +3883,7 @@ __kernel void gemm_mm_reshaped_lhs_t_rhs_nt_texture(IMAGE_DECLARATION(lhs), #else // defined(BROADCAST_BIAS) __global uchar *bias_addr = bias_ptr + bias_offset_first_element_in_bytes + (x * (uint)N0 * sizeof(DATA_TYPE)) + (y * (uint)M0 * bias_stride_y) + z * bias_stride_z; - LOAD_BLOCK(M0, N0, DATA_TYPE, bias, bias_addr, 0, bias_stride_y, zero); + LOAD_BLOCK_BOUNDARY_AWARE(M0, N0, DATA_TYPE, bias, bias_addr, 0, bias_stride_y, zero, PARTIAL_STORE_M0, PARTIAL_STORE_N0, cond_y, cond_x); #ifndef UNIT_BETA SCALE_BLOCK(M0, DATA_TYPE, bias, BETA); @@ -3904,9 +3907,6 @@ __kernel void gemm_mm_reshaped_lhs_t_rhs_nt_texture(IMAGE_DECLARATION(lhs), #endif // defined(MIXED_PRECISION) #endif // defined(ACTIVATION_TYPE) - const bool cond_y = ((get_global_id(1) + 1) * M0 >= M); - const bool cond_x = ((get_global_id(0) + 1) * N0 >= N); - // Store output block #if defined(MIXED_PRECISION) CONVERT_BLOCK(M0, N0, DATA_TYPE, c, c_lp); |