From 951d520f492348ce07085f701078adcb48a4c1a2 Mon Sep 17 00:00:00 2001 From: Giorgio Arena Date: Wed, 8 Sep 2021 13:26:06 +0100 Subject: Remove padding from ClGemmMatrixMultiplyReshapedOnlyRhsKernel Resolve COMPMID-4450 Signed-off-by: Giorgio Arena Change-Id: I6f280d5d66ec43fb5cb06c83fe15a1f227ad165d Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/6232 Tested-by: Arm Jenkins Reviewed-by: Gian Marco Iodice Comments-Addressed: Arm Jenkins --- src/core/CL/cl_kernels/common/gemm.cl | 40 +++++++++++++++++------------------ 1 file changed, 20 insertions(+), 20 deletions(-) (limited to 'src/core/CL/cl_kernels') diff --git a/src/core/CL/cl_kernels/common/gemm.cl b/src/core/CL/cl_kernels/common/gemm.cl index 76e6c21ab6..87921f51fd 100644 --- a/src/core/CL/cl_kernels/common/gemm.cl +++ b/src/core/CL/cl_kernels/common/gemm.cl @@ -1096,6 +1096,9 @@ __kernel void gemm_mm_reshaped_only_rhs_t(IMAGE_DECLARATION(lhs), uint y = get_global_id(1); uint z = get_global_id(2); + const bool cond_y = y == 0; + const bool cond_x = ((x + 1) * N0 >= N); + #if defined(DUMMY_WORK_ITEMS) if((x * N0 >= N) || (y * M0 >= M)) { @@ -1250,7 +1253,7 @@ __kernel void gemm_mm_reshaped_only_rhs_t(IMAGE_DECLARATION(lhs), #if defined(BROADCAST_BIAS) __global uchar *bias_addr = bias_ptr + bias_offset_first_element_in_bytes + (get_global_id(0) * (uint)N0 * sizeof(DATA_TYPE)); - LOAD_BLOCK(1, N0, DATA_TYPE, bias, bias_addr, 0, bias_stride_y, zero); + LOAD_BLOCK_BOUNDARY_AWARE(1, N0, DATA_TYPE, bias, bias_addr, 0, bias_stride_y, zero, 1, PARTIAL_STORE_N0, false, cond_x); #ifndef UNIT_BETA SCALE_BLOCK(1, DATA_TYPE, bias, BETA); @@ -1262,7 +1265,7 @@ __kernel void gemm_mm_reshaped_only_rhs_t(IMAGE_DECLARATION(lhs), #else // defined(BROADCAST_BIAS) __global uchar *bias_addr = bias_ptr + bias_offset_first_element_in_bytes + (x * (uint)N0 * sizeof(DATA_TYPE)) + (COMPUTE_M0_START_ROW(y, M0, PARTIAL_STORE_M0) * bias_stride_y) + z * bias_stride_z; - LOAD_BLOCK(M0, N0, DATA_TYPE, bias, bias_addr, 0, bias_stride_y, zero); + LOAD_BLOCK_BOUNDARY_AWARE(M0, N0, DATA_TYPE, bias, bias_addr, 0, bias_stride_y, zero, PARTIAL_STORE_M0, PARTIAL_STORE_N0, cond_y, cond_x); #ifndef UNIT_BETA SCALE_BLOCK(M0, DATA_TYPE, bias, BETA); @@ -1278,9 +1281,6 @@ __kernel void gemm_mm_reshaped_only_rhs_t(IMAGE_DECLARATION(lhs), ACTIVATION_BLOCK(M0, ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, c, A_VAL, B_VAL); #endif // defined(ACTIVATION_TYPE) - const bool cond_y = y == 0; - const bool cond_x = ((x + 1) * N0 >= N); - // Store output block STORE_BLOCK_BOUNDARY_AWARE(M0, N0, DATA_TYPE, c, dst_addr, dst_stride_y, zout, PARTIAL_STORE_M0, PARTIAL_STORE_N0, cond_y, cond_x); @@ -1392,6 +1392,9 @@ __kernel void gemm_mm_reshaped_only_rhs_t_texture(IMAGE_DECLARATION(lhs), uint y = get_global_id(1); uint z = get_global_id(2); + const bool cond_y = y == 0; + const bool cond_x = ((x + 1) * N0 >= N); + #if defined(DUMMY_WORK_ITEMS) if((x * N0 >= N) || (y * M0 >= M)) { @@ -1596,7 +1599,7 @@ __kernel void gemm_mm_reshaped_only_rhs_t_texture(IMAGE_DECLARATION(lhs), #if defined(BROADCAST_BIAS) __global uchar *bias_addr = bias_ptr + bias_offset_first_element_in_bytes + (get_global_id(0) * (uint)N0 * sizeof(DATA_TYPE)); - LOAD_BLOCK(1, N0, DATA_TYPE, bias, bias_addr, 0, bias_stride_y, zero); + LOAD_BLOCK_BOUNDARY_AWARE(1, N0, DATA_TYPE, bias, bias_addr, 0, bias_stride_y, zero, 1, PARTIAL_STORE_N0, false, cond_x); #ifndef UNIT_BETA SCALE_BLOCK(1, DATA_TYPE, bias, BETA); @@ -1608,7 +1611,7 @@ __kernel void gemm_mm_reshaped_only_rhs_t_texture(IMAGE_DECLARATION(lhs), #else // defined(BROADCAST_BIAS) __global uchar *bias_addr = bias_ptr + bias_offset_first_element_in_bytes + (x * (uint)N0 * sizeof(DATA_TYPE)) + (COMPUTE_M0_START_ROW(y, M0, PARTIAL_STORE_M0) * bias_stride_y) + z * bias_stride_z; - LOAD_BLOCK(M0, N0, DATA_TYPE, bias, bias_addr, 0, bias_stride_y, zero); + LOAD_BLOCK_BOUNDARY_AWARE(M0, N0, DATA_TYPE, bias, bias_addr, 0, bias_stride_y, zero, PARTIAL_STORE_M0, PARTIAL_STORE_N0, cond_y, cond_x); #ifndef UNIT_BETA SCALE_BLOCK(M0, DATA_TYPE, bias, BETA); @@ -1624,9 +1627,6 @@ __kernel void gemm_mm_reshaped_only_rhs_t_texture(IMAGE_DECLARATION(lhs), ACTIVATION_BLOCK(M0, ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, c, A_VAL, B_VAL); #endif // defined(ACTIVATION_TYPE) - const bool cond_y = y == 0; - const bool cond_x = ((x + 1) * N0 >= N); - // Store output block STORE_BLOCK_BOUNDARY_AWARE(M0, N0, DATA_TYPE, c, dst_addr, dst_stride_y, zout, PARTIAL_STORE_M0, PARTIAL_STORE_N0, cond_y, cond_x); @@ -1813,6 +1813,9 @@ __kernel void gemm_mm_reshaped_only_rhs_nt(IMAGE_DECLARATION(lhs), uint y = get_global_id(1); uint z = get_global_id(2); + const bool cond_y = y == 0; + const bool cond_x = ((x + 1) * N0 >= N); + #if defined(DUMMY_WORK_ITEMS) if((x * N0 >= N) || (y * M0 >= M)) { @@ -1992,7 +1995,7 @@ __kernel void gemm_mm_reshaped_only_rhs_nt(IMAGE_DECLARATION(lhs), #if defined(BROADCAST_BIAS) __global uchar *bias_addr = bias_ptr + bias_offset_first_element_in_bytes + (get_global_id(0) * (uint)N0 * sizeof(DATA_TYPE)); - LOAD_BLOCK(1, N0, DATA_TYPE, bias, bias_addr, 0, bias_stride_y, zero); + LOAD_BLOCK_BOUNDARY_AWARE(1, N0, DATA_TYPE, bias, bias_addr, 0, bias_stride_y, zero, 1, PARTIAL_STORE_N0, false, cond_x); #ifndef UNIT_BETA SCALE_BLOCK(1, DATA_TYPE, bias, BETA); @@ -2004,7 +2007,7 @@ __kernel void gemm_mm_reshaped_only_rhs_nt(IMAGE_DECLARATION(lhs), #else // defined(BROADCAST_BIAS) __global uchar *bias_addr = bias_ptr + bias_offset_first_element_in_bytes + (x * (uint)N0 * sizeof(DATA_TYPE)) + (COMPUTE_M0_START_ROW(y, M0, PARTIAL_STORE_M0) * bias_stride_y) + z * bias_stride_z; - LOAD_BLOCK(M0, N0, DATA_TYPE, bias, bias_addr, 0, bias_stride_y, zero); + LOAD_BLOCK_BOUNDARY_AWARE(M0, N0, DATA_TYPE, bias, bias_addr, 0, bias_stride_y, zero, PARTIAL_STORE_M0, PARTIAL_STORE_N0, cond_y, cond_x); #ifndef UNIT_BETA SCALE_BLOCK(M0, DATA_TYPE, bias, BETA); @@ -2020,9 +2023,6 @@ __kernel void gemm_mm_reshaped_only_rhs_nt(IMAGE_DECLARATION(lhs), ACTIVATION_BLOCK(M0, ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, c, A_VAL, B_VAL); #endif // defined(ACTIVATION_TYPE) - const bool cond_y = y == 0; - const bool cond_x = ((x + 1) * N0 >= N); - // Store output block STORE_BLOCK_BOUNDARY_AWARE(M0, N0, DATA_TYPE, c, dst_addr, dst_stride_y, zout, PARTIAL_STORE_M0, PARTIAL_STORE_N0, cond_y, cond_x); @@ -2130,6 +2130,9 @@ __kernel void gemm_mm_reshaped_only_rhs_nt_texture(IMAGE_DECLARATION(lhs), uint y = get_global_id(1); uint z = get_global_id(2); + const bool cond_y = y == 0; + const bool cond_x = ((x + 1) * N0 >= N); + #if defined(DUMMY_WORK_ITEMS) if((x * N0 >= N) || (y * M0 >= M)) { @@ -2301,7 +2304,7 @@ __kernel void gemm_mm_reshaped_only_rhs_nt_texture(IMAGE_DECLARATION(lhs), #if defined(BROADCAST_BIAS) __global uchar *bias_addr = bias_ptr + bias_offset_first_element_in_bytes + (get_global_id(0) * (uint)N0 * sizeof(DATA_TYPE)); - LOAD_BLOCK(1, N0, DATA_TYPE, bias, bias_addr, 0, bias_stride_y, zero); + LOAD_BLOCK_BOUNDARY_AWARE(1, N0, DATA_TYPE, bias, bias_addr, 0, bias_stride_y, zero, 1, PARTIAL_STORE_N0, false, cond_x); #ifndef UNIT_BETA SCALE_BLOCK(1, DATA_TYPE, bias, BETA); @@ -2313,7 +2316,7 @@ __kernel void gemm_mm_reshaped_only_rhs_nt_texture(IMAGE_DECLARATION(lhs), #else // defined(BROADCAST_BIAS) __global uchar *bias_addr = bias_ptr + bias_offset_first_element_in_bytes + (x * (uint)N0 * sizeof(DATA_TYPE)) + (COMPUTE_M0_START_ROW(y, M0, PARTIAL_STORE_M0) * bias_stride_y) + z * bias_stride_z; - LOAD_BLOCK(M0, N0, DATA_TYPE, bias, bias_addr, 0, bias_stride_y, zero); + LOAD_BLOCK_BOUNDARY_AWARE(M0, N0, DATA_TYPE, bias, bias_addr, 0, bias_stride_y, zero, PARTIAL_STORE_M0, PARTIAL_STORE_N0, cond_y, cond_x); #ifndef UNIT_BETA SCALE_BLOCK(M0, DATA_TYPE, bias, BETA); @@ -2329,9 +2332,6 @@ __kernel void gemm_mm_reshaped_only_rhs_nt_texture(IMAGE_DECLARATION(lhs), ACTIVATION_BLOCK(M0, ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, c, A_VAL, B_VAL); #endif // defined(ACTIVATION_TYPE) - const bool cond_y = y == 0; - const bool cond_x = ((x + 1) * N0 >= N); - // Store output block STORE_BLOCK_BOUNDARY_AWARE(M0, N0, DATA_TYPE, c, dst_addr, dst_stride_y, zout, PARTIAL_STORE_M0, PARTIAL_STORE_N0, cond_y, cond_x); -- cgit v1.2.1