diff options
author | Giorgio Arena <giorgio.arena@arm.com> | 2021-09-08 13:26:06 +0100 |
---|---|---|
committer | Giorgio Arena <giorgio.arena@arm.com> | 2021-09-09 11:45:01 +0000 |
commit | 951d520f492348ce07085f701078adcb48a4c1a2 (patch) | |
tree | fc768db80e059607cb46db4eb80725b1bb430131 /src/core | |
parent | 2f9ae16206cc7e646e74391226b4ed16115a6100 (diff) | |
download | ComputeLibrary-951d520f492348ce07085f701078adcb48a4c1a2.tar.gz |
Remove padding from ClGemmMatrixMultiplyReshapedOnlyRhsKernel
Resolve COMPMID-4450
Signed-off-by: Giorgio Arena <giorgio.arena@arm.com>
Change-Id: I6f280d5d66ec43fb5cb06c83fe15a1f227ad165d
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/6232
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core')
-rw-r--r-- | src/core/CL/cl_kernels/common/gemm.cl | 40 |
1 files changed, 20 insertions, 20 deletions
diff --git a/src/core/CL/cl_kernels/common/gemm.cl b/src/core/CL/cl_kernels/common/gemm.cl index 76e6c21ab6..87921f51fd 100644 --- a/src/core/CL/cl_kernels/common/gemm.cl +++ b/src/core/CL/cl_kernels/common/gemm.cl @@ -1096,6 +1096,9 @@ __kernel void gemm_mm_reshaped_only_rhs_t(IMAGE_DECLARATION(lhs), uint y = get_global_id(1); uint z = get_global_id(2); + const bool cond_y = y == 0; + const bool cond_x = ((x + 1) * N0 >= N); + #if defined(DUMMY_WORK_ITEMS) if((x * N0 >= N) || (y * M0 >= M)) { @@ -1250,7 +1253,7 @@ __kernel void gemm_mm_reshaped_only_rhs_t(IMAGE_DECLARATION(lhs), #if defined(BROADCAST_BIAS) __global uchar *bias_addr = bias_ptr + bias_offset_first_element_in_bytes + (get_global_id(0) * (uint)N0 * sizeof(DATA_TYPE)); - LOAD_BLOCK(1, N0, DATA_TYPE, bias, bias_addr, 0, bias_stride_y, zero); + LOAD_BLOCK_BOUNDARY_AWARE(1, N0, DATA_TYPE, bias, bias_addr, 0, bias_stride_y, zero, 1, PARTIAL_STORE_N0, false, cond_x); #ifndef UNIT_BETA SCALE_BLOCK(1, DATA_TYPE, bias, BETA); @@ -1262,7 +1265,7 @@ __kernel void gemm_mm_reshaped_only_rhs_t(IMAGE_DECLARATION(lhs), #else // defined(BROADCAST_BIAS) __global uchar *bias_addr = bias_ptr + bias_offset_first_element_in_bytes + (x * (uint)N0 * sizeof(DATA_TYPE)) + (COMPUTE_M0_START_ROW(y, M0, PARTIAL_STORE_M0) * bias_stride_y) + z * bias_stride_z; - LOAD_BLOCK(M0, N0, DATA_TYPE, bias, bias_addr, 0, bias_stride_y, zero); + LOAD_BLOCK_BOUNDARY_AWARE(M0, N0, DATA_TYPE, bias, bias_addr, 0, bias_stride_y, zero, PARTIAL_STORE_M0, PARTIAL_STORE_N0, cond_y, cond_x); #ifndef UNIT_BETA SCALE_BLOCK(M0, DATA_TYPE, bias, BETA); @@ -1278,9 +1281,6 @@ __kernel void gemm_mm_reshaped_only_rhs_t(IMAGE_DECLARATION(lhs), ACTIVATION_BLOCK(M0, ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, c, A_VAL, B_VAL); #endif // defined(ACTIVATION_TYPE) - const bool cond_y = y == 0; - const bool cond_x = ((x + 1) * N0 >= N); - // Store output block STORE_BLOCK_BOUNDARY_AWARE(M0, N0, DATA_TYPE, c, dst_addr, dst_stride_y, zout, PARTIAL_STORE_M0, PARTIAL_STORE_N0, cond_y, cond_x); @@ -1392,6 +1392,9 @@ __kernel void gemm_mm_reshaped_only_rhs_t_texture(IMAGE_DECLARATION(lhs), uint y = get_global_id(1); uint z = get_global_id(2); + const bool cond_y = y == 0; + const bool cond_x = ((x + 1) * N0 >= N); + #if defined(DUMMY_WORK_ITEMS) if((x * N0 >= N) || (y * M0 >= M)) { @@ -1596,7 +1599,7 @@ __kernel void gemm_mm_reshaped_only_rhs_t_texture(IMAGE_DECLARATION(lhs), #if defined(BROADCAST_BIAS) __global uchar *bias_addr = bias_ptr + bias_offset_first_element_in_bytes + (get_global_id(0) * (uint)N0 * sizeof(DATA_TYPE)); - LOAD_BLOCK(1, N0, DATA_TYPE, bias, bias_addr, 0, bias_stride_y, zero); + LOAD_BLOCK_BOUNDARY_AWARE(1, N0, DATA_TYPE, bias, bias_addr, 0, bias_stride_y, zero, 1, PARTIAL_STORE_N0, false, cond_x); #ifndef UNIT_BETA SCALE_BLOCK(1, DATA_TYPE, bias, BETA); @@ -1608,7 +1611,7 @@ __kernel void gemm_mm_reshaped_only_rhs_t_texture(IMAGE_DECLARATION(lhs), #else // defined(BROADCAST_BIAS) __global uchar *bias_addr = bias_ptr + bias_offset_first_element_in_bytes + (x * (uint)N0 * sizeof(DATA_TYPE)) + (COMPUTE_M0_START_ROW(y, M0, PARTIAL_STORE_M0) * bias_stride_y) + z * bias_stride_z; - LOAD_BLOCK(M0, N0, DATA_TYPE, bias, bias_addr, 0, bias_stride_y, zero); + LOAD_BLOCK_BOUNDARY_AWARE(M0, N0, DATA_TYPE, bias, bias_addr, 0, bias_stride_y, zero, PARTIAL_STORE_M0, PARTIAL_STORE_N0, cond_y, cond_x); #ifndef UNIT_BETA SCALE_BLOCK(M0, DATA_TYPE, bias, BETA); @@ -1624,9 +1627,6 @@ __kernel void gemm_mm_reshaped_only_rhs_t_texture(IMAGE_DECLARATION(lhs), ACTIVATION_BLOCK(M0, ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, c, A_VAL, B_VAL); #endif // defined(ACTIVATION_TYPE) - const bool cond_y = y == 0; - const bool cond_x = ((x + 1) * N0 >= N); - // Store output block STORE_BLOCK_BOUNDARY_AWARE(M0, N0, DATA_TYPE, c, dst_addr, dst_stride_y, zout, PARTIAL_STORE_M0, PARTIAL_STORE_N0, cond_y, cond_x); @@ -1813,6 +1813,9 @@ __kernel void gemm_mm_reshaped_only_rhs_nt(IMAGE_DECLARATION(lhs), uint y = get_global_id(1); uint z = get_global_id(2); + const bool cond_y = y == 0; + const bool cond_x = ((x + 1) * N0 >= N); + #if defined(DUMMY_WORK_ITEMS) if((x * N0 >= N) || (y * M0 >= M)) { @@ -1992,7 +1995,7 @@ __kernel void gemm_mm_reshaped_only_rhs_nt(IMAGE_DECLARATION(lhs), #if defined(BROADCAST_BIAS) __global uchar *bias_addr = bias_ptr + bias_offset_first_element_in_bytes + (get_global_id(0) * (uint)N0 * sizeof(DATA_TYPE)); - LOAD_BLOCK(1, N0, DATA_TYPE, bias, bias_addr, 0, bias_stride_y, zero); + LOAD_BLOCK_BOUNDARY_AWARE(1, N0, DATA_TYPE, bias, bias_addr, 0, bias_stride_y, zero, 1, PARTIAL_STORE_N0, false, cond_x); #ifndef UNIT_BETA SCALE_BLOCK(1, DATA_TYPE, bias, BETA); @@ -2004,7 +2007,7 @@ __kernel void gemm_mm_reshaped_only_rhs_nt(IMAGE_DECLARATION(lhs), #else // defined(BROADCAST_BIAS) __global uchar *bias_addr = bias_ptr + bias_offset_first_element_in_bytes + (x * (uint)N0 * sizeof(DATA_TYPE)) + (COMPUTE_M0_START_ROW(y, M0, PARTIAL_STORE_M0) * bias_stride_y) + z * bias_stride_z; - LOAD_BLOCK(M0, N0, DATA_TYPE, bias, bias_addr, 0, bias_stride_y, zero); + LOAD_BLOCK_BOUNDARY_AWARE(M0, N0, DATA_TYPE, bias, bias_addr, 0, bias_stride_y, zero, PARTIAL_STORE_M0, PARTIAL_STORE_N0, cond_y, cond_x); #ifndef UNIT_BETA SCALE_BLOCK(M0, DATA_TYPE, bias, BETA); @@ -2020,9 +2023,6 @@ __kernel void gemm_mm_reshaped_only_rhs_nt(IMAGE_DECLARATION(lhs), ACTIVATION_BLOCK(M0, ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, c, A_VAL, B_VAL); #endif // defined(ACTIVATION_TYPE) - const bool cond_y = y == 0; - const bool cond_x = ((x + 1) * N0 >= N); - // Store output block STORE_BLOCK_BOUNDARY_AWARE(M0, N0, DATA_TYPE, c, dst_addr, dst_stride_y, zout, PARTIAL_STORE_M0, PARTIAL_STORE_N0, cond_y, cond_x); @@ -2130,6 +2130,9 @@ __kernel void gemm_mm_reshaped_only_rhs_nt_texture(IMAGE_DECLARATION(lhs), uint y = get_global_id(1); uint z = get_global_id(2); + const bool cond_y = y == 0; + const bool cond_x = ((x + 1) * N0 >= N); + #if defined(DUMMY_WORK_ITEMS) if((x * N0 >= N) || (y * M0 >= M)) { @@ -2301,7 +2304,7 @@ __kernel void gemm_mm_reshaped_only_rhs_nt_texture(IMAGE_DECLARATION(lhs), #if defined(BROADCAST_BIAS) __global uchar *bias_addr = bias_ptr + bias_offset_first_element_in_bytes + (get_global_id(0) * (uint)N0 * sizeof(DATA_TYPE)); - LOAD_BLOCK(1, N0, DATA_TYPE, bias, bias_addr, 0, bias_stride_y, zero); + LOAD_BLOCK_BOUNDARY_AWARE(1, N0, DATA_TYPE, bias, bias_addr, 0, bias_stride_y, zero, 1, PARTIAL_STORE_N0, false, cond_x); #ifndef UNIT_BETA SCALE_BLOCK(1, DATA_TYPE, bias, BETA); @@ -2313,7 +2316,7 @@ __kernel void gemm_mm_reshaped_only_rhs_nt_texture(IMAGE_DECLARATION(lhs), #else // defined(BROADCAST_BIAS) __global uchar *bias_addr = bias_ptr + bias_offset_first_element_in_bytes + (x * (uint)N0 * sizeof(DATA_TYPE)) + (COMPUTE_M0_START_ROW(y, M0, PARTIAL_STORE_M0) * bias_stride_y) + z * bias_stride_z; - LOAD_BLOCK(M0, N0, DATA_TYPE, bias, bias_addr, 0, bias_stride_y, zero); + LOAD_BLOCK_BOUNDARY_AWARE(M0, N0, DATA_TYPE, bias, bias_addr, 0, bias_stride_y, zero, PARTIAL_STORE_M0, PARTIAL_STORE_N0, cond_y, cond_x); #ifndef UNIT_BETA SCALE_BLOCK(M0, DATA_TYPE, bias, BETA); @@ -2329,9 +2332,6 @@ __kernel void gemm_mm_reshaped_only_rhs_nt_texture(IMAGE_DECLARATION(lhs), ACTIVATION_BLOCK(M0, ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, c, A_VAL, B_VAL); #endif // defined(ACTIVATION_TYPE) - const bool cond_y = y == 0; - const bool cond_x = ((x + 1) * N0 >= N); - // Store output block STORE_BLOCK_BOUNDARY_AWARE(M0, N0, DATA_TYPE, c, dst_addr, dst_stride_y, zout, PARTIAL_STORE_M0, PARTIAL_STORE_N0, cond_y, cond_x); |