diff options
author | Giorgio Arena <giorgio.arena@arm.com> | 2020-10-15 17:39:41 +0100 |
---|---|---|
committer | Giorgio Arena <giorgio.arena@arm.com> | 2020-10-21 13:13:53 +0000 |
commit | 1e2af2acc4cb789ba4c0e6935a4581ce4a050609 (patch) | |
tree | 32aacf11e7f5deb271e2177f36920b57c9afa5ab /src/core/CL/cl_kernels/gemm.cl | |
parent | ed5fb39d1d9d3e56d26b621cd1d56ceb39270701 (diff) | |
download | ComputeLibrary-1e2af2acc4cb789ba4c0e6935a4581ce4a050609.tar.gz |
COMPMID-3712 Remove OpenCL padding: CLDepthwiseConvolutionLayer3x3NHWCKernel FP16/32
Removed unused N from partial block loading macro
Created utility to assert change in padding
Signed-off-by: Giorgio Arena <giorgio.arena@arm.com>
Change-Id: Ifdd30c66dbf5f2842c6b2d939000613d5011708e
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4192
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/CL/cl_kernels/gemm.cl')
-rw-r--r-- | src/core/CL/cl_kernels/gemm.cl | 26 |
1 files changed, 13 insertions, 13 deletions
diff --git a/src/core/CL/cl_kernels/gemm.cl b/src/core/CL/cl_kernels/gemm.cl index f29f2c1b48..653aa5591c 100644 --- a/src/core/CL/cl_kernels/gemm.cl +++ b/src/core/CL/cl_kernels/gemm.cl @@ -1282,7 +1282,7 @@ __kernel void gemm_mm_reshaped_only_rhs_t(IMAGE_DECLARATION(lhs), const bool cond_x = ((x + 1) * N0 >= N); // Store output block - STORE_BLOCK_BOUNDARY_AWARE(M0, N0, DATA_TYPE, c, dst_addr, dst_stride_y, zout, PARTIAL_STORE_M0, PARTIAL_STORE_N0, N, cond_y, cond_x); + STORE_BLOCK_BOUNDARY_AWARE(M0, N0, DATA_TYPE, c, dst_addr, dst_stride_y, zout, PARTIAL_STORE_M0, PARTIAL_STORE_N0, cond_y, cond_x); #undef RHS_BLOCK_SIZE #undef RHS_OFFSET_X @@ -1628,7 +1628,7 @@ __kernel void gemm_mm_reshaped_only_rhs_t_texture(IMAGE_DECLARATION(lhs), const bool cond_x = ((x + 1) * N0 >= N); // Store output block - STORE_BLOCK_BOUNDARY_AWARE(M0, N0, DATA_TYPE, c, dst_addr, dst_stride_y, zout, PARTIAL_STORE_M0, PARTIAL_STORE_N0, N, cond_y, cond_x); + STORE_BLOCK_BOUNDARY_AWARE(M0, N0, DATA_TYPE, c, dst_addr, dst_stride_y, zout, PARTIAL_STORE_M0, PARTIAL_STORE_N0, cond_y, cond_x); #undef RHS_BLOCK_SIZE #undef RHS_OFFSET_X @@ -2024,7 +2024,7 @@ __kernel void gemm_mm_reshaped_only_rhs_nt(IMAGE_DECLARATION(lhs), const bool cond_x = ((x + 1) * N0 >= N); // Store output block - STORE_BLOCK_BOUNDARY_AWARE(M0, N0, DATA_TYPE, c, dst_addr, dst_stride_y, zout, PARTIAL_STORE_M0, PARTIAL_STORE_N0, N, cond_y, cond_x); + STORE_BLOCK_BOUNDARY_AWARE(M0, N0, DATA_TYPE, c, dst_addr, dst_stride_y, zout, PARTIAL_STORE_M0, PARTIAL_STORE_N0, cond_y, cond_x); #undef RHS_BLOCK_SIZE #undef RHS_OFFSET_X @@ -2333,7 +2333,7 @@ __kernel void gemm_mm_reshaped_only_rhs_nt_texture(IMAGE_DECLARATION(lhs), const bool cond_x = ((x + 1) * N0 >= N); // Store output block - STORE_BLOCK_BOUNDARY_AWARE(M0, N0, DATA_TYPE, c, dst_addr, dst_stride_y, zout, PARTIAL_STORE_M0, PARTIAL_STORE_N0, N, cond_y, cond_x); + STORE_BLOCK_BOUNDARY_AWARE(M0, N0, DATA_TYPE, c, dst_addr, dst_stride_y, zout, PARTIAL_STORE_M0, PARTIAL_STORE_N0, cond_y, cond_x); #undef RHS_BLOCK_SIZE #undef RHS_OFFSET_X @@ -2775,9 +2775,9 @@ __kernel void gemm_mm_reshaped_lhs_nt_rhs_t(IMAGE_DECLARATION(lhs), // Store output block #if defined(MIXED_PRECISION) CONVERT_BLOCK(M0, N0, DATA_TYPE, c, c_lp); - STORE_BLOCK_BOUNDARY_AWARE(M0, N0, DATA_TYPE, c_lp, dst_addr, dst_stride_y, zout, PARTIAL_STORE_M0, PARTIAL_STORE_N0, N, cond_y, cond_x); + STORE_BLOCK_BOUNDARY_AWARE(M0, N0, DATA_TYPE, c_lp, dst_addr, dst_stride_y, zout, PARTIAL_STORE_M0, PARTIAL_STORE_N0, cond_y, cond_x); #else // defined(MIXED_PRECISION) - STORE_BLOCK_BOUNDARY_AWARE(M0, N0, DATA_TYPE, c, dst_addr, dst_stride_y, zout, PARTIAL_STORE_M0, PARTIAL_STORE_N0, N, cond_y, cond_x); + STORE_BLOCK_BOUNDARY_AWARE(M0, N0, DATA_TYPE, c, dst_addr, dst_stride_y, zout, PARTIAL_STORE_M0, PARTIAL_STORE_N0, cond_y, cond_x); #endif // defined(MIXED_PRECISION) #undef LHS_BLOCK_SIZE @@ -3045,9 +3045,9 @@ __kernel void gemm_mm_reshaped_lhs_nt_rhs_t_texture(IMAGE_DECLARATION(lhs), // Store output block #if defined(MIXED_PRECISION) CONVERT_BLOCK(M0, N0, DATA_TYPE, c, c_lp); - STORE_BLOCK_BOUNDARY_AWARE(M0, N0, DATA_TYPE, c_lp, dst_addr, dst_stride_y, zout, PARTIAL_STORE_M0, PARTIAL_STORE_N0, N, cond_y, cond_x); + STORE_BLOCK_BOUNDARY_AWARE(M0, N0, DATA_TYPE, c_lp, dst_addr, dst_stride_y, zout, PARTIAL_STORE_M0, PARTIAL_STORE_N0, cond_y, cond_x); #else // defined(MIXED_PRECISION) - STORE_BLOCK_BOUNDARY_AWARE(M0, N0, DATA_TYPE, c, dst_addr, dst_stride_y, zout, PARTIAL_STORE_M0, PARTIAL_STORE_N0, N, cond_y, cond_x); + STORE_BLOCK_BOUNDARY_AWARE(M0, N0, DATA_TYPE, c, dst_addr, dst_stride_y, zout, PARTIAL_STORE_M0, PARTIAL_STORE_N0, cond_y, cond_x); #endif // defined(MIXED_PRECISION) #undef LHS_BLOCK_SIZE @@ -3539,9 +3539,9 @@ __kernel void gemm_mm_reshaped_lhs_t_rhs_nt(IMAGE_DECLARATION(lhs), // Store output block #if defined(MIXED_PRECISION) CONVERT_BLOCK(M0, N0, DATA_TYPE, c, c_lp); - STORE_BLOCK_BOUNDARY_AWARE(M0, N0, DATA_TYPE, c_lp, dst_addr, dst_stride_y, zout, PARTIAL_STORE_M0, PARTIAL_STORE_N0, N, cond_y, cond_x); + STORE_BLOCK_BOUNDARY_AWARE(M0, N0, DATA_TYPE, c_lp, dst_addr, dst_stride_y, zout, PARTIAL_STORE_M0, PARTIAL_STORE_N0, cond_y, cond_x); #else // defined(MIXED_PRECISION) - STORE_BLOCK_BOUNDARY_AWARE(M0, N0, DATA_TYPE, c, dst_addr, dst_stride_y, zout, PARTIAL_STORE_M0, PARTIAL_STORE_N0, N, cond_y, cond_x); + STORE_BLOCK_BOUNDARY_AWARE(M0, N0, DATA_TYPE, c, dst_addr, dst_stride_y, zout, PARTIAL_STORE_M0, PARTIAL_STORE_N0, cond_y, cond_x); #endif // defined(MIXED_PRECISION) #undef LHS_BLOCK_SIZE @@ -3906,9 +3906,9 @@ __kernel void gemm_mm_reshaped_lhs_t_rhs_nt_texture(IMAGE_DECLARATION(lhs), // Store output block #if defined(MIXED_PRECISION) CONVERT_BLOCK(M0, N0, DATA_TYPE, c, c_lp); - STORE_BLOCK_BOUNDARY_AWARE(M0, N0, DATA_TYPE, c_lp, dst_addr, dst_stride_y, zout, PARTIAL_STORE_M0, PARTIAL_STORE_N0, N, cond_y, cond_x); + STORE_BLOCK_BOUNDARY_AWARE(M0, N0, DATA_TYPE, c_lp, dst_addr, dst_stride_y, zout, PARTIAL_STORE_M0, PARTIAL_STORE_N0, cond_y, cond_x); #else // defined(MIXED_PRECISION) - STORE_BLOCK_BOUNDARY_AWARE(M0, N0, DATA_TYPE, c, dst_addr, dst_stride_y, zout, PARTIAL_STORE_M0, PARTIAL_STORE_N0, N, cond_y, cond_x); + STORE_BLOCK_BOUNDARY_AWARE(M0, N0, DATA_TYPE, c, dst_addr, dst_stride_y, zout, PARTIAL_STORE_M0, PARTIAL_STORE_N0, cond_y, cond_x); #endif // defined(MIXED_PRECISION) #undef LHS_BLOCK_SIZE @@ -4287,7 +4287,7 @@ __kernel void gemm_mm_native(IMAGE_DECLARATION(lhs), const bool cond_x = ((x + 1) * N0 >= N); // Store output block - STORE_BLOCK_BOUNDARY_AWARE(M0, N0, DATA_TYPE, c, dst_addr, dst_stride_y, zout, PARTIAL_STORE_M0, PARTIAL_STORE_N0, N, cond_y, cond_x); + STORE_BLOCK_BOUNDARY_AWARE(M0, N0, DATA_TYPE, c, dst_addr, dst_stride_y, zout, PARTIAL_STORE_M0, PARTIAL_STORE_N0, cond_y, cond_x); #undef RHS_BLOCK_SIZE #undef RHS_OFFSET_X |