aboutsummaryrefslogtreecommitdiff
path: root/src/core/CL/cl_kernels/gemmlowp.cl
diff options
context:
space:
mode:
authorManuel Bottini <manuel.bottini@arm.com>2020-10-21 12:34:38 +0100
committerGeorgios Pinitas <georgios.pinitas@arm.com>2020-10-29 12:05:18 +0000
commit8cf753f68a2adb1234eaeb8ac79a83a8bec14051 (patch)
treef97dea1ce4d777bb5f8b52fb89a14eff55c954e2 /src/core/CL/cl_kernels/gemmlowp.cl
parent11d73272b8df5ceb2629fb916b84c768b7c5c65a (diff)
downloadComputeLibrary-8cf753f68a2adb1234eaeb8ac79a83a8bec14051.tar.gz
COMPMID-3720: Remove OpenCL padding CLGEMMLowpMatrixMultiplyReshapedKernel
Change-Id: Ie70ba877f0356661a055f026124904bbf2181a33 Signed-off-by: Manuel Bottini <manuel.bottini@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4251 Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com> Reviewed-by: SiCong Li <sicong.li@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/CL/cl_kernels/gemmlowp.cl')
-rw-r--r--src/core/CL/cl_kernels/gemmlowp.cl11
1 files changed, 8 insertions, 3 deletions
diff --git a/src/core/CL/cl_kernels/gemmlowp.cl b/src/core/CL/cl_kernels/gemmlowp.cl
index 4a05635669..059c2e14df 100644
--- a/src/core/CL/cl_kernels/gemmlowp.cl
+++ b/src/core/CL/cl_kernels/gemmlowp.cl
@@ -290,7 +290,7 @@
(VECTOR_ACC_TYPE, k0, a, b, c); \
})
-#if defined(M0) && defined(N0) && defined(K0) && defined(V0) && defined(H0) && defined(M) && defined(N)
+#if defined(M0) && defined(N0) && defined(K0) && defined(V0) && defined(H0) && defined(M) && defined(N) && defined(PARTIAL_STORE_M0) && defined(PARTIAL_STORE_N0)
/** This OpenCL kernel computes the matrix multiplication between 2 matrices with QASYMM/QASYMM_SIGNED data type.
* The LHS matrix must be reshaped with @ref CLGEMMReshapeLHSMatrixKernel and the M0xK0 must be NOT transposed
* The RHS matrix must be reshaped with @ref CLGEMMReshapeRHSMatrixKernel and the K0xN0 must be transposed
@@ -447,7 +447,12 @@ __kernel void gemmlowp_mm_reshaped_lhs_nt_rhs_t(IMAGE_DECLARATION(lhs),
#endif // defined(REINTERPRET_OUTPUT_AS_3D)
// Convert and store output block
- CONVERT_STORE_BLOCK(M0, N0, int, c, dst_addr, dst_stride_y, zout);
+ const bool cond_y = ((get_global_id(1) + 1) * M0 >= M);
+ const bool cond_x = ((get_global_id(0) + 1) * N0 >= N);
+
+ // Store output block
+ REPEAT_VAR_INIT_CONVERT_SAT(M0, VEC_DATA_TYPE(int, N0), c, c_lp);
+ STORE_BLOCK_BOUNDARY_AWARE(M0, N0, int, c_lp, dst_addr, dst_stride_y, zout, PARTIAL_STORE_M0, PARTIAL_STORE_N0, cond_y, cond_x);
#undef LHS_BLOCK_SIZE
#undef LHS_OFFSET_X
@@ -456,7 +461,7 @@ __kernel void gemmlowp_mm_reshaped_lhs_nt_rhs_t(IMAGE_DECLARATION(lhs),
#undef RHS_OFFSET_X
#undef RHS_STEP_X
}
-#endif // defined(M0) && defined(N0) && defined(K0) && defined(V0) && defined(H0) && defined(K)
+#endif // defined(M0) && defined(N0) && defined(K0) && defined(V0) && defined(H0) && defined(M) && defined(N) && defined(PARTIAL_STORE_M0) && defined(PARTIAL_STORE_N0)
#if defined(M0) && defined(N0) && defined(K0) && defined(H0) && defined(K)