about | summary | refs | log | tree | commit | diff
path: root/src/core/CL/cl_kernels/gemmlowp.cl
diff options
context:
space:
mode:
author: morgolock <pablo.tello@arm.com> 2020-10-12 14:00:43 +0100
committer: Pablo Marquez <pablo.tello@arm.com> 2020-10-15 17:14:05 +0000
commit: cf343e3798d2a8c2ad2fcac488e4b78e2b5c968d (patch)
tree: 52aeb352689b82e34ff98730d52d970e79d3e7ff /src/core/CL/cl_kernels/gemmlowp.cl
parent: 3e77c27a07af070677a3a7e34fb3dfc519b7cbd1 (diff)
download: ComputeLibrary-cf343e3798d2a8c2ad2fcac488e4b78e2b5c968d.tar.gz
COMPMID-3719: Remove OpenCL padding: CLGEMMLowpMatrixMultiplyNativeKernel
Change-Id: Iee28abcbba1e7b9e2f3aaa55685936dce815d5a3
Signed-off-by: morgolock <pablo.tello@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4141
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/CL/cl_kernels/gemmlowp.cl')
-rw-r--r-- src/core/CL/cl_kernels/gemmlowp.cl 15
1 file changed, 10 insertions, 5 deletions
diff --git a/src/core/CL/cl_kernels/gemmlowp.cl b/src/core/CL/cl_kernels/gemmlowp.cl
index 8405a7beb7..29314ec581 100644
--- a/src/core/CL/cl_kernels/gemmlowp.cl
+++ b/src/core/CL/cl_kernels/gemmlowp.cl
@@ -992,10 +992,11 @@ __kernel void gemmlowp_mm_native(IMAGE_DECLARATION(lhs),
#endif // defined(DUMMY_WORK_ITEMS)
// Compute LHS matrix address
- uint lhs_offset = lhs_offset_first_element_in_bytes + y * M0 * (uint)lhs_stride_y;
+ uint lhs_offset = lhs_offset_first_element_in_bytes + COMPUTE_M0_START_ROW(y, M0, PARTIAL_STORE_M0) * (uint)lhs_stride_y;
// Compute RHS matrix address
- uint rhs_offset = rhs_offset_first_element_in_bytes + x * N0;
+ uint rhs_offset = rhs_offset_first_element_in_bytes + x * N0 * sizeof(DATA_TYPE);
+
#if defined(MATRIX_B_DEPTH)
// Do not slide matrix B if the matrix B has 3 dimensions and matrix A more than 3
@@ -1074,7 +1075,8 @@ __kernel void gemmlowp_mm_native(IMAGE_DECLARATION(lhs),
rhs_offset += rhs_stride_y;
}
- __global uchar *dst_addr = dst_ptr + dst_offset_first_element_in_bytes + (x * (uint)N0) * sizeof(int) + (y * (uint)M0 * dst_stride_y);
+ __global uchar *dst_addr = dst_ptr + dst_offset_first_element_in_bytes + (x * (uint)N0 * sizeof(int)) + (COMPUTE_M0_START_ROW(y, M0, PARTIAL_STORE_M0) * dst_stride_y);
+
REPEAT_VAR_INIT_TO_CONST(M0, uint, zout, 0); //uint zout0=0,zout1=0,zout2=0,... zout7=0;
@@ -1092,9 +1094,12 @@ __kernel void gemmlowp_mm_native(IMAGE_DECLARATION(lhs),
dst_addr += z * dst_stride_z;
#endif // defined(REINTERPRET_OUTPUT_AS_3D)
+ const bool cond_y = y == 0;
+ const bool cond_x = ((x + 1) * N0 >= N);
- // Convert and store output block
- CONVERT_STORE_BLOCK(M0, N0, int, c, dst_addr, dst_stride_y, zout);
+
+ // Store output block
+ STORE_BLOCK_BOUNDARY_AWARE(M0, N0, int, c, dst_addr, dst_stride_y, zout, PARTIAL_STORE_M0, PARTIAL_STORE_N0, N, cond_y, cond_x);
}
#endif // defined(M0) && defined(N0) && defined(K0) && defined(K)