aboutsummaryrefslogtreecommitdiff
path: root/src/core/CL/cl_kernels/gemmlowp.cl
diff options
context:
space:
mode:
authorGian Marco Iodice <gianmarco.iodice@arm.com>2020-10-22 16:37:12 +0100
committerGian Marco Iodice <gianmarco.iodice@arm.com>2020-10-26 14:46:44 +0000
commit9ae06d4986bc3055f7786c1097b465bd321cf8eb (patch)
treeadb50e965f860893fe83e3937026056bf1f054c9 /src/core/CL/cl_kernels/gemmlowp.cl
parent5f91041aef3eb7373d5d2cebcbe60f279da85904 (diff)
downloadComputeLibrary-9ae06d4986bc3055f7786c1097b465bd321cf8eb.tar.gz
COMPMID-3925: Dispatch CLGEMM with no padding y requirement
- Add has_pad_y flag in GEMMKernelInfo - Skip reinterpret as 3D in CLGEMMMatrixMultiplyReshapedOnlyRHSKernel if has_pad_y = false - Add test to validate CLGEMMMatrixMultiplyReshapedOnlyRHSkernel with had_pad_y = false/true - Configure two variants of CLGEMMMatrixMultiplyReshapedOnlyRHSKernel to run with has_pad_y = false/true in CLGEMM - Check if the lhs/dst tensors have pad y. If not, run CLGEMMMatrixMultiplyReshapedOnlyRHSKernel without padding requirement Change-Id: I68bb43389789736d676b899ac7c77fd9138babaf Signed-off-by: Gian Marco Iodice <gianmarco.iodice@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4248 Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com> Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/CL/cl_kernels/gemmlowp.cl')
-rw-r--r--src/core/CL/cl_kernels/gemmlowp.cl14
1 files changed, 7 insertions, 7 deletions
diff --git a/src/core/CL/cl_kernels/gemmlowp.cl b/src/core/CL/cl_kernels/gemmlowp.cl
index 950faeca0b..4a05635669 100644
--- a/src/core/CL/cl_kernels/gemmlowp.cl
+++ b/src/core/CL/cl_kernels/gemmlowp.cl
@@ -433,7 +433,7 @@ __kernel void gemmlowp_mm_reshaped_lhs_nt_rhs_t(IMAGE_DECLARATION(lhs),
#if defined(REINTERPRET_OUTPUT_AS_3D)
// The plane (zout) is calculated dividing M (y * M0) by HEIGHT_GEMM3D
- CALCULATE_Z_OFFSET(M0, uint, zout, y, HEIGHT_GEMM3D, DEPTH_GEMM3D, dst_cross_plane_pad, dst_stride_y);
+ CALCULATE_Z_OFFSET(M0, uint, zout, y * M0, HEIGHT_GEMM3D, DEPTH_GEMM3D, dst_cross_plane_pad, dst_stride_y);
// Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
// multiply dst_stride_z by DEPTH_GEMM3D
@@ -567,7 +567,7 @@ __kernel void gemmlowp_mm_reshaped_only_rhs_t(IMAGE_DECLARATION(lhs),
#if defined(REINTERPRET_INPUT_AS_3D)
// The plane (zlhs) is calculated dividing M (y * M0) by HEIGHT_GEMM3D
- CALCULATE_Z_OFFSET(M0, uint, zlhs, y, HEIGHT_GEMM3D, DEPTH_GEMM3D, lhs_cross_plane_pad, lhs_stride_y);
+ CALCULATE_Z_OFFSET(M0, uint, zlhs, y * M0, HEIGHT_GEMM3D, DEPTH_GEMM3D, lhs_cross_plane_pad, lhs_stride_y);
// Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
// multiply lhs_stride_z by DEPTH_GEMM3D
@@ -604,7 +604,7 @@ __kernel void gemmlowp_mm_reshaped_only_rhs_t(IMAGE_DECLARATION(lhs),
#if defined(REINTERPRET_OUTPUT_AS_3D)
// The plane (zout) is calculated dividing M (y * M0) by HEIGHT_GEMM3D
- CALCULATE_Z_OFFSET(M0, uint, zout, y, HEIGHT_GEMM3D, DEPTH_GEMM3D, dst_cross_plane_pad, dst_stride_y);
+ CALCULATE_Z_OFFSET(M0, uint, zout, y * M0, HEIGHT_GEMM3D, DEPTH_GEMM3D, dst_cross_plane_pad, dst_stride_y);
// Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
// multiply dst_stride_z by DEPTH_GEMM3D
@@ -781,7 +781,7 @@ __kernel void gemmlowp_mm_reshaped_only_rhs_t_fused_output_stage_fixedpoint(IMAG
#if defined(REINTERPRET_INPUT_AS_3D)
// The plane (zlhs) is calculated dividing M (y * M0) by HEIGHT_GEMM3D
- CALCULATE_Z_OFFSET(M0, uint, zlhs, y, HEIGHT_GEMM3D, DEPTH_GEMM3D, lhs_cross_plane_pad, lhs_stride_y);
+ CALCULATE_Z_OFFSET(M0, uint, zlhs, y * M0, HEIGHT_GEMM3D, DEPTH_GEMM3D, lhs_cross_plane_pad, lhs_stride_y);
// Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
// multiply lhs_stride_z by DEPTH_GEMM3D
@@ -819,7 +819,7 @@ __kernel void gemmlowp_mm_reshaped_only_rhs_t_fused_output_stage_fixedpoint(IMAG
#if defined(REINTERPRET_OUTPUT_AS_3D)
// The plane (zout) is calculated dividing M (y * M0) by HEIGHT_GEMM3D
- CALCULATE_Z_OFFSET(M0, uint, zout, y, HEIGHT_GEMM3D, DEPTH_GEMM3D, dst_cross_plane_pad, dst_stride_y);
+ CALCULATE_Z_OFFSET(M0, uint, zout, y * M0, HEIGHT_GEMM3D, DEPTH_GEMM3D, dst_cross_plane_pad, dst_stride_y);
// Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
// multiply dst_stride_z by DEPTH_GEMM3D
@@ -1009,7 +1009,7 @@ __kernel void gemmlowp_mm_native(IMAGE_DECLARATION(lhs),
#if defined(REINTERPRET_INPUT_AS_3D)
// The plane (zlhs) is calculated dividing M (y * M0) by HEIGHT_GEMM3D
- CALCULATE_Z_OFFSET(M0, uint, zlhs, y, HEIGHT_GEMM3D, DEPTH_GEMM3D, lhs_cross_plane_pad, lhs_stride_y);
+ CALCULATE_Z_OFFSET(M0, uint, zlhs, COMPUTE_M0_START_ROW(y, M0, PARTIAL_STORE_M0), HEIGHT_GEMM3D, DEPTH_GEMM3D, lhs_cross_plane_pad, lhs_stride_y);
// Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
// multiply lhs_stride_z by DEPTH_GEMM3D
@@ -1080,7 +1080,7 @@ __kernel void gemmlowp_mm_native(IMAGE_DECLARATION(lhs),
#if defined(REINTERPRET_OUTPUT_AS_3D)
// The plane (zout) is calculated dividing M (y * M0) by HEIGHT_GEMM3D
- CALCULATE_Z_OFFSET(M0, uint, zout, y, HEIGHT_GEMM3D, DEPTH_GEMM3D, dst_cross_plane_pad, dst_stride_y);
+ CALCULATE_Z_OFFSET(M0, uint, zout, COMPUTE_M0_START_ROW(y, M0, PARTIAL_STORE_M0), HEIGHT_GEMM3D, DEPTH_GEMM3D, dst_cross_plane_pad, dst_stride_y);
// Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
// multiply dst_stride_z by DEPTH_GEMM3D