COMPMID-3608: Fix z index in gemmlowp_mm_reshaped_only kernel

The issue affected the gemmlowp_mm_reshaped_only_rhs_t_fused_output_stage_fixedpoint kernel. Specifically, the problem was in the z index used to access the elements of the LHS reduced tensor, which are used to calculate the offset contribution.

Change-Id: I74f6398fc08894fc323ccd04fda9220752652d31
Signed-off-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3726
Reviewed-by: TeresaARM <teresa.charlinreyes@arm.com>
Reviewed-by: Sang-Hoon Park <sang-hoon.park@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
author: Gian Marco Iodice <gianmarco.iodice@arm.com> 2020-08-12 14:12:28 +0100
committer: Gian Marco Iodice <gianmarco.iodice@arm.com> 2020-08-12 15:18:18 +0000
commit: 27423f0c3f005155637ef7f1eb8fd31a06a9f205 (patch)
tree: f117b4ef4dad188c6282519280c72d7b639939cd /src/core/CL
parent: 088d63aae947efd8bbcfd4d27c1f50a6af79e3b9 (diff)
download: ComputeLibrary-27423f0c3f005155637ef7f1eb8fd31a06a9f205.tar.gz
1 file changed, 4 insertions, 9 deletions
diff --git a/src/core/CL/cl_kernels/gemmlowp.cl b/src/core/CL/cl_kernels/gemmlowp.cl
index aac8d5a1e2..b4ac00535e 100644
--- a/src/core/CL/cl_kernels/gemmlowp.cl
+++ b/src/core/CL/cl_kernels/gemmlowp.cl
@@ -835,11 +835,6 @@ __kernel void gemmlowp_mm_reshaped_only_rhs_t_fused_output_stage_fixedpoint(IMAG
     // Convert result of matrix multiplication to S32
     REPEAT_VAR_INIT_CONVERT_SAT(M0, VEC_DATA_TYPE(int, N0), c, c_int);
 
-    int batch_id = z;
-#if defined(DEPTH_GEMM3D)
-    batch_id /= (int)DEPTH_GEMM3D;
-#endif // defined(DEPTH_GEMM3D)
-
     // Offset contribution: c += (A_OFFSET * sum_col) + (B_OFFSET * sum_row) +  K_OFFSET;
     REPEAT_VAR_INIT_TO_CONST(M0, VEC_DATA_TYPE(int, N0), offset_s32_, K_OFFSET);
 
@@ -859,11 +854,11 @@ __kernel void gemmlowp_mm_reshaped_only_rhs_t_fused_output_stage_fixedpoint(IMAG
 
 #if defined(B_OFFSET)
     // Compute the offset contribution due to B_OFFSET
+    // Note: The sum_row tensor is generated through CLGEMMLowpMatrixAReductionKernel, which
+    // does not introduce padding. For this reason it is safe to access the tensor in this manner
+    // without considering that the coordinate "y" could come from an input 3D tensor
     __global uchar *sum_row_addr = sum_row_ptr + sum_row_offset_first_element_in_bytes + (y * (uint)M0) * sizeof(int) + z * sum_row_stride_y;
 
-#if defined(HEIGHT_GEMM3D) && defined(DEPTH_GEMM3D)
-    sum_row_addr += (batch_id % (int)DEPTH_GEMM3D) * (int)HEIGHT_GEMM3D * sizeof(int);
-#endif // defined(HEIGHT_GEMM3D) && defined(DEPTH_GEMM3D)
     LOAD_SCALAR_AS_VECTOR(M0, N0, int, b_offset_s32_, sum_row_addr, 0, sum_row_stride_x);
 
     REPEAT_MLA_VAR_WITH_CONST_VEC(M0, offset_s32_, b_offset_s32_, (VEC_DATA_TYPE(int, N0))B_OFFSET);
@@ -2241,4 +2236,4 @@ __kernel void gemmlowp_output_stage_quantize_down_float(TENSOR3D_DECLARATION(src
     // Store the result
     vstore4(res, 0, (__global OUTPUT_DATA_TYPE *)dst_addr);
 }
-#endif // defined(REAL_MULTIPLIER) && defined(OUTPUT_OFFSET)
-\ No newline at end of file
+#endif // defined(REAL_MULTIPLIER) && defined(OUTPUT_OFFSET)
author	Gian Marco Iodice <gianmarco.iodice@arm.com>	2020-08-12 14:12:28 +0100
committer	Gian Marco Iodice <gianmarco.iodice@arm.com>	2020-08-12 15:18:18 +0000
commit	27423f0c3f005155637ef7f1eb8fd31a06a9f205 (patch)
tree	f117b4ef4dad188c6282519280c72d7b639939cd /src/core/CL
parent	088d63aae947efd8bbcfd4d27c1f50a6af79e3b9 (diff)
download	ComputeLibrary-27423f0c3f005155637ef7f1eb8fd31a06a9f205.tar.gz