aboutsummaryrefslogtreecommitdiff
path: root/src/cpu/kernels/CpuGemmLowpOffsetContributionOutputStageKernel.h
diff options
context:
space:
mode:
authorMohammed Suhail Munshi <MohammedSuhail.Munshi@arm.com>2023-01-25 11:51:50 +0000
committerMohmun02 <MohammedSuhail.Munshi@arm.com>2023-02-01 17:10:42 +0000
commit5b9d22353d29bb3b4e6c53924564a42a6ab71050 (patch)
tree734c5260925b395a73bbbbaa7d7ab4fac392bac8 /src/cpu/kernels/CpuGemmLowpOffsetContributionOutputStageKernel.h
parentae72a46e495742863dba44fcf5fdc673c9d2afbc (diff)
downloadComputeLibrary-5b9d22353d29bb3b4e6c53924564a42a6ab71050.tar.gz
Fix GEMMLowp/Batched MatMul mismatches on CPU
- Fixes Column Offset matrix is not being iterated through in y dimension Resolves : COMPMID-5795 Signed-off-by: Mohammed Suhail Munshi <MohammedSuhail.Munshi@arm.com> Change-Id: I0190474be404b4f0e171855739cfd0a48cbed5bc Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9020 Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Gunes Bayir <gunes.bayir@arm.com> Reviewed-by: SiCong Li <sicong.li@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com> Benchmark: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/cpu/kernels/CpuGemmLowpOffsetContributionOutputStageKernel.h')
-rw-r--r--src/cpu/kernels/CpuGemmLowpOffsetContributionOutputStageKernel.h4
1 file changed, 3 insertions, 1 deletion
diff --git a/src/cpu/kernels/CpuGemmLowpOffsetContributionOutputStageKernel.h b/src/cpu/kernels/CpuGemmLowpOffsetContributionOutputStageKernel.h
index ad8b05e49a..3cb99faee8 100644
--- a/src/cpu/kernels/CpuGemmLowpOffsetContributionOutputStageKernel.h
+++ b/src/cpu/kernels/CpuGemmLowpOffsetContributionOutputStageKernel.h
@@ -73,8 +73,10 @@ public:
*
* @param[in] mm_result Input tensor info containing the result of @ref CpuGemmLowpMatrixMultiplyKernel. Data type supported: S32
* @param[in] vector_sum_col Input row-vector tensor info of sums of all the entries in each column of matrix B.
+ *                            Can be a 1D or 2D tensor; in the 2D case, the y dimension is the batch dimension
* Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: same as @p mm_result
* @param[in] vector_sum_row Input row-vector tensor info of sums of all the entries in each row of matrix A.
+ *                            Can be a 1D or 2D tensor; in the 2D case, the y dimension is the batch dimension
* @param[in] bias Biases tensor info. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
* Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p mm_result.
* @param[out] dst Output tensor info containing the final quantized result. Data type supported: QASYMM8/QASYMM8_SIGNED
@@ -105,7 +107,7 @@ private:
int32_t _a_offset{ 0 };
int32_t _b_offset{ 0 };
int32_t _k_offset{ 0 };
- bool _slide_vector_sum_col{ true };
+ bool _is_vector_sum_col_batched{ true };
GEMMLowpOutputStageInfo _output_stage{ GEMMLowpOutputStageInfo() };
};
} // namespace kernels