From 491f30c0fff416007d97f4a5a043923861ef7b64 Mon Sep 17 00:00:00 2001 From: Gian Marco Iodice Date: Mon, 2 Nov 2020 15:43:57 +0000 Subject: COMPMID-3939: Update GEMM heuristic for Mali-G77 - Update heuristic for GEMM reshaped RHS only - Fix left-over block size in CLGEMMMatrixMultiplyReshapedOnlyRHSKernel Change-Id: I34c738821ed2e4a537da4a15058eec164cb6b61f Signed-off-by: Gian Marco Iodice Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4305 Tested-by: Arm Jenkins Reviewed-by: Georgios Pinitas Comments-Addressed: Arm Jenkins --- src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.cpp') diff --git a/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.cpp index 68f761b9e7..d53aede3c8 100644 --- a/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.cpp +++ b/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.cpp @@ -247,14 +247,14 @@ void CLGEMMMatrixMultiplyReshapedOnlyRHSKernel::configure(const CLCompileContext const unsigned int h_gemm_3d = _reinterpret_output_as_3d ? output->info()->dimension(1) : input0->info()->dimension(1); const unsigned int d_gemm_3d = _reinterpret_output_as_3d ? output->info()->dimension(2) : input0->info()->dimension(2); - // Calculate partial (store instead of load) M0 and partial N0 for the partial blocks at the end of a row/column if any. This is to avoid padding. - const unsigned int partial_store_m0 = internal_m % lhs_info.m0; - const unsigned int partial_store_n0 = gemm_info.n % rhs_info.n0; - // Shrink M0 to be always <= M (internal_m) to prevent out-of-bounds reads. 
// NOTE: This might have implications on heuristics and performance const unsigned int internal_m0 = std::min(internal_m, lhs_info.m0); + // Calculate partial (store instead of load) M0 and partial N0 for the partial blocks at the end of a row/column if any. This is to avoid padding. + const unsigned int partial_store_m0 = internal_m % internal_m0; + const unsigned int partial_store_n0 = gemm_info.n % rhs_info.n0; + // Create build options CLBuildOptions build_opts; build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input0->info()->data_type())); -- cgit v1.2.1