From 60ab4e66ea3cb85042035fd1aafbfea666bb4ea7 Mon Sep 17 00:00:00 2001 From: Gian Marco Iodice Date: Fri, 28 Apr 2023 10:40:07 +0100 Subject: Fix export_to_cl_image issue in the fp16 GeMM implementation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - The issue affects Fp16 GeMM on Arm® Mali™-G78 - The issue was caused by a missing fallback implementation for the case when export_to_cl_image cannot be used - The new implementation fixes this issues and make the GeMM implementation for M=1 also faster (4-5% on various networks with fully connected at the end of the model) - This patch also enables the H0=0 case in the GeMM examples Resolves COMPMID-5812, COMPMID-5688, and COMPMID-6147 Signed-off-by: Gian Marco Iodice Change-Id: Ib7b355ae25337962598dd2ba21665b1a6b48686f Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/c/VisualCompute/ComputeLibrary/+/514664 Tested-by: bsgcomp Reviewed-by: Viet-Hoa Do Comments-Addressed: bsgcomp Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9526 Benchmark: Arm Jenkins Tested-by: Arm Jenkins Comments-Addressed: Arm Jenkins --- .../ClGemmDefaultConfigReshapedRhsOnlyValhall.cpp | 26 ++++++++++------------ 1 file changed, 12 insertions(+), 14 deletions(-) (limited to 'src/gpu') diff --git a/src/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmDefaultConfigReshapedRhsOnlyValhall.cpp b/src/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmDefaultConfigReshapedRhsOnlyValhall.cpp index 76551b076a..d08bf84c72 100644 --- a/src/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmDefaultConfigReshapedRhsOnlyValhall.cpp +++ b/src/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmDefaultConfigReshapedRhsOnlyValhall.cpp @@ -562,21 +562,19 @@ std::pair ClGemmDefaultConfigReshapedRhsOn if(m == 1) { - if(r_mn <= 0.0045f) + const GeMMConfigsMatrix configs_mnkb_best = { - if(workload <= 278.7000f) - { - return configure_lhs_rhs_info(m, n, 1, 2, 16, 1, 8, 0, 0, 0, 1, 1); - } - else - { - return configure_lhs_rhs_info(m, n, 1, 4, 8, 1, 32, 0, 0, 1, 0, 0); - } - } - else - { - return configure_lhs_rhs_info(m, n, 1, 2, 16, 1, 8, 0, 0, 1, 0, 0); - } + { 1, 8984, 640, 1, 1, 4, 2, 1, 0, 1, 0, 1, 1, 0 }, + { 1, 420, 392, 1, 1, 2, 4, 1, 0, 1, 0, 1, 0, 0 }, + { 1, 644, 5288, 1, 1, 2, 4, 1, 0, 1, 0, 1, 0, 0 }, + { 1, 6512, 6404, 1, 1, 2, 2, 1, 0, 1, 0, 1, 1, 0 }, + { 1, 5304, 640, 1, 1, 2, 2, 1, 0, 1, 0, 1, 0, 0 }, + { 1, 1352, 1520, 1, 1, 2, 4, 1, 0, 1, 0, 1, 0, 0 }, + { 1, 4096, 25088, 1, 1, 2, 4, 1, 0, 1, 0, 1, 0, 0 }, + { 1, 732, 8988, 1, 1, 2, 4, 1, 0, 1, 0, 1, 0, 0 } + }; + + return find_lhs_rhs_info(configs_mnkb_best, m, n, k, b); } else { -- cgit v1.2.1