From 839e19865d4b654899d1da5cfb94304841e7f210 Mon Sep 17 00:00:00 2001 From: Gian Marco Iodice Date: Thu, 29 Oct 2020 13:36:50 +0000 Subject: COMPMID-3930: Update CLGEMM heuristic for fp16. Mali-G76 - Since the GEMM kernel can now work without padding, the heuristic needs to be fine-tuned to exploit this feature - The heuristic affects Mali-G76 FP16 only Change-Id: Ia430627f02131ad956ce2219b80c83c8e7cabaf2 Signed-off-by: Gian Marco Iodice Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4284 Tested-by: Arm Jenkins Comments-Addressed: Arm Jenkins Reviewed-by: Michele Di Giorgio Reviewed-by: SiCong Li --- .../CLGEMMReshapedKernelConfigurationBifrost.cpp | 79 +++------------------- 1 file changed, 11 insertions(+), 68 deletions(-) (limited to 'src/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationBifrost.cpp') diff --git a/src/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationBifrost.cpp b/src/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationBifrost.cpp index 00c284facc..c1ca187a70 100644 --- a/src/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationBifrost.cpp +++ b/src/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationBifrost.cpp @@ -205,95 +205,38 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> CLGEMMReshapedKernelConfiguratio std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> CLGEMMReshapedKernelConfigurationBifrost::configure_G76_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b) { - ARM_COMPUTE_UNUSED(k); - - const float r_mn = static_cast<float>(m) / static_cast<float>(n); const float workload = (static_cast<float>(m) * static_cast<float>(n) * static_cast<float>(b)) / 20.0f; + const float r_mk = static_cast<float>(m) / static_cast<float>(k); + const float r_nk = static_cast<float>(n) / static_cast<float>(k); - if(workload <= 1049.59f) + if(workload <= 1422.40f) { - if(b <= 5) + if(r_mk <= 2.45f) { - if(workload <= 790.39f) + if(workload <= 801.60f) { - return configure_lhs_rhs_info(m, n, 2, 4, 4, 2, 2, false, false, true, false, false); + return configure_lhs_rhs_info(m, n, 2, 4, 4, 1, 2, true, false, true, false, false); } else { -
if(workload <= 982.39f) - { - return configure_lhs_rhs_info(m, n, 4, 2, 4, 4, 4, false, false, true, false, false); - } - else - { - return configure_lhs_rhs_info(m, n, 2, 4, 4, 2, 1, false, true, true, false, false); - } + return configure_lhs_rhs_info(m, n, 4, 2, 4, 2, 2, false, false, true, false, false); } } else { - if(r_mn <= 0.21f) + if(r_nk <= 0.67f) { - if(r_mn <= 0.11f) - { - return configure_lhs_rhs_info(m, n, 2, 4, 4, 2, 2, false, false, true, false, false); - } - else - { - return configure_lhs_rhs_info(m, n, 4, 4, 4, 4, 4, false, true, true, false, false); - } + return configure_lhs_rhs_info(m, n, 4, 2, 4, 2, 2, false, false, true, false, false); } else { - return configure_lhs_rhs_info(m, n, 2, 4, 4, 2, 2, false, false, true, false, false); + return configure_lhs_rhs_info(m, n, 2, 4, 4, 4, 1, false, true, false, true, false); } } } else { - if(n <= 200) - { - if(workload <= 29772.79f) - { - if(m <= 64.5) - { - return configure_lhs_rhs_info(m, n, 4, 4, 4, 2, 4, true, false, true, false, false); - } - else - { - return configure_lhs_rhs_info(m, n, 4, 4, 4, 2, 2, false, true, true, false, false); - } - } - else - { - if(r_mn <= 1.09f) - { - return configure_lhs_rhs_info(m, n, 4, 4, 4, 4, 4, false, true, true, false, false); - } - else - { - return configure_lhs_rhs_info(m, n, 4, 4, 4, 2, 2, true, true, true, false, false); - } - } - } - else - { - if(m <= 43) - { - return configure_lhs_rhs_info(m, n, 4, 4, 4, 2, 4, true, false, true, false, false); - } - else - { - if(workload <= 26364.79f) - { - return configure_lhs_rhs_info(m, n, 4, 4, 4, 2, 2, false, true, true, false, false); - } - else - { - return configure_lhs_rhs_info(m, n, 4, 4, 4, 4, 4, false, true, true, false, false); - } - } - } + return configure_lhs_rhs_info(m, n, 4, 4, 4, 2, 4, true, true, true, false, false); } } -- cgit v1.2.1