From c6af9db1bb6b7a2093205f6f3088143bad660ff9 Mon Sep 17 00:00:00 2001
From: Gian Marco Iodice
Date: Thu, 24 Sep 2020 13:55:20 +0100
Subject: COMPMID-3673: Update heuristic for GEMM FP16 on Mali G76

- Update heuristic for CLGEMMReshapedKernel - FP16
- Update heuristic for CLGEMMReshapedOnlyRHSKernel - FP16

Change-Id: I35aa73e59d8c2d1bc6b2dd318fd8eeb3e42c27a4
Signed-off-by: Gian Marco Iodice
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4026
Tested-by: Arm Jenkins
Reviewed-by: SiCong Li
Comments-Addressed: Arm Jenkins
---
 .../CLGEMMReshapedKernelConfigurationBifrost.cpp | 87 +++++++++++++++++++++-
 1 file changed, 83 insertions(+), 4 deletions(-)

(limited to 'src/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationBifrost.cpp')

diff --git a/src/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationBifrost.cpp b/src/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationBifrost.cpp
index a533f14d02..b5fc074fb4 100644
--- a/src/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationBifrost.cpp
+++ b/src/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationBifrost.cpp
@@ -206,15 +206,94 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> CLGEMMReshapedKernelConfiguratio
 std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> CLGEMMReshapedKernelConfigurationBifrost::configure_G76_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
 {
     ARM_COMPUTE_UNUSED(k);
-    ARM_COMPUTE_UNUSED(b);

-    if(n <= 4)
+    const float r_mn = static_cast<float>(m) / static_cast<float>(n);
+    const float workload = (static_cast<float>(m) * static_cast<float>(n) * static_cast<float>(b)) / 20.0f;
+
+    if(workload <= 1049.59f)
     {
-        return configure_lhs_rhs_info(m, n, 4, 4, 4, 8, 2, true, true, true, false);
+        if(b <= 5)
+        {
+            if(workload <= 790.39f)
+            {
+                return configure_lhs_rhs_info(m,n,2,4,4,2,2,false,false,true,false,false);
+            }
+            else
+            {
+                if(workload <= 982.39f)
+                {
+                    return configure_lhs_rhs_info(m,n,4,2,4,4,4,false,false,true,false,false);
+                }
+                else
+                {
+                    return configure_lhs_rhs_info(m,n,2,4,4,2,1,false,true,true,false,false);
+                }
+            }
+        }
+        else
+        {
+            if(r_mn <= 0.21f)
+            {
+                if(r_mn <= 0.11f)
+                {
+                    return configure_lhs_rhs_info(m,n,2,4,4,2,2,false,false,true,false,false);
+                }
+                else
+                {
+                    return configure_lhs_rhs_info(m,n,4,4,4,4,4,false,true,true,false,false);
+                }
+            }
+            else
+            {
+                return configure_lhs_rhs_info(m,n,2,4,4,2,2,false,false,true,false,false);
+            }
+        }
     }
     else
     {
-        return configure_lhs_rhs_info(m, n, 4, 4, 4, 4, 8, true, true, true, false);
+        if(n <= 200)
+        {
+            if(workload <= 29772.79f)
+            {
+                if(m <= 64.5)
+                {
+                    return configure_lhs_rhs_info(m,n,4,4,4,2,4,true,false,true,false,false);
+                }
+                else
+                {
+                    return configure_lhs_rhs_info(m,n,4,4,4,2,2,false,true,true,false,false);
+                }
+            }
+            else
+            {
+                if(r_mn <= 1.09f)
+                {
+                    return configure_lhs_rhs_info(m,n,4,4,4,4,4,false,true,true,false,false);
+                }
+                else
+                {
+                    return configure_lhs_rhs_info(m,n,4,4,4,2,2,true,true,true,false,false);
+                }
+            }
+        }
+        else
+        {
+            if(m <= 43)
+            {
+                return configure_lhs_rhs_info(m,n,4,4,4,2,4,true,false,true,false,false);
+            }
+            else
+            {
+                if(workload <= 26364.79f)
+                {
+                    return configure_lhs_rhs_info(m,n,4,4,4,2,2,false,true,true,false,false);
+                }
+                else
+                {
+                    return configure_lhs_rhs_info(m,n,4,4,4,4,4,false,true,true,false,false);
+                }
+            }
+        }
     }
 }

--
cgit v1.2.1