diff options
author | Gian Marco Iodice <gianmarco.iodice@arm.com> | 2020-09-24 13:55:20 +0100 |
---|---|---|
committer | Gian Marco Iodice <gianmarco.iodice@arm.com> | 2020-09-24 15:40:12 +0000 |
commit | c6af9db1bb6b7a2093205f6f3088143bad660ff9 (patch) | |
tree | 254925c8dc238304326ab8e14e128be6130a9fa4 /src/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationBifrost.cpp | |
parent | 840a72cc745c60eccbd26fe192b035ec68b2ee41 (diff) | |
download | ComputeLibrary-c6af9db1bb6b7a2093205f6f3088143bad660ff9.tar.gz |
COMPMID-3673: Update heuristic for GEMM FP16 on Mali G76
- Update heuristic for CLGEMMReshapedKernel - FP16
- Update heuristic for CLGEMMReshapedOnlyRHSKernel - FP16
Change-Id: I35aa73e59d8c2d1bc6b2dd318fd8eeb3e42c27a4
Signed-off-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4026
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: SiCong Li <sicong.li@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationBifrost.cpp')
-rw-r--r-- | src/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationBifrost.cpp | 87 |
1 files changed, 83 insertions, 4 deletions
diff --git a/src/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationBifrost.cpp b/src/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationBifrost.cpp
index a533f14d02..b5fc074fb4 100644
--- a/src/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationBifrost.cpp
+++ b/src/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationBifrost.cpp
@@ -206,15 +206,94 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> CLGEMMReshapedKernelConfiguratio
 std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> CLGEMMReshapedKernelConfigurationBifrost::configure_G76_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
 {
     ARM_COMPUTE_UNUSED(k);
-    ARM_COMPUTE_UNUSED(b);
 
-    if(n <= 4)
+    const float r_mn = static_cast<float>(m) / static_cast<float>(n);
+    const float workload = (static_cast<float>(m) * static_cast<float>(n) * static_cast<float>(b)) / 20.0f;
+
+    if(workload <= 1049.59f)
     {
-        return configure_lhs_rhs_info(m, n, 4, 4, 4, 8, 2, true, true, true, false);
+        if(b <= 5)
+        {
+            if(workload <= 790.39f)
+            {
+                return configure_lhs_rhs_info(m,n,2,4,4,2,2,false,false,true,false,false);
+            }
+            else
+            {
+                if(workload <= 982.39f)
+                {
+                    return configure_lhs_rhs_info(m,n,4,2,4,4,4,false,false,true,false,false);
+                }
+                else
+                {
+                    return configure_lhs_rhs_info(m,n,2,4,4,2,1,false,true,true,false,false);
+                }
+            }
+        }
+        else
+        {
+            if(r_mn <= 0.21f)
+            {
+                if(r_mn <= 0.11f)
+                {
+                    return configure_lhs_rhs_info(m,n,2,4,4,2,2,false,false,true,false,false);
+                }
+                else
+                {
+                    return configure_lhs_rhs_info(m,n,4,4,4,4,4,false,true,true,false,false);
+                }
+            }
+            else
+            {
+                return configure_lhs_rhs_info(m,n,2,4,4,2,2,false,false,true,false,false);
+            }
+        }
     }
     else
     {
-        return configure_lhs_rhs_info(m, n, 4, 4, 4, 4, 8, true, true, true, false);
+        if(n <= 200)
+        {
+            if(workload <= 29772.79f)
+            {
+                if(m <= 64.5)
+                {
+                    return configure_lhs_rhs_info(m,n,4,4,4,2,4,true,false,true,false,false);
+                }
+                else
+                {
+                    return configure_lhs_rhs_info(m,n,4,4,4,2,2,false,true,true,false,false);
+                }
+            }
+            else
+            {
+                if(r_mn <= 1.09f)
+                {
+                    return configure_lhs_rhs_info(m,n,4,4,4,4,4,false,true,true,false,false);
+                }
+                else
+                {
+                    return configure_lhs_rhs_info(m,n,4,4,4,2,2,true,true,true,false,false);
+                }
+            }
+        }
+        else
+        {
+            if(m <= 43)
+            {
+                return configure_lhs_rhs_info(m,n,4,4,4,2,4,true,false,true,false,false);
+            }
+            else
+            {
+                if(workload <= 26364.79f)
+                {
+                    return configure_lhs_rhs_info(m,n,4,4,4,2,2,false,true,true,false,false);
+                }
+                else
+                {
+                    return configure_lhs_rhs_info(m,n,4,4,4,4,4,false,true,true,false,false);
+                }
+            }
+        }
     }
 }
 