aboutsummaryrefslogtreecommitdiff
path: root/src/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationBifrost.cpp
diff options
context:
space:
mode:
author: Gian Marco Iodice <gianmarco.iodice@arm.com> 2020-10-29 13:36:50 +0000
committer: Gian Marco Iodice <gianmarco.iodice@arm.com> 2020-10-30 15:35:02 +0000
commit: 839e19865d4b654899d1da5cfb94304841e7f210 (patch)
tree: 10321574df9e263036a60689fb5fb03608b2f487 /src/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationBifrost.cpp
parent: c4d45559b00cdbdca80296c23be5939439fbbbd0 (diff)
download: ComputeLibrary-839e19865d4b654899d1da5cfb94304841e7f210.tar.gz
COMPMID-3930: Update CLGEMM heuristic for fp16. Mali-G76
- Since the GEMM kernel can now work without padding, the heuristic needs to be fine-tuned to exploit this feature
- The heuristic affects Mali-G76 FP16 only

Change-Id: Ia430627f02131ad956ce2219b80c83c8e7cabaf2
Signed-off-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4284
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Reviewed-by: SiCong Li <sicong.li@arm.com>
Diffstat (limited to 'src/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationBifrost.cpp')
-rw-r--r-- src/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationBifrost.cpp 79
1 files changed, 11 insertions, 68 deletions
diff --git a/src/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationBifrost.cpp b/src/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationBifrost.cpp
index 00c284facc..c1ca187a70 100644
--- a/src/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationBifrost.cpp
+++ b/src/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationBifrost.cpp
@@ -205,95 +205,38 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> CLGEMMReshapedKernelConfiguratio
std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> CLGEMMReshapedKernelConfigurationBifrost::configure_G76_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
{
- ARM_COMPUTE_UNUSED(k);
-
- const float r_mn = static_cast<float>(m) / static_cast<float>(n);
const float workload = (static_cast<float>(m) * static_cast<float>(n) * static_cast<float>(b)) / 20.0f;
+ const float r_mk = static_cast<float>(m) / static_cast<float>(k);
+ const float r_nk = static_cast<float>(n) / static_cast<float>(k);
- if(workload <= 1049.59f)
+ if(workload <= 1422.40f)
{
- if(b <= 5)
+ if(r_mk <= 2.45f)
{
- if(workload <= 790.39f)
+ if(workload <= 801.60f)
{
- return configure_lhs_rhs_info(m, n, 2, 4, 4, 2, 2, false, false, true, false, false);
+ return configure_lhs_rhs_info(m, n, 2, 4, 4, 1, 2, true, false, true, false, false);
}
else
{
- if(workload <= 982.39f)
- {
- return configure_lhs_rhs_info(m, n, 4, 2, 4, 4, 4, false, false, true, false, false);
- }
- else
- {
- return configure_lhs_rhs_info(m, n, 2, 4, 4, 2, 1, false, true, true, false, false);
- }
+ return configure_lhs_rhs_info(m, n, 4, 2, 4, 2, 2, false, false, true, false, false);
}
}
else
{
- if(r_mn <= 0.21f)
+ if(r_nk <= 0.67f)
{
- if(r_mn <= 0.11f)
- {
- return configure_lhs_rhs_info(m, n, 2, 4, 4, 2, 2, false, false, true, false, false);
- }
- else
- {
- return configure_lhs_rhs_info(m, n, 4, 4, 4, 4, 4, false, true, true, false, false);
- }
+ return configure_lhs_rhs_info(m, n, 4, 2, 4, 2, 2, false, false, true, false, false);
}
else
{
- return configure_lhs_rhs_info(m, n, 2, 4, 4, 2, 2, false, false, true, false, false);
+ return configure_lhs_rhs_info(m, n, 2, 4, 4, 4, 1, false, true, false, true, false);
}
}
}
else
{
- if(n <= 200)
- {
- if(workload <= 29772.79f)
- {
- if(m <= 64.5)
- {
- return configure_lhs_rhs_info(m, n, 4, 4, 4, 2, 4, true, false, true, false, false);
- }
- else
- {
- return configure_lhs_rhs_info(m, n, 4, 4, 4, 2, 2, false, true, true, false, false);
- }
- }
- else
- {
- if(r_mn <= 1.09f)
- {
- return configure_lhs_rhs_info(m, n, 4, 4, 4, 4, 4, false, true, true, false, false);
- }
- else
- {
- return configure_lhs_rhs_info(m, n, 4, 4, 4, 2, 2, true, true, true, false, false);
- }
- }
- }
- else
- {
- if(m <= 43)
- {
- return configure_lhs_rhs_info(m, n, 4, 4, 4, 2, 4, true, false, true, false, false);
- }
- else
- {
- if(workload <= 26364.79f)
- {
- return configure_lhs_rhs_info(m, n, 4, 4, 4, 2, 2, false, true, true, false, false);
- }
- else
- {
- return configure_lhs_rhs_info(m, n, 4, 4, 4, 4, 4, false, true, true, false, false);
- }
- }
- }
+ return configure_lhs_rhs_info(m, n, 4, 4, 4, 2, 4, true, true, true, false, false);
}
}