aboutsummaryrefslogtreecommitdiff
path: root/src/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationBifrost.cpp
diff options
context:
space:
mode:
author: Gian Marco Iodice <gianmarco.iodice@arm.com> 2020-11-10 10:41:37 +0000
committer: Gian Marco Iodice <gianmarco.iodice@arm.com> 2020-11-13 11:52:32 +0000
commit: eaca67a249b5338fb286c8e7c24253c5fc8ca7ac (patch)
tree: 88c92a7599f700f416042e9339e250a25f798c7a /src/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationBifrost.cpp
parent: 1a0a4bc78a12e85e1bd6b3207f244c91566ebdce (diff)
download: ComputeLibrary-eaca67a249b5338fb286c8e7c24253c5fc8ca7ac.tar.gz
COMPMID-3959: Update Mali-G52 heuristic for CLGEMM - F32
- Add heuristic in CLGEMMKernelSelection
- Add heuristic in CLGEMMReshapedRHSOnly
- Add heuristic in CLGEMMReshaped

Change-Id: Ibaa13398f7a5976418a0ab1b6696ace09cc480fa
Signed-off-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4366
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: SiCong Li <sicong.li@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationBifrost.cpp')
-rw-r--r-- src/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationBifrost.cpp | 110
1 files changed, 110 insertions, 0 deletions
diff --git a/src/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationBifrost.cpp b/src/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationBifrost.cpp
index c1ca187a70..70992974a3 100644
--- a/src/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationBifrost.cpp
+++ b/src/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationBifrost.cpp
@@ -60,6 +60,17 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> CLGEMMReshapedKernelConfiguratio
{ DataType::QSYMM8_PER_CHANNEL, &CLGEMMReshapedKernelConfigurationBifrost::configure_G76_u8 }
};
+ // Configurations for Mali-G52
+ static std::map<DataType, ConfigurationFunctionExecutorPtr> gemm_configs_G52 =
+ {
+ { DataType::F32, &CLGEMMReshapedKernelConfigurationBifrost::configure_G52_f32 },
+ { DataType::F16, &CLGEMMReshapedKernelConfigurationBifrost::configure_G7x_f16 },
+ { DataType::QASYMM8, &CLGEMMReshapedKernelConfigurationBifrost::configure_G7x_u8 },
+ { DataType::QSYMM8, &CLGEMMReshapedKernelConfigurationBifrost::configure_G7x_u8 },
+ { DataType::QASYMM8_SIGNED, &CLGEMMReshapedKernelConfigurationBifrost::configure_G7x_u8 },
+ { DataType::QSYMM8_PER_CHANNEL, &CLGEMMReshapedKernelConfigurationBifrost::configure_G7x_u8 }
+ };
+
// Configurations for Mali-G7x
static std::map<DataType, ConfigurationFunctionExecutorPtr> gemm_configs_G7x =
{
@@ -153,6 +164,105 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> CLGEMMReshapedKernelConfiguratio
}
}
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> CLGEMMReshapedKernelConfigurationBifrost::configure_G52_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b) // Mali-G52 / F32 heuristic: maps (m, n, k, b) to an LHS/RHS matrix-info pair.
+{ // Hard-coded decision tree over three ratios and a scaled workload; every leaf returns.
+ const float r_mn = static_cast<float>(m) / static_cast<float>(n); // ratio m / n
+ const float workload = (static_cast<float>(m) * static_cast<float>(n) * static_cast<float>(b)) / 20.0f; // m * n * b scaled by 1/20; the meaning of the 20.0f divisor is not shown here
+ const float r_mk = static_cast<float>(m) / static_cast<float>(k); // ratio m / k
+ const float r_nk = static_cast<float>(n) / static_cast<float>(k); // ratio n / k; NOTE(review): n == 0 or k == 0 gives inf/NaN ratios - presumably callers pass non-zero dims, confirm
+
+ GEMMLHSMatrixInfo lhs_info_buf; // "_buf" candidate: built with the last configure_lhs_rhs_info argument set to false
+ GEMMRHSMatrixInfo rhs_info_buf;
+ GEMMLHSMatrixInfo lhs_info_img; // "_img" candidate: built with the last configure_lhs_rhs_info argument set to true
+ GEMMRHSMatrixInfo rhs_info_img; // presumably the last argument is an export-to-image flag - TODO confirm against configure_lhs_rhs_info
+
+ if(workload <= 274.4000f) // small-workload branch
+ {
+ if(r_nk <= 0.7461f)
+ {
+ if(r_mn <= 21.1667f)
+ {
+ return configure_lhs_rhs_info(m, n, 4, 2, 4, 4, 4, false, true, true, false, false); // only leaf that returns one fixed configuration without calling select_lhs_rhs_info
+ }
+ else
+ {
+ std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 4, 4, 4, 4, 2, true, true, false, true, true); // the two candidates differ only in the final argument
+ std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 4, 4, 4, 4, 2, true, true, false, true, false);
+
+ return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img), // select_lhs_rhs_info picks one of the two pairs from n, k, b and the data type
+ std::make_pair(lhs_info_buf, rhs_info_buf), // selection criteria live in select_lhs_rhs_info, which is not visible here
+ n, k, b, DataType::F32);
+ }
+ }
+ else
+ {
+ std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 4, 4, 4, 4, 2, true, true, false, true, true); // same candidate pair as the r_mn > 21.1667 leaf above
+ std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 4, 4, 4, 4, 2, true, true, false, true, false);
+
+ return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img),
+ std::make_pair(lhs_info_buf, rhs_info_buf),
+ n, k, b, DataType::F32);
+ }
+ }
+ else // workload > 274.4
+ {
+ if(r_mk <= 17.3926f)
+ {
+ if(workload <= 542.4000f)
+ {
+ std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 4, 4, 4, 4, 2, true, true, false, true, true); // (4, 4, 4, 4, 2) candidate pair
+ std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 4, 4, 4, 4, 2, true, true, false, true, false);
+
+ return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img),
+ std::make_pair(lhs_info_buf, rhs_info_buf),
+ n, k, b, DataType::F32);
+ }
+ else
+ {
+ std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 4, 4, 4, 2, 1, true, true, false, true, true); // differs from the other candidate pairs only in the 6th/7th arguments: (2, 1) instead of (4, 2)
+ std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 4, 4, 4, 2, 1, true, true, false, true, false);
+
+ return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img),
+ std::make_pair(lhs_info_buf, rhs_info_buf),
+ n, k, b, DataType::F32);
+ }
+ }
+ else // r_mk > 17.3926
+ {
+ if(r_nk <= 0.5463f)
+ {
+ if(workload <= 11767.6001f)
+ {
+ std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 4, 4, 4, 4, 2, true, true, false, true, true); // (4, 4, 4, 4, 2) candidate pair
+ std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 4, 4, 4, 4, 2, true, true, false, true, false);
+
+ return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img),
+ std::make_pair(lhs_info_buf, rhs_info_buf),
+ n, k, b, DataType::F32);
+ }
+ else
+ {
+ std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 4, 4, 4, 2, 1, true, true, false, true, true); // (4, 4, 4, 2, 1) candidate pair, as in the workload > 542.4 leaf above
+ std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 4, 4, 4, 2, 1, true, true, false, true, false);
+
+ return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img),
+ std::make_pair(lhs_info_buf, rhs_info_buf),
+ n, k, b, DataType::F32);
+ }
+ }
+ else
+ {
+ std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 4, 4, 4, 4, 2, true, true, false, true, true); // (4, 4, 4, 4, 2) candidate pair
+ std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 4, 4, 4, 4, 2, true, true, false, true, false);
+
+ return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img),
+ std::make_pair(lhs_info_buf, rhs_info_buf),
+ n, k, b, DataType::F32);
+ }
+ }
+ }
+} // no fall-through path: every branch of the tree above ends in a return
+
std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> CLGEMMReshapedKernelConfigurationBifrost::configure_G76_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
{
ARM_COMPUTE_UNUSED(k);