aboutsummaryrefslogtreecommitdiff
path: root/src/runtime/CL
diff options
context:
space:
mode:
author Gian Marco Iodice <gianmarco.iodice@arm.com> 2020-10-29 13:36:50 +0000
committer Gian Marco Iodice <gianmarco.iodice@arm.com> 2020-10-30 15:35:02 +0000
commit 839e19865d4b654899d1da5cfb94304841e7f210 (patch)
tree 10321574df9e263036a60689fb5fb03608b2f487 /src/runtime/CL
parent c4d45559b00cdbdca80296c23be5939439fbbbd0 (diff)
download ComputeLibrary-839e19865d4b654899d1da5cfb94304841e7f210.tar.gz
COMPMID-3930: Update CLGEMM heuristic for fp16. Mali-G76
- Since the GEMM kernel can now work without padding, the heuristic requires to be fine-tuned to exploit this feature
- The heuristic affects Mali-G76 FP16 only
Change-Id: Ia430627f02131ad956ce2219b80c83c8e7cabaf2
Signed-off-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4284
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Reviewed-by: SiCong Li <sicong.li@arm.com>
Diffstat (limited to 'src/runtime/CL')
-rw-r--r-- src/runtime/CL/gemm/CLGEMMKernelSelectionBifrost.cpp 66
-rw-r--r-- src/runtime/CL/gemm/CLGEMMKernelSelectionBifrost.h 1
2 files changed, 64 insertions, 3 deletions
diff --git a/src/runtime/CL/gemm/CLGEMMKernelSelectionBifrost.cpp b/src/runtime/CL/gemm/CLGEMMKernelSelectionBifrost.cpp
index 73b90568f5..7c6efe3f11 100644
--- a/src/runtime/CL/gemm/CLGEMMKernelSelectionBifrost.cpp
+++ b/src/runtime/CL/gemm/CLGEMMKernelSelectionBifrost.cpp
@@ -72,7 +72,7 @@ CLGEMMKernelType CLGEMMKernelSelectionBifrost::select_kernel(const CLGEMMKernelS
static std::map<DataType, FunctionExecutorPtr> gemm_g76_configs =
{
{ DataType::F32, &CLGEMMKernelSelectionBifrost::g76_f32 },
- { DataType::F16, &CLGEMMKernelSelectionBifrost::default_f16 },
+ { DataType::F16, &CLGEMMKernelSelectionBifrost::g76_f16 },
{ DataType::QASYMM8, &CLGEMMKernelSelectionBifrost::default_q8 },
{ DataType::QASYMM8_SIGNED, &CLGEMMKernelSelectionBifrost::default_q8 },
{ DataType::QSYMM8, &CLGEMMKernelSelectionBifrost::default_q8 },
@@ -188,12 +188,10 @@ CLGEMMKernelType CLGEMMKernelSelectionBifrost::g76_f32(unsigned int m, unsigned
{
return CLGEMMKernelType::NATIVE_V1;
}
-
if(m == 1)
{
return CLGEMMKernelType::RESHAPED_ONLY_RHS;
}
-
if(k <= 496)
{
if(n <= 544)
@@ -239,6 +237,68 @@ CLGEMMKernelType CLGEMMKernelSelectionBifrost::g76_f32(unsigned int m, unsigned
}
}
+CLGEMMKernelType CLGEMMKernelSelectionBifrost::g76_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool is_rhs_constant) // Chooses the GEMM kernel type for F16 from m, n, k and is_rhs_constant; this is the function the G76 config map binds to DataType::F16.
+{
+ ARM_COMPUTE_UNUSED(b); // b is accepted for signature parity with the other selectors but plays no part in the decision
+
+ if (!is_rhs_constant) // a non-constant RHS always selects NATIVE_V1, regardless of the shape
+ {
+ return CLGEMMKernelType::NATIVE_V1;
+ }
+
+ if (m == 1) // m == 1 always selects RESHAPED_ONLY_RHS, before any ratio is looked at
+ {
+ return CLGEMMKernelType::RESHAPED_ONLY_RHS;
+ }
+
+ const float r_mn = static_cast<float>(m) / static_cast<float>(n); // ratio m/n in float; n == 0 is not guarded against here
+ const float r_nk = static_cast<float>(n) / static_cast<float>(k); // ratio n/k in float; k == 0 is not guarded against here
+
+ if(k <= 212) // NOTE(review): the numeric thresholds below look empirically tuned (the commit message calls this a fine-tuned heuristic) - confirm before editing
+ {
+ return CLGEMMKernelType::RESHAPED_ONLY_RHS;
+ }
+ else
+ {
+ if(r_nk <= 0.4990234375f) // k > 212 and n/k at or below the threshold
+ {
+ if(k <= 1392)
+ {
+ return CLGEMMKernelType::RESHAPED_ONLY_RHS;
+ }
+ else
+ {
+ if(m <= 325)
+ {
+ return CLGEMMKernelType::RESHAPED_ONLY_RHS;
+ }
+ else
+ {
+ return CLGEMMKernelType::RESHAPED; // reached only when k > 1392, n/k <= 0.4990234375 and m > 325
+ }
+ }
+ }
+ else
+ {
+ if(k <= 471) // k > 212 and n/k above the threshold
+ {
+ return CLGEMMKernelType::RESHAPED_ONLY_RHS;
+ }
+ else
+ {
+ if(r_mn <= 0.04475911520421505f)
+ {
+ return CLGEMMKernelType::RESHAPED; // reached only when k > 471, n/k > 0.4990234375 and m/n <= 0.04475911520421505
+ }
+ else
+ {
+ return CLGEMMKernelType::RESHAPED_ONLY_RHS;
+ }
+ }
+ }
+ }
+}
+
CLGEMMKernelType CLGEMMKernelSelectionBifrost::g71_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool is_rhs_constant)
{
ARM_COMPUTE_UNUSED(b);
diff --git a/src/runtime/CL/gemm/CLGEMMKernelSelectionBifrost.h b/src/runtime/CL/gemm/CLGEMMKernelSelectionBifrost.h
index a495b48301..e3cc8e4a27 100644
--- a/src/runtime/CL/gemm/CLGEMMKernelSelectionBifrost.h
+++ b/src/runtime/CL/gemm/CLGEMMKernelSelectionBifrost.h
@@ -45,6 +45,7 @@ public:
private:
CLGEMMKernelType g76_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool is_rhs_constant);
+ CLGEMMKernelType g76_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool is_rhs_constant);
CLGEMMKernelType g71_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool is_rhs_constant);
CLGEMMKernelType default_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool is_rhs_constant);
CLGEMMKernelType default_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool is_rhs_constant);