aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Gian Marco Iodice <gianmarco.iodice@arm.com> 2020-04-14 12:09:43 +0100
committer Gian Marco Iodice <gianmarco.iodice@arm.com> 2020-04-14 16:27:09 +0000
commit f6ffe99c96766c31ab0d9075e8be8fcfe23e9d80 (patch)
tree 99d1e014ee5b58d88d14aa9ec37b70f42cb41efb
parent 12910f27933e269a4496c9342090d91cbaa421b6 (diff)
download ComputeLibrary-f6ffe99c96766c31ab0d9075e8be8fcfe23e9d80.tar.gz
COMPMID-3349: Fixed performance regression Mali-G71
Change-Id: I2c9cc9ebd7fe6cc6431d25ac795046b0539db616
Signed-off-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3014
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
-rw-r--r-- arm_compute/runtime/CL/gemm/CLGEMMKernelSelectionBifrost.h 2
-rw-r--r-- src/runtime/CL/gemm/CLGEMMKernelSelectionBifrost.cpp 118
2 files changed, 109 insertions, 11 deletions
diff --git a/arm_compute/runtime/CL/gemm/CLGEMMKernelSelectionBifrost.h b/arm_compute/runtime/CL/gemm/CLGEMMKernelSelectionBifrost.h
index 94311fb3aa..04906e317f 100644
--- a/arm_compute/runtime/CL/gemm/CLGEMMKernelSelectionBifrost.h
+++ b/arm_compute/runtime/CL/gemm/CLGEMMKernelSelectionBifrost.h
@@ -44,6 +44,8 @@ public:
CLGEMMKernelType select_kernel(const CLGEMMKernelSelectionParams &params) override;
private:
+ CLGEMMKernelType g76_f32(unsigned int m, unsigned int n, unsigned int k, bool is_rhs_constant);
+ CLGEMMKernelType g71_f16(unsigned int m, unsigned int n, unsigned int k, bool is_rhs_constant);
CLGEMMKernelType default_f32(unsigned int m, unsigned int n, unsigned int k, bool is_rhs_constant);
CLGEMMKernelType default_f16(unsigned int m, unsigned int n, unsigned int k, bool is_rhs_constant);
CLGEMMKernelType default_q8(unsigned int m, unsigned int n, unsigned int k, bool is_rhs_constant);
diff --git a/src/runtime/CL/gemm/CLGEMMKernelSelectionBifrost.cpp b/src/runtime/CL/gemm/CLGEMMKernelSelectionBifrost.cpp
index 4542f53136..d30eaa9edc 100644
--- a/src/runtime/CL/gemm/CLGEMMKernelSelectionBifrost.cpp
+++ b/src/runtime/CL/gemm/CLGEMMKernelSelectionBifrost.cpp
@@ -46,8 +46,8 @@ CLGEMMKernelType CLGEMMKernelSelectionBifrost::select_kernel(const CLGEMMKernelS
using FunctionExecutorPtr = CLGEMMKernelType (CLGEMMKernelSelectionBifrost::*)(unsigned int m, unsigned int n, unsigned int k, bool is_rhs_constant);
- // Configurations for Bifrost architectures
- static std::map<DataType, FunctionExecutorPtr> gemm_configs =
+ // Default configurations for Bifrost architectures
+ static std::map<DataType, FunctionExecutorPtr> gemm_default_configs =
{
{ DataType::F32, &CLGEMMKernelSelectionBifrost::default_f32 },
{ DataType::F16, &CLGEMMKernelSelectionBifrost::default_f16 },
@@ -57,14 +57,51 @@ CLGEMMKernelType CLGEMMKernelSelectionBifrost::select_kernel(const CLGEMMKernelS
{ DataType::QSYMM8_PER_CHANNEL, &CLGEMMKernelSelectionBifrost::default_q8 }
};
+ // Mali-G71 configurations
+ static std::map<DataType, FunctionExecutorPtr> gemm_g71_configs =
+ {
+ { DataType::F32, &CLGEMMKernelSelectionBifrost::default_f32 },
+ { DataType::F16, &CLGEMMKernelSelectionBifrost::g71_f16 },
+ { DataType::QASYMM8, &CLGEMMKernelSelectionBifrost::default_q8 },
+ { DataType::QASYMM8_SIGNED, &CLGEMMKernelSelectionBifrost::default_q8 },
+ { DataType::QSYMM8, &CLGEMMKernelSelectionBifrost::default_q8 },
+ { DataType::QSYMM8_PER_CHANNEL, &CLGEMMKernelSelectionBifrost::default_q8 }
+ };
+
+ // Mali-G76 configurations
+ static std::map<DataType, FunctionExecutorPtr> gemm_g76_configs =
+ {
+ { DataType::F32, &CLGEMMKernelSelectionBifrost::g76_f32 },
+ { DataType::F16, &CLGEMMKernelSelectionBifrost::default_f16 },
+ { DataType::QASYMM8, &CLGEMMKernelSelectionBifrost::default_q8 },
+ { DataType::QASYMM8_SIGNED, &CLGEMMKernelSelectionBifrost::default_q8 },
+ { DataType::QSYMM8, &CLGEMMKernelSelectionBifrost::default_q8 },
+ { DataType::QSYMM8_PER_CHANNEL, &CLGEMMKernelSelectionBifrost::default_q8 }
+ };
+
const DataType data_type = params.data_type;
- if(gemm_configs.find(data_type) != gemm_configs.end())
+ switch(_target)
{
- return (this->*gemm_configs[data_type])(params.m, params.n, params.k, params.is_rhs_constant);
+ case GPUTarget::G71:
+ if(gemm_g71_configs.find(data_type) != gemm_g71_configs.end())
+ {
+ return (this->*gemm_g71_configs[data_type])(params.m, params.n, params.k, params.is_rhs_constant);
+ }
+ ARM_COMPUTE_ERROR("Not supported data type");
+ case GPUTarget::G76:
+ if(gemm_g76_configs.find(data_type) != gemm_g76_configs.end())
+ {
+ return (this->*gemm_g76_configs[data_type])(params.m, params.n, params.k, params.is_rhs_constant);
+ }
+ ARM_COMPUTE_ERROR("Not supported data type");
+ default:
+ if(gemm_default_configs.find(data_type) != gemm_default_configs.end())
+ {
+ return (this->*gemm_default_configs[data_type])(params.m, params.n, params.k, params.is_rhs_constant);
+ }
+ ARM_COMPUTE_ERROR("Not supported data type");
}
-
- ARM_COMPUTE_ERROR("Not supported data type");
}
CLGEMMKernelType CLGEMMKernelSelectionBifrost::default_f32(unsigned int m, unsigned int n, unsigned int k, bool is_rhs_constant)
@@ -75,7 +112,7 @@ CLGEMMKernelType CLGEMMKernelSelectionBifrost::default_f32(unsigned int m, unsig
{
if((m > 1) && (n < 16))
{
- gemm_type = CLGEMMKernelType::RESHAPED;
+ gemm_type = CLGEMMKernelType::RESHAPED_V1;
}
else if(m == 1)
{
@@ -85,13 +122,22 @@ CLGEMMKernelType CLGEMMKernelSelectionBifrost::default_f32(unsigned int m, unsig
{
if((k > 256) && (m > 4))
{
- gemm_type = CLGEMMKernelType::RESHAPED;
+ constexpr float alpha = 3.2f;
+ constexpr float fact0 = 1.51f;
+ constexpr float fact1 = 1.66f;
+ constexpr float ops = 12.0f;
+ const float scale = k > 1024 ? 1.07f : 1.0f;
+ gemm_type = (alpha + ((n * fact0) / ops) < ((fact1 * n * scale) / ops)) ? CLGEMMKernelType::RESHAPED_V1 : CLGEMMKernelType::NATIVE_V1;
}
else
{
- gemm_type = CLGEMMKernelType::RESHAPED_ONLY_RHS;
+ gemm_type = CLGEMMKernelType::NATIVE_V1;
}
}
+
+ const auto workload = static_cast<float>((m * n) / 20.0f);
+
+ gemm_type = ((workload > 1600.0f) && (gemm_type == CLGEMMKernelType::RESHAPED_V1)) ? CLGEMMKernelType::RESHAPED : gemm_type;
}
return gemm_type;
@@ -99,6 +145,26 @@ CLGEMMKernelType CLGEMMKernelSelectionBifrost::default_f32(unsigned int m, unsig
CLGEMMKernelType CLGEMMKernelSelectionBifrost::default_f16(unsigned int m, unsigned int n, unsigned int k, bool is_rhs_constant)
{
+ ARM_COMPUTE_UNUSED(n, k);
+ if(is_rhs_constant)
+ {
+ if(m == 1)
+ {
+ return CLGEMMKernelType::RESHAPED_ONLY_RHS;
+ }
+ else
+ {
+ return CLGEMMKernelType::RESHAPED;
+ }
+ }
+ else
+ {
+ return CLGEMMKernelType::NATIVE_V1;
+ }
+}
+
+CLGEMMKernelType CLGEMMKernelSelectionBifrost::default_q8(unsigned int m, unsigned int n, unsigned int k, bool is_rhs_constant)
+{
if(is_rhs_constant)
{
if(m == 1)
@@ -123,13 +189,43 @@ CLGEMMKernelType CLGEMMKernelSelectionBifrost::default_f16(unsigned int m, unsig
}
}
-CLGEMMKernelType CLGEMMKernelSelectionBifrost::default_q8(unsigned int m, unsigned int n, unsigned int k, bool is_rhs_constant)
+CLGEMMKernelType CLGEMMKernelSelectionBifrost::g76_f32(unsigned int m, unsigned int n, unsigned int k, bool is_rhs_constant)
+{
+ CLGEMMKernelType gemm_type = CLGEMMKernelType::NATIVE_V1;
+
+ if(is_rhs_constant)
+ {
+ if((m > 1) && (n < 16))
+ {
+ gemm_type = CLGEMMKernelType::RESHAPED;
+ }
+ else if(m == 1)
+ {
+ gemm_type = CLGEMMKernelType::RESHAPED_ONLY_RHS;
+ }
+ else
+ {
+ if((k > 256) && (m > 4))
+ {
+ gemm_type = CLGEMMKernelType::RESHAPED;
+ }
+ else
+ {
+ gemm_type = CLGEMMKernelType::RESHAPED_ONLY_RHS;
+ }
+ }
+ }
+
+ return gemm_type;
+}
+
+CLGEMMKernelType CLGEMMKernelSelectionBifrost::g71_f16(unsigned int m, unsigned int n, unsigned int k, bool is_rhs_constant)
{
if(is_rhs_constant)
{
if(m == 1)
{
- if((n > k) && gpu_target_is_in(_target, GPUTarget::G71))
+ if(n > k)
{
return CLGEMMKernelType::NATIVE_V1;
}