about | summary | refs | log | tree | commit | diff
path: root/src/runtime/CL
diff options
context:
space:
mode:
author Gian Marco Iodice <gianmarco.iodice@arm.com> 2023-04-26 14:55:02 +0100
committer Gian Marco Iodice <gianmarco.iodice@arm.com> 2023-05-02 09:27:49 +0000
commit 7a0f1bdaf74cde263b2919c7d1652b0cb87a94f3 (patch)
tree 62886dac919eb95811efd76d907960dfddef0b61 /src/runtime/CL
parent a62129a02397ba87171ebf4477795f628dcec0f6 (diff)
download ComputeLibrary-7a0f1bdaf74cde263b2919c7d1652b0cb87a94f3.tar.gz
Add fp16 GeMM heuristic for Arm® Mali™-G710
- Performance improvements on various networks between 5-20%

Resolves COMPMID-6030

Signed-off-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Change-Id: Idcf7de57e6f5a94a6a94ec78229dd53c24de44f4
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/c/VisualCompute/ComputeLibrary/+/514481
Tested-by: Viet-Hoa Do <viet-hoa.do@arm.com>
Reviewed-by: Viet-Hoa Do <viet-hoa.do@arm.com>
Comments-Addressed: bsgcomp <bsgcomp@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9524
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/runtime/CL')
-rw-r--r-- src/runtime/CL/gemm/CLGEMMDefaultTypeValhall.cpp 34
-rw-r--r-- src/runtime/CL/gemm/CLGEMMDefaultTypeValhall.h 3
2 files changed, 28 insertions, 9 deletions
diff --git a/src/runtime/CL/gemm/CLGEMMDefaultTypeValhall.cpp b/src/runtime/CL/gemm/CLGEMMDefaultTypeValhall.cpp
index 29d3177424..9e779d3752 100644
--- a/src/runtime/CL/gemm/CLGEMMDefaultTypeValhall.cpp
+++ b/src/runtime/CL/gemm/CLGEMMDefaultTypeValhall.cpp
@@ -79,6 +79,17 @@ CLGEMMKernelType CLGEMMDefaultTypeValhall::select_kernel(const CLGEMMKernelSelec
{ DataType::QSYMM8_PER_CHANNEL, &CLGEMMDefaultTypeValhall::default_q8 }
};
+ // Mali-G710 and Mali-G610 configurations
+ static std::map<DataType, FunctionExecutorPtr> gemm_g710_configs =
+ {
+ { DataType::F32, &CLGEMMDefaultTypeValhall::default_f32 },
+ { DataType::F16, &CLGEMMDefaultTypeValhall::g710_f16 },
+ { DataType::QASYMM8, &CLGEMMDefaultTypeValhall::default_q8 },
+ { DataType::QASYMM8_SIGNED, &CLGEMMDefaultTypeValhall::default_q8 },
+ { DataType::QSYMM8, &CLGEMMDefaultTypeValhall::default_q8 },
+ { DataType::QSYMM8_PER_CHANNEL, &CLGEMMDefaultTypeValhall::default_q8 }
+ };
+
// Mali-G715 and Mali-G615 configurations
static std::map<DataType, FunctionExecutorPtr> gemm_g715_configs =
{
@@ -94,6 +105,13 @@ CLGEMMKernelType CLGEMMDefaultTypeValhall::select_kernel(const CLGEMMKernelSelec
switch(_target)
{
+ case GPUTarget::G710:
+ case GPUTarget::G610:
+ if(gemm_g710_configs.find(data_type) != gemm_g710_configs.end())
+ {
+ return (this->*gemm_g710_configs[data_type])(params.m, params.n, params.k, params.b, params.is_rhs_constant);
+ }
+ ARM_COMPUTE_ERROR("Not supported data type");
case GPUTarget::G715:
case GPUTarget::G615:
if(gemm_g715_configs.find(data_type) != gemm_g715_configs.end())
@@ -140,14 +158,14 @@ CLGEMMKernelType CLGEMMDefaultTypeValhall::g77_f16(unsigned int m, unsigned int
{
ARM_COMPUTE_UNUSED(m, n, k, b);
- if(!is_rhs_constant)
- {
- return CLGEMMKernelType::NATIVE;
- }
- else
- {
- return CLGEMMKernelType::RESHAPED_ONLY_RHS;
- }
+ return is_rhs_constant ? CLGEMMKernelType::RESHAPED_ONLY_RHS : CLGEMMKernelType::NATIVE;
+}
+
+CLGEMMKernelType CLGEMMDefaultTypeValhall::g710_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool is_rhs_constant)
+{
+ ARM_COMPUTE_UNUSED(m, n, k, b);
+
+ return is_rhs_constant ? CLGEMMKernelType::RESHAPED_ONLY_RHS : CLGEMMKernelType::NATIVE;
}
CLGEMMKernelType CLGEMMDefaultTypeValhall::default_q8(unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool is_rhs_constant)
diff --git a/src/runtime/CL/gemm/CLGEMMDefaultTypeValhall.h b/src/runtime/CL/gemm/CLGEMMDefaultTypeValhall.h
index 0893f11132..e190295ee4 100644
--- a/src/runtime/CL/gemm/CLGEMMDefaultTypeValhall.h
+++ b/src/runtime/CL/gemm/CLGEMMDefaultTypeValhall.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020-2022 Arm Limited.
+ * Copyright (c) 2020-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -50,6 +50,7 @@ private:
CLGEMMKernelType g77_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool is_rhs_constant);
CLGEMMKernelType g78_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool is_rhs_constant);
CLGEMMKernelType g78_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool is_rhs_constant);
+ CLGEMMKernelType g710_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool is_rhs_constant);
CLGEMMKernelType g715_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool is_rhs_constant);
CLGEMMKernelType g715_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool is_rhs_constant);
};