diff options
author | Gian Marco Iodice <gianmarco.iodice@arm.com> | 2023-05-03 12:21:38 +0100 |
---|---|---|
committer | Gian Marco Iodice <gianmarco.iodice@arm.com> | 2023-05-04 10:31:33 +0000 |
commit | 352c07ddd49842b5c3a8e5a2b5a90832bfb70091 (patch) | |
tree | b9a7e15b16feecc6d9336f84804347392683999f /src/gpu | |
parent | 57132943e0df00aa008b90614ea5a9fa8b2dc18a (diff) | |
download | ComputeLibrary-352c07ddd49842b5c3a8e5a2b5a90832bfb70091.tar.gz |
Implement OpenCL MatMul heuristic for Arm® Mali™-G710
- Add heuristic for f32/f16 and int8 quantized data types
- Include MatMul configuration selection in the CLMatMul operator
Resolves COMPMID-5950, COMPMID-5957, COMPMID-5959, COMPMID-5925,
COMPMID-5926, COMPMID-5927, COMPMID-5928
Signed-off-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Change-Id: Ic222148da0337b88d4d8c960e3b6ac31003d8bcb
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9564
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Viet-Hoa Do <viet-hoa.do@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/gpu')
-rw-r--r-- | src/gpu/cl/operators/ClMatMul.cpp | 25 |
1 file changed, 13 insertions, 12 deletions
diff --git a/src/gpu/cl/operators/ClMatMul.cpp b/src/gpu/cl/operators/ClMatMul.cpp index 3ad6d914c7..15833216bb 100644 --- a/src/gpu/cl/operators/ClMatMul.cpp +++ b/src/gpu/cl/operators/ClMatMul.cpp @@ -26,6 +26,11 @@ #include "arm_compute/runtime/CL/CLScheduler.h" #include "src/common/utils/Log.h" #include "src/gpu/cl/kernels/ClMatMulNativeKernel.h" +#include "src/runtime/heuristics/matmul_native/ClMatMulNativeDefaultConfigValhall.h" +#include "src/runtime/heuristics/matmul_native/ClMatMulNativeKernelConfig.h" +#include "src/runtime/heuristics/matmul_native/IClMatMulNativeKernelConfig.h" + +using namespace arm_compute::cl_matmul; namespace arm_compute { @@ -41,9 +46,12 @@ ClMatMul::~ClMatMul() } Status ClMatMul::validate(const ITensorInfo *lhs, const ITensorInfo *rhs, const ITensorInfo *output, const MatMulInfo &matmul_info) { - MatMulKernelInfo kernel_info; - kernel_info.adj_lhs = matmul_info.adj_lhs(); - kernel_info.adj_rhs = matmul_info.adj_rhs(); + const GPUTarget gpu_target = CLScheduler::get().target(); + + std::unique_ptr<IClMatMulNativeKernelConfig> t = ClMatMulNativeKernelConfigurationFactory::create(gpu_target); + + MatMulKernelInfo kernel_info = t->configure(lhs, rhs, matmul_info); + return ClMatMulNativeKernel::validate(lhs, rhs, output, kernel_info); } void ClMatMul::configure(const CLCompileContext &compile_context, ITensorInfo *lhs, ITensorInfo *rhs, ITensorInfo *output, const MatMulInfo &matmul_info) @@ -55,16 +63,9 @@ void ClMatMul::configure(const CLCompileContext &compile_context, ITensorInfo *l ARM_COMPUTE_ERROR_THROW_ON(validate(lhs, rhs, output, matmul_info)); const GPUTarget gpu_target = CLScheduler::get().target(); - // Placeholder: Getting the heuristics calculated values for M0, N0, K0, and whether to export RHS to texture pipe + std::unique_ptr<IClMatMulNativeKernelConfig> t = ClMatMulNativeKernelConfigurationFactory::create(gpu_target); - // Filling the MatMul Kernel info - MatMulKernelInfo kernel_info; - kernel_info.adj_lhs = 
matmul_info.adj_lhs(); - kernel_info.adj_rhs = matmul_info.adj_rhs(); - kernel_info.m0 = 1; // to be properly calculated from heuristics - kernel_info.n0 = 4; // to be properly calculated from heuristics - kernel_info.k0 = 4; // to be properly calculated from heuristics - kernel_info.export_rhs_to_cl_image = false; // to be properly determined from heuristics + MatMulKernelInfo kernel_info = t->configure(lhs, rhs, matmul_info); // Set the target for the kernels _native_matmul_kernel->set_target(gpu_target); |