author    Gian Marco Iodice <gianmarco.iodice@arm.com>  2023-05-03 12:21:38 +0100
committer Gian Marco Iodice <gianmarco.iodice@arm.com>  2023-05-04 10:31:33 +0000
commit    352c07ddd49842b5c3a8e5a2b5a90832bfb70091 (patch)
tree      b9a7e15b16feecc6d9336f84804347392683999f /src/gpu/cl/operators/ClMatMul.cpp
parent    57132943e0df00aa008b90614ea5a9fa8b2dc18a (diff)
download  ComputeLibrary-352c07ddd49842b5c3a8e5a2b5a90832bfb70091.tar.gz
Implement OpenCL MatMul heuristic for Arm® Mali™-G710
- Add heuristic for f32/f16 and int8 quantized data types
- Include MatMul configuration selection in the CLMatMul operator

Resolves COMPMID-5950, COMPMID-5957, COMPMID-5959, COMPMID-5925, COMPMID-5926, COMPMID-5927, COMPMID-5928

Signed-off-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Change-Id: Ic222148da0337b88d4d8c960e3b6ac31003d8bcb
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9564
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Viet-Hoa Do <viet-hoa.do@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/gpu/cl/operators/ClMatMul.cpp')
-rw-r--r--  src/gpu/cl/operators/ClMatMul.cpp  |  25
1 file changed, 13 insertions(+), 12 deletions(-)
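Context for the diff below: the patch replaces hard-coded placeholder M0/N0/K0 values with a kernel configuration selected per GPU target. The following is a minimal sketch of that flow, not part of the patch; only the factory and configure() calls mirror the diff, while the helper function name and the assumption that the supporting type headers are pulled in transitively are illustrative.

    // Illustrative sketch (not part of the patch). Only the factory/configure
    // calls mirror the diff; the helper name select_matmul_config is made up.
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "src/runtime/heuristics/matmul_native/ClMatMulNativeKernelConfig.h"
    #include "src/runtime/heuristics/matmul_native/IClMatMulNativeKernelConfig.h"

    #include <memory>

    using namespace arm_compute;
    using namespace arm_compute::cl_matmul;

    MatMulKernelInfo select_matmul_config(const ITensorInfo *lhs, const ITensorInfo *rhs, const MatMulInfo &matmul_info)
    {
        // Query the GPU the CL scheduler is running on (e.g. a Mali-G710).
        const GPUTarget gpu_target = CLScheduler::get().target();

        // The factory returns the heuristic for that target; for Valhall GPUs such
        // as G710 this is ClMatMulNativeDefaultConfigValhall (see the new includes).
        std::unique_ptr<IClMatMulNativeKernelConfig> config = ClMatMulNativeKernelConfigurationFactory::create(gpu_target);

        // The heuristic derives M0/N0/K0 and the export-RHS-to-CL-image flag from
        // the tensor shapes, data type and transpose flags instead of fixed values.
        return config->configure(lhs, rhs, matmul_info);
    }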
diff --git a/src/gpu/cl/operators/ClMatMul.cpp b/src/gpu/cl/operators/ClMatMul.cpp
index 3ad6d914c7..15833216bb 100644
--- a/src/gpu/cl/operators/ClMatMul.cpp
+++ b/src/gpu/cl/operators/ClMatMul.cpp
@@ -26,6 +26,11 @@
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "src/common/utils/Log.h"
#include "src/gpu/cl/kernels/ClMatMulNativeKernel.h"
+#include "src/runtime/heuristics/matmul_native/ClMatMulNativeDefaultConfigValhall.h"
+#include "src/runtime/heuristics/matmul_native/ClMatMulNativeKernelConfig.h"
+#include "src/runtime/heuristics/matmul_native/IClMatMulNativeKernelConfig.h"
+
+using namespace arm_compute::cl_matmul;
namespace arm_compute
{
@@ -41,9 +46,12 @@ ClMatMul::~ClMatMul()
}
Status ClMatMul::validate(const ITensorInfo *lhs, const ITensorInfo *rhs, const ITensorInfo *output, const MatMulInfo &matmul_info)
{
- MatMulKernelInfo kernel_info;
- kernel_info.adj_lhs = matmul_info.adj_lhs();
- kernel_info.adj_rhs = matmul_info.adj_rhs();
+ const GPUTarget gpu_target = CLScheduler::get().target();
+
+ std::unique_ptr<IClMatMulNativeKernelConfig> t = ClMatMulNativeKernelConfigurationFactory::create(gpu_target);
+
+ MatMulKernelInfo kernel_info = t->configure(lhs, rhs, matmul_info);
+
return ClMatMulNativeKernel::validate(lhs, rhs, output, kernel_info);
}
void ClMatMul::configure(const CLCompileContext &compile_context, ITensorInfo *lhs, ITensorInfo *rhs, ITensorInfo *output, const MatMulInfo &matmul_info)
@@ -55,16 +63,9 @@ void ClMatMul::configure(const CLCompileContext &compile_context, ITensorInfo *l
ARM_COMPUTE_ERROR_THROW_ON(validate(lhs, rhs, output, matmul_info));
const GPUTarget gpu_target = CLScheduler::get().target();
- // Placeholder: Getting the heuristics calculated values for M0, N0, K0, and whether to export RHS to texture pipe
+ std::unique_ptr<IClMatMulNativeKernelConfig> t = ClMatMulNativeKernelConfigurationFactory::create(gpu_target);
- // Filling the MatMul Kernel info
- MatMulKernelInfo kernel_info;
- kernel_info.adj_lhs = matmul_info.adj_lhs();
- kernel_info.adj_rhs = matmul_info.adj_rhs();
- kernel_info.m0 = 1; // to be properly calculated from heuristics
- kernel_info.n0 = 4; // to be properly calculated from heuristics
- kernel_info.k0 = 4; // to be properly calculated from heuristics
- kernel_info.export_rhs_to_cl_image = false; // to be properly determined from heuristics
+ MatMulKernelInfo kernel_info = t->configure(lhs, rhs, matmul_info);
// Set the target for the kernels
_native_matmul_kernel->set_target(gpu_target);
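With this change, validate() and configure() both obtain the MatMulKernelInfo from the same per-target heuristic, so the placeholder values (m0 = 1, n0 = 4, k0 = 4, no RHS image export) are gone and validation matches what will actually run. Below is a hypothetical caller-side sketch under stated assumptions: ClMatMul is assumed to live in arm_compute::opencl with a header alongside this .cpp, CLKernelLibrary::get().get_compile_context() is assumed available, and the shapes and data type are invented for the example.

    // Hypothetical caller of ClMatMul after this patch; nothing here is in the diff.
    #include "arm_compute/core/CL/CLKernelLibrary.h"
    #include "arm_compute/core/Error.h"
    #include "arm_compute/core/TensorInfo.h"
    #include "src/gpu/cl/operators/ClMatMul.h"   // assumed header path

    using namespace arm_compute;

    void run_matmul_example()
    {
        TensorInfo lhs(TensorShape(64U, 32U), 1, DataType::F16); // K x M
        TensorInfo rhs(TensorShape(16U, 64U), 1, DataType::F16); // N x K
        TensorInfo dst(TensorShape(16U, 32U), 1, DataType::F16); // N x M
        MatMulInfo  info{};                                      // no transposes

        // validate() now resolves M0/N0/K0 through the GPU-target heuristic,
        // exactly as configure() does, instead of using hard-coded placeholders.
        const Status status = opencl::ClMatMul::validate(&lhs, &rhs, &dst, info);
        ARM_COMPUTE_ERROR_THROW_ON(status);

        opencl::ClMatMul matmul{};
        matmul.configure(CLKernelLibrary::get().get_compile_context(), &lhs, &rhs, &dst, info);
    }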