author    Gian Marco Iodice <gianmarco.iodice@arm.com>  2023-05-03 12:21:38 +0100
committer Gian Marco Iodice <gianmarco.iodice@arm.com>  2023-05-04 10:31:33 +0000
commit    352c07ddd49842b5c3a8e5a2b5a90832bfb70091 (patch)
tree      b9a7e15b16feecc6d9336f84804347392683999f /src/gpu/cl/operators/ClMatMul.cpp
parent    57132943e0df00aa008b90614ea5a9fa8b2dc18a (diff)
download  ComputeLibrary-352c07ddd49842b5c3a8e5a2b5a90832bfb70091.tar.gz
Implement OpenCL MatMul heuristic for Arm® Mali™-G710
- Add heuristic for f32/f16 and int8 quantized data types
- Include MatMul configuration selection in the CLMatMul operator

Resolves COMPMID-5950, COMPMID-5957, COMPMID-5959, COMPMID-5925, COMPMID-5926, COMPMID-5927, COMPMID-5928

Signed-off-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Change-Id: Ic222148da0337b88d4d8c960e3b6ac31003d8bcb
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9564
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Viet-Hoa Do <viet-hoa.do@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/gpu/cl/operators/ClMatMul.cpp')
-rw-r--r--  src/gpu/cl/operators/ClMatMul.cpp  |  25
1 file changed, 13 insertions(+), 12 deletions(-)
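Context for the diff below: the patch replaces hard-coded placeholder M0/N0/K0 values with a kernel configuration selected per GPU target. The following is a minimal sketch of that flow, not part of the patch; only the factory and configure() calls mirror the diff, while the helper function name and the assumption that the supporting type headers are pulled in transitively are illustrative.

    // Illustrative sketch (not part of the patch). Only the factory/configure
    // calls mirror the diff; the helper name select_matmul_config is made up.
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "src/runtime/heuristics/matmul_native/ClMatMulNativeKernelConfig.h"
    #include "src/runtime/heuristics/matmul_native/IClMatMulNativeKernelConfig.h"

    #include <memory>

    using namespace arm_compute;
    using namespace arm_compute::cl_matmul;

    MatMulKernelInfo select_matmul_config(const ITensorInfo *lhs, const ITensorInfo *rhs, const MatMulInfo &matmul_info)
    {
        // Query the GPU the CL scheduler is running on (e.g. a Mali-G710).
        const GPUTarget gpu_target = CLScheduler::get().target();

        // The factory returns the heuristic for that target; for Valhall GPUs such
        // as G710 this is ClMatMulNativeDefaultConfigValhall (see the new includes).
        std::unique_ptr<IClMatMulNativeKernelConfig> config = ClMatMulNativeKernelConfigurationFactory::create(gpu_target);

        // The heuristic derives M0/N0/K0 and the export-RHS-to-CL-image flag from
        // the tensor shapes, data type and transpose flags instead of fixed values.
        return config->configure(lhs, rhs, matmul_info);
    }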
diff --git a/src/gpu/cl/operators/ClMatMul.cpp b/src/gpu/cl/operators/ClMatMul.cpp
index 3ad6d914c7..15833216bb 100644
--- a/src/gpu/cl/operators/ClMatMul.cpp
+++ b/src/gpu/cl/operators/ClMatMul.cpp
@@ -26,6 +26,11 @@
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "src/common/utils/Log.h"
#include "src/gpu/cl/kernels/ClMatMulNativeKernel.h"
+#include "src/runtime/heuristics/matmul_native/ClMatMulNativeDefaultConfigValhall.h"
+#include "src/runtime/heuristics/matmul_native/ClMatMulNativeKernelConfig.h"
+#include "src/runtime/heuristics/matmul_native/IClMatMulNativeKernelConfig.h"
+
+using namespace arm_compute::cl_matmul;
namespace arm_compute
{
@@ -41,9 +46,12 @@ ClMatMul::~ClMatMul()
}
Status ClMatMul::validate(const ITensorInfo *lhs, const ITensorInfo *rhs, const ITensorInfo *output, const MatMulInfo &matmul_info)
{
- MatMulKernelInfo kernel_info;
- kernel_info.adj_lhs = matmul_info.adj_lhs();
- kernel_info.adj_rhs = matmul_info.adj_rhs();
+ const GPUTarget gpu_target = CLScheduler::get().target();
+
+ std::unique_ptr<IClMatMulNativeKernelConfig> t = ClMatMulNativeKernelConfigurationFactory::create(gpu_target);
+
+ MatMulKernelInfo kernel_info = t->configure(lhs, rhs, matmul_info);
+
return ClMatMulNativeKernel::validate(lhs, rhs, output, kernel_info);
}
void ClMatMul::configure(const CLCompileContext &compile_context, ITensorInfo *lhs, ITensorInfo *rhs, ITensorInfo *output, const MatMulInfo &matmul_info)
@@ -55,16 +63,9 @@ void ClMatMul::configure(const CLCompileContext &compile_context, ITensorInfo *l
ARM_COMPUTE_ERROR_THROW_ON(validate(lhs, rhs, output, matmul_info));
const GPUTarget gpu_target = CLScheduler::get().target();
- // Placeholder: Getting the heuristics calculated values for M0, N0, K0, and whether to export RHS to texture pipe
+ std::unique_ptr<IClMatMulNativeKernelConfig> t = ClMatMulNativeKernelConfigurationFactory::create(gpu_target);
- // Filling the MatMul Kernel info
- MatMulKernelInfo kernel_info;
- kernel_info.adj_lhs = matmul_info.adj_lhs();
- kernel_info.adj_rhs = matmul_info.adj_rhs();
- kernel_info.m0 = 1; // to be properly calculated from heuristics
- kernel_info.n0 = 4; // to be properly calculated from heuristics
- kernel_info.k0 = 4; // to be properly calculated from heuristics
- kernel_info.export_rhs_to_cl_image = false; // to be properly determined from heuristics
+ MatMulKernelInfo kernel_info = t->configure(lhs, rhs, matmul_info);
// Set the target for the kernels
_native_matmul_kernel->set_target(gpu_target);
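With this change, validate() and configure() both obtain the MatMulKernelInfo from the same per-target heuristic, so the placeholder values (m0 = 1, n0 = 4, k0 = 4, no RHS image export) are gone and validation matches what will actually run. Below is a hypothetical caller-side sketch under stated assumptions: ClMatMul is assumed to live in arm_compute::opencl with a header alongside this .cpp, CLKernelLibrary::get().get_compile_context() is assumed available, and the shapes and data type are invented for the example.

    // Hypothetical caller of ClMatMul after this patch; nothing here is in the diff.
    #include "arm_compute/core/CL/CLKernelLibrary.h"
    #include "arm_compute/core/Error.h"
    #include "arm_compute/core/TensorInfo.h"
    #include "src/gpu/cl/operators/ClMatMul.h"   // assumed header path

    using namespace arm_compute;

    void run_matmul_example()
    {
        TensorInfo lhs(TensorShape(64U, 32U), 1, DataType::F16); // K x M
        TensorInfo rhs(TensorShape(16U, 64U), 1, DataType::F16); // N x K
        TensorInfo dst(TensorShape(16U, 32U), 1, DataType::F16); // N x M
        MatMulInfo  info{};                                      // no transposes

        // validate() now resolves M0/N0/K0 through the GPU-target heuristic,
        // exactly as configure() does, instead of using hard-coded placeholders.
        const Status status = opencl::ClMatMul::validate(&lhs, &rhs, &dst, info);
        ARM_COMPUTE_ERROR_THROW_ON(status);

        opencl::ClMatMul matmul{};
        matmul.configure(CLKernelLibrary::get().get_compile_context(), &lhs, &rhs, &dst, info);
    }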