authorGian Marco Iodice <gianmarco.iodice@arm.com>2023-05-03 12:21:38 +0100
committerGian Marco Iodice <gianmarco.iodice@arm.com>2023-05-04 10:31:33 +0000
commit352c07ddd49842b5c3a8e5a2b5a90832bfb70091 (patch)
treeb9a7e15b16feecc6d9336f84804347392683999f
parent57132943e0df00aa008b90614ea5a9fa8b2dc18a (diff)
downloadComputeLibrary-352c07ddd49842b5c3a8e5a2b5a90832bfb70091.tar.gz
Implement OpenCL MatMul heuristic for Arm® Mali™-G710
- Add heuristic for f32/f16 and int8 quantized data types
- Include MatMul configuration selection in the CLMatMul operator

Resolves COMPMID-5950, COMPMID-5957, COMPMID-5959, COMPMID-5925, COMPMID-5926, COMPMID-5927, COMPMID-5928

Signed-off-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Change-Id: Ic222148da0337b88d4d8c960e3b6ac31003d8bcb
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9564
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Viet-Hoa Do <viet-hoa.do@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
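In practice, the patch replaces the hard-coded M0/N0/K0 placeholders with a factory lookup. A minimal sketch of the new flow, mirroring the ClMatMul.cpp hunk below (pick_kernel_info is a hypothetical helper name, not part of the patch):

    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "src/runtime/heuristics/matmul_native/ClMatMulNativeKernelConfig.h"

    using namespace arm_compute;
    using namespace arm_compute::cl_matmul;

    // Hypothetical helper: derive the MatMul tile sizes from the new heuristic.
    MatMulKernelInfo pick_kernel_info(const ITensorInfo *lhs, const ITensorInfo *rhs, const MatMulInfo &matmul_info)
    {
        const GPUTarget gpu_target = CLScheduler::get().target();

        // The factory returns the architecture-specific heuristic (currently the Valhall one).
        std::unique_ptr<IClMatMulNativeKernelConfig> config = ClMatMulNativeKernelConfigurationFactory::create(gpu_target);

        // M0, N0, K0 and the cl_image2d export flag are selected per shape and data type.
        return config->configure(lhs, rhs, matmul_info);
    }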
-rw-r--r--  Android.bp                                                                      2
-rw-r--r--  arm_compute/core/KernelDescriptors.h                                            3
-rw-r--r--  filelist.json                                                                   4
-rw-r--r--  src/gpu/cl/operators/ClMatMul.cpp                                              25
-rw-r--r--  src/runtime/heuristics/matmul_native/ClMatMulNativeDefaultConfigValhall.cpp   423
-rw-r--r--  src/runtime/heuristics/matmul_native/ClMatMulNativeDefaultConfigValhall.h      53
-rw-r--r--  src/runtime/heuristics/matmul_native/ClMatMulNativeHelpers.cpp                116
-rw-r--r--  src/runtime/heuristics/matmul_native/ClMatMulNativeHelpers.h                   72
-rw-r--r--  src/runtime/heuristics/matmul_native/ClMatMulNativeKernelConfig.h              61
-rw-r--r--  src/runtime/heuristics/matmul_native/IClMatMulNativeKernelConfig.h            115
10 files changed, 860 insertions, 14 deletions
diff --git a/Android.bp b/Android.bp
index 143e130f13..76a43caf97 100644
--- a/Android.bp
+++ b/Android.bp
@@ -1000,6 +1000,8 @@ cc_library_static {
"src/runtime/heuristics/dwc_native/ClDWCNativeDefaultConfigValhall.cpp",
"src/runtime/heuristics/dwc_native/ClDWCNativeHeuristicsHelpers.cpp",
"src/runtime/heuristics/indirect_conv/ClIndirectConvDefaultConfigValhall.cpp",
+ "src/runtime/heuristics/matmul_native/ClMatMulNativeDefaultConfigValhall.cpp",
+ "src/runtime/heuristics/matmul_native/ClMatMulNativeHelpers.cpp",
"utils/CommonGraphOptions.cpp",
"utils/GraphUtils.cpp",
"utils/Utils.cpp",
diff --git a/arm_compute/core/KernelDescriptors.h b/arm_compute/core/KernelDescriptors.h
index 016e03d88e..f637351e26 100644
--- a/arm_compute/core/KernelDescriptors.h
+++ b/arm_compute/core/KernelDescriptors.h
@@ -226,7 +226,8 @@ struct ScaleKernelInfo
struct MatMulKernelInfo
{
- MatMulKernelInfo(bool adj_lhs = false, bool adj_rhs = false, int m0 = 1, int n0 = 1, int k0 = 1, bool export_rhs_to_cl_image = false)
+ MatMulKernelInfo() = default;
+ MatMulKernelInfo(bool adj_lhs, bool adj_rhs, int m0 = 1, int n0 = 1, int k0 = 1, bool export_rhs_to_cl_image = false)
: adj_lhs{ adj_lhs }, adj_rhs{ adj_rhs }, m0{ m0 }, n0{ n0 }, k0{ k0 }, export_rhs_to_cl_image{ export_rhs_to_cl_image }
{
}
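The hunk above splits the old single constructor in two: a defaulted constructor, so heuristics can populate the struct field by field, and the parameterised one, which now requires the adjoint flags explicitly. A hedged illustration (the values are arbitrary):

    // Field-by-field initialisation, as done by find_info() later in this patch:
    MatMulKernelInfo info;
    info.adj_lhs                = false;
    info.adj_rhs                = true;
    info.m0                     = 4;
    info.n0                     = 4;
    info.k0                     = 8;
    info.export_rhs_to_cl_image = true;

    // The parameterised constructor still works, but adj_lhs/adj_rhs no longer default:
    MatMulKernelInfo info2(false /* adj_lhs */, true /* adj_rhs */, 4, 4, 8, true);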
diff --git a/filelist.json b/filelist.json
index fc4aaeb217..4cf6230fef 100644
--- a/filelist.json
+++ b/filelist.json
@@ -505,7 +505,9 @@
"src/runtime/heuristics/dwc_native/ClDWCNativeDefaultConfigBifrost.cpp",
"src/runtime/heuristics/dwc_native/ClDWCNativeDefaultConfigValhall.cpp",
"src/runtime/heuristics/dwc_native/ClDWCNativeHeuristicsHelpers.cpp",
- "src/runtime/heuristics/indirect_conv/ClIndirectConvDefaultConfigValhall.cpp"
+ "src/runtime/heuristics/indirect_conv/ClIndirectConvDefaultConfigValhall.cpp",
+ "src/runtime/heuristics/matmul_native/ClMatMulNativeDefaultConfigValhall.cpp",
+ "src/runtime/heuristics/matmul_native/ClMatMulNativeHelpers.cpp"
]
}
},
diff --git a/src/gpu/cl/operators/ClMatMul.cpp b/src/gpu/cl/operators/ClMatMul.cpp
index 3ad6d914c7..15833216bb 100644
--- a/src/gpu/cl/operators/ClMatMul.cpp
+++ b/src/gpu/cl/operators/ClMatMul.cpp
@@ -26,6 +26,11 @@
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "src/common/utils/Log.h"
#include "src/gpu/cl/kernels/ClMatMulNativeKernel.h"
+#include "src/runtime/heuristics/matmul_native/ClMatMulNativeDefaultConfigValhall.h"
+#include "src/runtime/heuristics/matmul_native/ClMatMulNativeKernelConfig.h"
+#include "src/runtime/heuristics/matmul_native/IClMatMulNativeKernelConfig.h"
+
+using namespace arm_compute::cl_matmul;
namespace arm_compute
{
@@ -41,9 +46,12 @@ ClMatMul::~ClMatMul()
}
Status ClMatMul::validate(const ITensorInfo *lhs, const ITensorInfo *rhs, const ITensorInfo *output, const MatMulInfo &matmul_info)
{
- MatMulKernelInfo kernel_info;
- kernel_info.adj_lhs = matmul_info.adj_lhs();
- kernel_info.adj_rhs = matmul_info.adj_rhs();
+ const GPUTarget gpu_target = CLScheduler::get().target();
+
+ std::unique_ptr<IClMatMulNativeKernelConfig> t = ClMatMulNativeKernelConfigurationFactory::create(gpu_target);
+
+ MatMulKernelInfo kernel_info = t->configure(lhs, rhs, matmul_info);
+
return ClMatMulNativeKernel::validate(lhs, rhs, output, kernel_info);
}
void ClMatMul::configure(const CLCompileContext &compile_context, ITensorInfo *lhs, ITensorInfo *rhs, ITensorInfo *output, const MatMulInfo &matmul_info)
@@ -55,16 +63,9 @@ void ClMatMul::configure(const CLCompileContext &compile_context, ITensorInfo *l
ARM_COMPUTE_ERROR_THROW_ON(validate(lhs, rhs, output, matmul_info));
const GPUTarget gpu_target = CLScheduler::get().target();
- // Placeholder: Getting the heuristics calculated values for M0, N0, K0, and whether to export RHS to texture pipe
+ std::unique_ptr<IClMatMulNativeKernelConfig> t = ClMatMulNativeKernelConfigurationFactory::create(gpu_target);
- // Filling the MatMul Kernel info
- MatMulKernelInfo kernel_info;
- kernel_info.adj_lhs = matmul_info.adj_lhs();
- kernel_info.adj_rhs = matmul_info.adj_rhs();
- kernel_info.m0 = 1; // to be properly calculated from heuristics
- kernel_info.n0 = 4; // to be properly calculated from heuristics
- kernel_info.k0 = 4; // to be properly calculated from heuristics
- kernel_info.export_rhs_to_cl_image = false; // to be properly determined from heuristics
+ MatMulKernelInfo kernel_info = t->configure(lhs, rhs, matmul_info);
// Set the target for the kernels
_native_matmul_kernel->set_target(gpu_target);
diff --git a/src/runtime/heuristics/matmul_native/ClMatMulNativeDefaultConfigValhall.cpp b/src/runtime/heuristics/matmul_native/ClMatMulNativeDefaultConfigValhall.cpp
new file mode 100644
index 0000000000..01102b3d60
--- /dev/null
+++ b/src/runtime/heuristics/matmul_native/ClMatMulNativeDefaultConfigValhall.cpp
@@ -0,0 +1,423 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "src/runtime/heuristics/matmul_native/ClMatMulNativeDefaultConfigValhall.h"
+
+#include "arm_compute/core/CL/CLHelpers.h"
+#include "arm_compute/core/CL/CLKernelLibrary.h"
+#include "arm_compute/core/GPUTarget.h"
+#include "arm_compute/core/KernelDescriptors.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "src/gpu/cl/kernels/ClMatMulNativeKernel.h"
+#include <utility>
+
+#include "src/runtime/heuristics/matmul_native/ClMatMulNativeHelpers.h"
+
+namespace arm_compute
+{
+namespace cl_matmul
+{
+ClMatMulNativeDefaultConfigValhall::ClMatMulNativeDefaultConfigValhall(GPUTarget gpu)
+ : IClMatMulNativeKernelConfig(gpu)
+{
+}
+
+MatMulKernelInfo ClMatMulNativeDefaultConfigValhall::configure(const ITensorInfo *lhs, const ITensorInfo *rhs, const MatMulInfo &info)
+{
+ using ConfigurationFunctionExecutorPtr = MatMulKernelInfo (ClMatMulNativeDefaultConfigValhall::*)(unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool rhs_lock_padding, const MatMulInfo & info);
+
+ ClMatMulNativeConfigArray<ConfigurationFunctionExecutorPtr> configs_G710(&ClMatMulNativeDefaultConfigValhall::configure_G710_f32,
+ &ClMatMulNativeDefaultConfigValhall::configure_G710_f16,
+ &ClMatMulNativeDefaultConfigValhall::configure_G710_u8);
+
+ ConfigurationFunctionExecutorPtr func = nullptr;
+ switch(_target)
+ {
+ case GPUTarget::G710:
+ default:
+ func = configs_G710.get_function(lhs->data_type());
+ break;
+ }
+
+ const bool adj_lhs = info.adj_lhs();
+ const bool adj_rhs = info.adj_rhs();
+
+ TensorShape lhs_shape = lhs->tensor_shape();
+ TensorShape rhs_shape = rhs->tensor_shape();
+
+ const bool is_batched = lhs_shape.num_dimensions() > 2;
+
+ if(is_batched == true)
+ {
+ lhs_shape.collapse_from(2);
+ }
+
+ const unsigned int m = adj_lhs ? lhs_shape.x() : lhs_shape.y();
+ const unsigned int n = adj_rhs ? rhs_shape.y() : rhs_shape.x();
+ const unsigned int k = adj_lhs ? lhs_shape.y() : lhs_shape.x();
+ const unsigned int b = lhs_shape.z();
+
+ ARM_COMPUTE_ERROR_ON_MSG(func == nullptr, "Data type not supported for matmul native");
+ return (this->*func)(m, n, k, b, rhs->lock_paddings(), info);
+}
+
+MatMulKernelInfo ClMatMulNativeDefaultConfigValhall::configure_G710_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool rhs_lock_padding, const MatMulInfo &info)
+{
+ const MatMulNativeConfigsMatrix configs_mnkb_best_nt_nt =
+ {
+ { 3136, 64, 64, 36, 4, 4, 16, 1 },
+ { 4096, 48, 32, 36, 4, 4, 4, 1 },
+ { 688, 92, 68, 32, 2, 8, 4, 1 },
+ { 24, 464, 412, 24, 2, 8, 4, 1 },
+ { 112, 184, 144, 28, 4, 4, 16, 1 },
+ { 5776, 64, 32, 36, 2, 4, 16, 1 },
+ { 1568, 64, 40, 36, 2, 8, 8, 1 },
+ { 2920, 64, 64, 24, 4, 4, 16, 1 }
+ };
+
+ const MatMulNativeConfigsMatrix configs_mnkb_fallback_nt_nt =
+ {
+ { 3136, 64, 64, 36, 4, 4, 8, 0 },
+ { 4096, 48, 32, 36, 4, 4, 8, 0 },
+ { 688, 92, 68, 32, 5, 4, 4, 0 },
+ { 24, 464, 412, 24, 6, 2, 8, 0 },
+ { 112, 184, 144, 28, 6, 4, 4, 0 },
+ { 5776, 64, 32, 36, 5, 4, 4, 0 },
+ { 1568, 64, 40, 36, 4, 4, 8, 0 },
+ { 2920, 64, 64, 24, 4, 4, 8, 0 }
+ };
+
+ const MatMulNativeConfigsMatrix configs_mnkb_best_nt_t =
+ {
+ { 3136, 64, 64, 36, 4, 4, 4, 1 },
+ { 4096, 48, 32, 36, 2, 2, 16, 1 },
+ { 688, 92, 68, 32, 4, 4, 4, 1 },
+ { 24, 464, 412, 24, 6, 2, 8, 1 },
+ { 112, 184, 144, 28, 4, 2, 16, 1 },
+ { 5776, 64, 32, 36, 4, 4, 4, 1 },
+ { 1568, 64, 40, 36, 4, 4, 8, 1 },
+ { 2920, 64, 64, 24, 4, 4, 4, 1 }
+ };
+
+ const MatMulNativeConfigsMatrix configs_mnkb_fallback_nt_t =
+ {
+ { 3136, 64, 64, 36, 5, 4, 4, 0 },
+ { 4096, 48, 32, 36, 5, 4, 4, 0 },
+ { 688, 92, 68, 32, 5, 4, 4, 0 },
+ { 24, 464, 412, 24, 6, 2, 4, 0 },
+ { 112, 184, 144, 28, 5, 4, 4, 0 },
+ { 5776, 64, 32, 36, 5, 4, 4, 0 },
+ { 1568, 64, 40, 36, 5, 4, 4, 0 },
+ { 2920, 64, 64, 24, 6, 2, 4, 0 }
+ };
+
+ const MatMulNativeConfigsMatrix configs_mnkb_best_t_nt =
+ {
+ { 3136, 64, 64, 36, 4, 4, 16, 1 },
+ { 4096, 48, 32, 36, 4, 4, 4, 1 },
+ { 688, 92, 68, 32, 2, 8, 4, 1 },
+ { 24, 464, 412, 24, 2, 8, 4, 1 },
+ { 112, 184, 144, 28, 4, 4, 16, 1 },
+ { 5776, 64, 32, 36, 2, 8, 8, 1 },
+ { 1568, 64, 40, 36, 4, 4, 8, 1 },
+ { 2920, 64, 64, 24, 4, 4, 16, 1 }
+ };
+
+ const MatMulNativeConfigsMatrix configs_mnkb_fallback_t_nt =
+ {
+ { 3136, 64, 64, 36, 4, 4, 4, 0 },
+ { 4096, 48, 32, 36, 4, 4, 4, 0 },
+ { 688, 92, 68, 32, 4, 4, 4, 0 },
+ { 24, 464, 412, 24, 4, 4, 4, 0 },
+ { 112, 184, 144, 28, 4, 4, 4, 0 },
+ { 5776, 64, 32, 36, 4, 4, 8, 0 },
+ { 1568, 64, 40, 36, 4, 4, 4, 0 },
+ { 2920, 64, 64, 24, 4, 4, 4, 0 }
+ };
+
+ const MatMulNativeConfigsMatrix configs_mnkb_best_t_t =
+ {
+ { 3136, 64, 64, 36, 4, 4, 4, 1 },
+ { 4096, 48, 32, 36, 4, 4, 4, 1 },
+ { 688, 92, 68, 32, 4, 4, 4, 1 },
+ { 24, 464, 412, 24, 2, 2, 16, 1 },
+ { 112, 184, 144, 28, 4, 4, 4, 1 },
+ { 5776, 64, 32, 36, 4, 4, 4, 1 },
+ { 1568, 64, 40, 36, 4, 4, 4, 1 },
+ { 2920, 64, 64, 24, 4, 4, 4, 1 }
+ };
+
+ const MatMulNativeConfigsMatrix configs_mnkb_fallback_t_t =
+ {
+ { 3136, 64, 64, 36, 4, 4, 4, 0 },
+ { 4096, 48, 32, 36, 4, 4, 4, 0 },
+ { 688, 92, 68, 32, 4, 4, 4, 0 },
+ { 24, 464, 412, 24, 4, 2, 8, 0 },
+ { 112, 184, 144, 28, 4, 4, 4, 0 },
+ { 5776, 64, 32, 36, 4, 4, 4, 0 },
+ { 1568, 64, 40, 36, 4, 4, 4, 0 },
+ { 2920, 64, 64, 24, 4, 4, 4, 0 }
+ };
+
+ const bool adj_lhs = info.adj_lhs();
+ const bool adj_rhs = info.adj_rhs();
+
+ const MatMulNativeConfigsMatrix *configs_best_to_use = nullptr;
+ const MatMulNativeConfigsMatrix *configs_fallback_to_use = nullptr;
+
+ if((adj_lhs == false) && (adj_rhs == false))
+ {
+ configs_best_to_use = &configs_mnkb_best_nt_nt;
+ configs_fallback_to_use = &configs_mnkb_fallback_nt_nt;
+ }
+ else if((adj_lhs == false) && (adj_rhs == true))
+ {
+ configs_best_to_use = &configs_mnkb_best_nt_t;
+ configs_fallback_to_use = &configs_mnkb_fallback_nt_t;
+ }
+ else if((adj_lhs == true) && (adj_rhs == false))
+ {
+ configs_best_to_use = &configs_mnkb_best_t_nt;
+ configs_fallback_to_use = &configs_mnkb_fallback_t_nt;
+ }
+ else
+ {
+ configs_best_to_use = &configs_mnkb_best_t_t;
+ configs_fallback_to_use = &configs_mnkb_fallback_t_t;
+ }
+
+ MatMulKernelInfo desc0 = find_info(*configs_best_to_use, adj_lhs, adj_rhs, m, n, k, b);
+ MatMulKernelInfo desc1 = find_info(*configs_fallback_to_use, adj_lhs, adj_rhs, m, n, k, b);
+
+ return select_info(desc0,
+ desc1,
+ m, n, k, b, DataType::F32, rhs_lock_padding);
+}
+
+MatMulKernelInfo ClMatMulNativeDefaultConfigValhall::configure_G710_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool rhs_lock_padding, const MatMulInfo &info)
+{
+ const MatMulNativeConfigsMatrix configs_mnkb_best_nt_nt =
+ {
+ { 3136, 64, 64, 36, 4, 4, 16, 1 },
+ { 4096, 48, 32, 36, 4, 4, 8, 1 },
+ { 688, 92, 68, 32, 4, 4, 16, 1 },
+ { 24, 464, 412, 24, 4, 4, 4, 1 },
+ { 112, 184, 144, 28, 4, 4, 16, 1 },
+ { 5776, 64, 32, 36, 4, 4, 8, 1 },
+ { 1568, 64, 40, 36, 4, 4, 8, 1 },
+ { 2920, 64, 64, 24, 4, 4, 16, 1 }
+ };
+
+ const MatMulNativeConfigsMatrix configs_mnkb_fallback_nt_nt =
+ {
+ { 3136, 64, 64, 36, 6, 4, 8, 0 },
+ { 4096, 48, 32, 36, 6, 4, 8, 0 },
+ { 688, 92, 68, 32, 6, 4, 8, 0 },
+ { 24, 464, 412, 24, 4, 4, 8, 0 },
+ { 112, 184, 144, 28, 6, 4, 8, 0 },
+ { 5776, 64, 32, 36, 6, 4, 8, 0 },
+ { 1568, 64, 40, 36, 6, 4, 8, 0 },
+ { 2920, 64, 64, 24, 6, 4, 8, 0 }
+ };
+
+ const MatMulNativeConfigsMatrix configs_mnkb_best_nt_t =
+ {
+ { 3136, 64, 64, 36, 6, 4, 8, 1 },
+ { 4096, 48, 32, 36, 6, 4, 8, 1 },
+ { 688, 92, 68, 32, 4, 4, 4, 1 },
+ { 24, 464, 412, 24, 6, 2, 4, 1 },
+ { 112, 184, 144, 28, 4, 2, 16, 1 },
+ { 5776, 64, 32, 36, 6, 4, 8, 1 },
+ { 1568, 64, 40, 36, 6, 4, 8, 1 },
+ { 2920, 64, 64, 24, 6, 4, 8, 1 }
+ };
+
+ const MatMulNativeConfigsMatrix configs_mnkb_fallback_nt_t =
+ {
+ { 3136, 64, 64, 36, 6, 2, 16, 0 },
+ { 4096, 48, 32, 36, 5, 4, 8, 0 },
+ { 688, 92, 68, 32, 6, 2, 16, 0 },
+ { 24, 464, 412, 24, 6, 2, 16, 0 },
+ { 112, 184, 144, 28, 6, 2, 16, 0 },
+ { 5776, 64, 32, 36, 5, 4, 8, 0 },
+ { 1568, 64, 40, 36, 5, 4, 8, 0 },
+ { 2920, 64, 64, 24, 6, 2, 16, 0 }
+ };
+
+ const MatMulNativeConfigsMatrix configs_mnkb_best_t_nt =
+ {
+ { 3136, 64, 64, 36, 4, 4, 16, 1 },
+ { 4096, 48, 32, 36, 4, 4, 4, 1 },
+ { 688, 92, 68, 32, 4, 4, 4, 1 },
+ { 24, 464, 412, 24, 4, 4, 4, 1 },
+ { 112, 184, 144, 28, 4, 4, 4, 1 },
+ { 5776, 64, 32, 36, 4, 4, 4, 1 },
+ { 1568, 64, 40, 36, 4, 4, 4, 1 },
+ { 2920, 64, 64, 24, 4, 4, 4, 1 }
+ };
+
+ const MatMulNativeConfigsMatrix configs_mnkb_fallback_t_nt =
+ {
+ { 3136, 64, 64, 36, 4, 4, 4, 0 },
+ { 4096, 48, 32, 36, 4, 4, 4, 0 },
+ { 688, 92, 68, 32, 4, 4, 4, 0 },
+ { 24, 464, 412, 24, 4, 4, 4, 0 },
+ { 112, 184, 144, 28, 4, 4, 4, 0 },
+ { 5776, 64, 32, 36, 4, 4, 4, 0 },
+ { 1568, 64, 40, 36, 4, 4, 4, 0 },
+ { 2920, 64, 64, 24, 4, 4, 4, 0 }
+ };
+
+ const MatMulNativeConfigsMatrix configs_mnkb_best_t_t =
+ {
+ { 3136, 64, 64, 36, 4, 4, 16, 1 },
+ { 4096, 48, 32, 36, 4, 4, 8, 1 },
+ { 688, 92, 68, 32, 4, 4, 4, 1 },
+ { 24, 464, 412, 24, 4, 2, 8, 1 },
+ { 112, 184, 144, 28, 4, 2, 16, 1 },
+ { 5776, 64, 32, 36, 4, 4, 16, 1 },
+ { 1568, 64, 40, 36, 4, 4, 8, 1 },
+ { 2920, 64, 64, 24, 4, 4, 16, 1 }
+ };
+
+ const MatMulNativeConfigsMatrix configs_mnkb_fallback_t_t =
+ {
+ { 3136, 64, 64, 36, 4, 4, 8, 0 },
+ { 4096, 48, 32, 36, 4, 4, 8, 0 },
+ { 688, 92, 68, 32, 4, 4, 8, 0 },
+ { 24, 464, 412, 24, 4, 4, 8, 0 },
+ { 112, 184, 144, 28, 4, 4, 8, 0 },
+ { 5776, 64, 32, 36, 4, 4, 8, 0 },
+ { 1568, 64, 40, 36, 4, 4, 8, 0 },
+ { 2920, 64, 64, 24, 4, 4, 8, 0 }
+ };
+
+ const bool adj_lhs = info.adj_lhs();
+ const bool adj_rhs = info.adj_rhs();
+
+ const MatMulNativeConfigsMatrix *configs_best_to_use = nullptr;
+ const MatMulNativeConfigsMatrix *configs_fallback_to_use = nullptr;
+
+ if((adj_lhs == false) && (adj_rhs == false))
+ {
+ configs_best_to_use = &configs_mnkb_best_nt_nt;
+ configs_fallback_to_use = &configs_mnkb_fallback_nt_nt;
+ }
+ else if((adj_lhs == false) && (adj_rhs == true))
+ {
+ configs_best_to_use = &configs_mnkb_best_nt_t;
+ configs_fallback_to_use = &configs_mnkb_fallback_nt_t;
+ }
+ else if((adj_lhs == true) && (adj_rhs == false))
+ {
+ configs_best_to_use = &configs_mnkb_best_t_nt;
+ configs_fallback_to_use = &configs_mnkb_fallback_t_nt;
+ }
+ else
+ {
+ configs_best_to_use = &configs_mnkb_best_t_t;
+ configs_fallback_to_use = &configs_mnkb_fallback_t_t;
+ }
+
+ MatMulKernelInfo desc0 = find_info(*configs_best_to_use, adj_lhs, adj_rhs, m, n, k, b);
+ MatMulKernelInfo desc1 = find_info(*configs_fallback_to_use, adj_lhs, adj_rhs, m, n, k, b);
+
+ return select_info(desc0,
+ desc1,
+ m, n, k, b, DataType::F16, rhs_lock_padding);
+}
+
+MatMulKernelInfo ClMatMulNativeDefaultConfigValhall::configure_G710_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool rhs_lock_padding, const MatMulInfo &info)
+{
+ ARM_COMPUTE_UNUSED(rhs_lock_padding);
+
+ const MatMulNativeConfigsMatrix configs_mnkb_best_nt_nt =
+ {
+ { 3136, 64, 64, 36, 6, 4, 4, 0 },
+ { 4096, 48, 32, 36, 6, 4, 4, 0 },
+ { 688, 92, 68, 32, 2, 8, 4, 0 },
+ { 24, 464, 412, 24, 4, 4, 4, 0 },
+ { 112, 184, 144, 28, 6, 4, 4, 0 },
+ { 5776, 64, 32, 36, 6, 4, 4, 0 },
+ { 1568, 64, 40, 36, 6, 4, 4, 0 },
+ { 2920, 64, 64, 24, 5, 4, 4, 0 }
+ };
+
+ const MatMulNativeConfigsMatrix configs_mnkb_best_nt_t =
+ {
+ { 3136, 64, 64, 36, 4, 4, 16, 0 },
+ { 4096, 48, 32, 36, 4, 4, 16, 0 },
+ { 688, 92, 68, 32, 4, 4, 16, 0 },
+ { 24, 464, 412, 24, 6, 2, 16, 0 },
+ { 112, 184, 144, 28, 4, 4, 16, 0 },
+ { 5776, 64, 32, 36, 4, 4, 16, 0 },
+ { 1568, 64, 40, 36, 6, 4, 4, 0 },
+ { 2920, 64, 64, 24, 4, 4, 16, 0 }
+ };
+
+ const MatMulNativeConfigsMatrix configs_mnkb_best_t_nt =
+ {
+ { 3136, 64, 64, 36, 4, 4, 8, 0 },
+ { 4096, 48, 32, 36, 4, 4, 8, 0 },
+ { 688, 92, 68, 32, 4, 4, 4, 0 },
+ { 24, 464, 412, 24, 4, 4, 4, 0 },
+ { 112, 184, 144, 28, 4, 4, 8, 0 },
+ { 5776, 64, 32, 36, 4, 4, 8, 0 },
+ { 1568, 64, 40, 36, 4, 4, 8, 0 },
+ { 2920, 64, 64, 24, 4, 4, 8, 0 }
+ };
+
+ const MatMulNativeConfigsMatrix configs_mnkb_best_t_t =
+ {
+ { 3136, 64, 64, 36, 4, 2, 16, 0 },
+ { 4096, 48, 32, 36, 4, 4, 4, 0 },
+ { 688, 92, 68, 32, 4, 4, 8, 0 },
+ { 24, 464, 412, 24, 4, 2, 16, 0 },
+ { 112, 184, 144, 28, 4, 2, 16, 0 },
+ { 5776, 64, 32, 36, 4, 4, 4, 0 },
+ { 1568, 64, 40, 36, 4, 4, 8, 0 },
+ { 2920, 64, 64, 24, 4, 2, 16, 0 }
+ };
+
+ const bool adj_lhs = info.adj_lhs();
+ const bool adj_rhs = info.adj_rhs();
+
+ if((adj_lhs == false) && (adj_rhs == false))
+ {
+ return find_info(configs_mnkb_best_nt_nt, adj_lhs, adj_rhs, m, n, k, b);
+ }
+ else if((adj_lhs == false) && (adj_rhs == true))
+ {
+ return find_info(configs_mnkb_best_nt_t, adj_lhs, adj_rhs, m, n, k, b);
+ }
+ else if((adj_lhs == true) && (adj_rhs == false))
+ {
+ return find_info(configs_mnkb_best_t_nt, adj_lhs, adj_rhs, m, n, k, b);
+ }
+ else
+ {
+ return find_info(configs_mnkb_best_t_t, adj_lhs, adj_rhs, m, n, k, b);
+ }
+}
+} // namespace cl_matmul
+} // namespace arm_compute
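Each row of a MatMulNativeConfigsMatrix above encodes { M, N, K, B, M0, N0, K0, IMG_RHS }. As an illustration, find_info() would expand the first f32 nt/nt "best" entry, { 3136, 64, 64, 36, 4, 4, 16, 1 }, into:

    MatMulKernelInfo desc;
    desc.adj_lhs                = false; // nt/nt table
    desc.adj_rhs                = false;
    desc.m0                     = 4;    // column 4
    desc.n0                     = 4;    // column 5
    desc.k0                     = 16;   // column 6
    desc.export_rhs_to_cl_image = true; // column 7: IMG_RHS == 1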
diff --git a/src/runtime/heuristics/matmul_native/ClMatMulNativeDefaultConfigValhall.h b/src/runtime/heuristics/matmul_native/ClMatMulNativeDefaultConfigValhall.h
new file mode 100644
index 0000000000..fe167d18dd
--- /dev/null
+++ b/src/runtime/heuristics/matmul_native/ClMatMulNativeDefaultConfigValhall.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef SRC_RUNTIME_HEURISTICS_MATMUL_NATIVE_CLMATMULNATIVEDEFAULTCONFIGVALHALL
+#define SRC_RUNTIME_HEURISTICS_MATMUL_NATIVE_CLMATMULNATIVEDEFAULTCONFIGVALHALL
+
+#include "src/runtime/heuristics/matmul_native/IClMatMulNativeKernelConfig.h"
+
+namespace arm_compute
+{
+namespace cl_matmul
+{
+/** Valhall based OpenCL matmul configuration */
+class ClMatMulNativeDefaultConfigValhall final : public IClMatMulNativeKernelConfig
+{
+public:
+ /** Constructor
+ *
+ * @param[in] gpu GPU target
+ */
+ ClMatMulNativeDefaultConfigValhall(GPUTarget gpu);
+
+ // Inherited overridden method
+ MatMulKernelInfo configure(const ITensorInfo *lhs, const ITensorInfo *rhs, const MatMulInfo &info) override;
+
+private:
+ MatMulKernelInfo configure_G710_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool rhs_lock_padding, const MatMulInfo &info);
+ MatMulKernelInfo configure_G710_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool rhs_lock_padding, const MatMulInfo &info);
+ MatMulKernelInfo configure_G710_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool rhs_lock_padding, const MatMulInfo &info);
+};
+} // namespace cl_matmul
+} // namespace arm_compute
+#endif /* SRC_RUNTIME_HEURISTICS_MATMUL_NATIVE_CLMATMULNATIVEDEFAULTCONFIGVALHALL */
diff --git a/src/runtime/heuristics/matmul_native/ClMatMulNativeHelpers.cpp b/src/runtime/heuristics/matmul_native/ClMatMulNativeHelpers.cpp
new file mode 100644
index 0000000000..b9e0d5adf8
--- /dev/null
+++ b/src/runtime/heuristics/matmul_native/ClMatMulNativeHelpers.cpp
@@ -0,0 +1,116 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "src/runtime/heuristics/matmul_native/ClMatMulNativeHelpers.h"
+
+#include "arm_compute/core/KernelDescriptors.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/TensorShape.h"
+#include "src/gpu/cl/kernels/ClMatMulNativeKernel.h"
+
+#include <limits>
+#include <utility>
+
+namespace arm_compute
+{
+namespace cl_matmul
+{
+MatMulKernelInfo select_info(const MatMulKernelInfo &info0,
+ const MatMulKernelInfo &info1,
+ unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type, bool rhs_lock_padding)
+{
+ ARM_COMPUTE_ERROR_ON_MSG(info1.export_rhs_to_cl_image == true, "The fallback MatMul configuration cannot have export_to_cl_image = true");
+ ARM_COMPUTE_ERROR_ON_MSG(info0.adj_lhs != info1.adj_lhs, "The MatMul configurations must have the same adj_lhs value");
+ ARM_COMPUTE_ERROR_ON_MSG(info0.adj_rhs != info1.adj_rhs, "The MatMul configurations must have the same adj_rhs value");
+
+ const bool adj_lhs = info0.adj_lhs;
+ const bool adj_rhs = info0.adj_rhs;
+
+ TensorInfo lhs_info = !adj_lhs ? TensorInfo(TensorShape(k, m, b), 1, data_type) : TensorInfo(TensorShape(m, k, b), 1, data_type);
+ TensorInfo rhs_info = !adj_rhs ? TensorInfo(TensorShape(n, k, b), 1, data_type) : TensorInfo(TensorShape(k, n, b), 1, data_type);
+ TensorInfo dst_info;
+
+ if(rhs_lock_padding == false)
+ {
+ if(bool(opencl::kernels::ClMatMulNativeKernel::validate(&lhs_info, &rhs_info, &dst_info, info0)))
+ {
+ return info0;
+ }
+ else
+ {
+ return info1;
+ }
+ }
+ else
+ {
+ return info1;
+ }
+}
+
+MatMulKernelInfo find_info(const MatMulNativeConfigsMatrix &configs, bool adj_lhs, bool adj_rhs, unsigned int m, unsigned int n, unsigned int k, unsigned int b)
+{
+ size_t min_acc = std::numeric_limits<size_t>::max();
+ size_t min_idx = 0;
+
+ ARM_COMPUTE_ERROR_ON(configs.size() == 0);
+ const size_t num_rows = configs.size();
+ const size_t num_cols = configs[0].size();
+
+ ARM_COMPUTE_ERROR_ON_MSG(num_cols != 8U, "The entry should have 8 integer values representing: M, N, K, B, M0, N0, K0, IMG_RHS");
+ ARM_COMPUTE_UNUSED(num_cols);
+
+ // Find the nearest GEMM workload
+ // Note: the distance is computed over the M, N, K and batch dimensions
+ for(size_t y = 0; y < num_rows; ++y)
+ {
+ size_t mc0 = static_cast<size_t>(configs[y][0]);
+ size_t nc0 = static_cast<size_t>(configs[y][1]);
+ size_t kc0 = static_cast<size_t>(configs[y][2]);
+ size_t bc0 = static_cast<size_t>(configs[y][3]);
+
+ size_t acc = 0;
+ acc += (m - mc0) * (m - mc0);
+ acc += (n - nc0) * (n - nc0);
+ acc += (k - kc0) * (k - kc0);
+ acc += (b - bc0) * (b - bc0);
+ acc = std::sqrt(acc);
+ if(acc < min_acc)
+ {
+ min_acc = acc;
+ min_idx = y;
+ }
+ }
+
+ // Get the configuration from the nearest GEMM shape
+ MatMulKernelInfo desc;
+ desc.adj_lhs = adj_lhs;
+ desc.adj_rhs = adj_rhs;
+ desc.m0 = configs[min_idx][4];
+ desc.n0 = configs[min_idx][5];
+ desc.k0 = configs[min_idx][6];
+ desc.export_rhs_to_cl_image = configs[min_idx][7];
+
+ return desc;
+}
+} // namespace cl_matmul
+} // namespace arm_compute
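The search in find_info() picks the row whose (M, N, K, B) prefix is closest, in Euclidean distance, to the query workload. A simplified standalone sketch of the same idea (the in-tree version accumulates in size_t and truncates the square root):

    #include <cmath>
    #include <cstddef>
    #include <limits>
    #include <vector>

    // Return the index of the row whose (M, N, K, B) prefix is nearest to the query.
    std::size_t nearest_row(const std::vector<std::vector<int>> &configs,
                            std::size_t m, std::size_t n, std::size_t k, std::size_t b)
    {
        double      min_dist = std::numeric_limits<double>::max();
        std::size_t min_idx  = 0;
        for(std::size_t y = 0; y < configs.size(); ++y)
        {
            const double dm   = static_cast<double>(m) - configs[y][0];
            const double dn   = static_cast<double>(n) - configs[y][1];
            const double dk   = static_cast<double>(k) - configs[y][2];
            const double db   = static_cast<double>(b) - configs[y][3];
            const double dist = std::sqrt(dm * dm + dn * dn + dk * dk + db * db);
            if(dist < min_dist)
            {
                min_dist = dist;
                min_idx  = y;
            }
        }
        return min_idx;
    }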
diff --git a/src/runtime/heuristics/matmul_native/ClMatMulNativeHelpers.h b/src/runtime/heuristics/matmul_native/ClMatMulNativeHelpers.h
new file mode 100644
index 0000000000..3881617558
--- /dev/null
+++ b/src/runtime/heuristics/matmul_native/ClMatMulNativeHelpers.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef SRC_RUNTIME_HEURISTICS_MATMUL_NATIVE_CLMATMULNATIVEHELPERS
+#define SRC_RUNTIME_HEURISTICS_MATMUL_NATIVE_CLMATMULNATIVEHELPERS
+
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+// Forward declaration
+struct MatMulKernelInfo;
+
+namespace cl_matmul
+{
+using MatMulNativeConfigsMatrix = std::vector<std::vector<int32_t>>;
+
+/** This function accepts two MatMulKernelInfo objects, where only the first may have cl_image2d support enabled.
+ * It checks whether the first MatMulKernelInfo object is valid for the given workload: if so, it returns the
+ * first object; otherwise, it returns the second one.
+ *
+ * @param[in] info0 MatMulKernelInfo with cl_image2d support
+ * @param[in] info1 MatMulKernelInfo to fall-back if cl_image2d cannot be used
+ * @param[in] m Number of rows (M) of the LHS matrix
+ * @param[in] n Number of columns (N) of the non-reshaped RHS matrix
+ * @param[in] k Number of rows (K) of the non-reshaped RHS matrix
+ * @param[in] b Batch size
+ * @param[in] data_type Data type
+ * @param[in] rhs_lock_padding Flag used to know whether the RHS paddings are locked
+ *
+ * @return @ref MatMulKernelInfo
+ */
+MatMulKernelInfo select_info(const MatMulKernelInfo &info0,
+ const MatMulKernelInfo &info1,
+ unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type, bool rhs_lock_padding);
+
+/** Find the preferred configuration for the MatMul Native kernel using the MatMulNativeConfigsMatrix provided by the user
+ *
+ * @param[in] configs List of best configurations for a limited number of MatMul shapes
+ * @param[in] adj_lhs Adjoint LHS flag value
+ * @param[in] adj_rhs Adjoint RHS flag value
+ * @param[in] m Number of rows (M) of the LHS matrix
+ * @param[in] n Number of columns (N) of the non-reshaped RHS matrix
+ * @param[in] k Number of rows (K) of the non-reshaped RHS matrix
+ * @param[in] b Batch size
+ *
+ * @return @ref MatMulKernelInfo
+ */
+MatMulKernelInfo find_info(const MatMulNativeConfigsMatrix &configs, bool adj_lhs, bool adj_rhs, unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+} // namespace cl_matmul
+} // namespace arm_compute
+#endif /* SRC_RUNTIME_HEURISTICS_MATMUL_NATIVE_CLMATMULNATIVEHELPERS */
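A hedged usage sketch tying the two helpers together, as the Valhall configuration functions above do for f32/f16 (configs_best and configs_fallback are placeholder names for the shape tables):

    // Pick the nearest "best" (cl_image2d) and plain fallback candidates for the workload...
    MatMulKernelInfo best     = find_info(configs_best,     adj_lhs, adj_rhs, m, n, k, b);
    MatMulKernelInfo fallback = find_info(configs_fallback, adj_lhs, adj_rhs, m, n, k, b);

    // ...then keep `best` only if ClMatMulNativeKernel::validate() accepts the
    // cl_image2d export for this shape and the RHS paddings are not locked.
    MatMulKernelInfo chosen = select_info(best, fallback, m, n, k, b, DataType::F32, rhs_lock_padding);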
diff --git a/src/runtime/heuristics/matmul_native/ClMatMulNativeKernelConfig.h b/src/runtime/heuristics/matmul_native/ClMatMulNativeKernelConfig.h
new file mode 100644
index 0000000000..a2dbfc7dd5
--- /dev/null
+++ b/src/runtime/heuristics/matmul_native/ClMatMulNativeKernelConfig.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef SRC_RUNTIME_HEURISTICS_MATMUL_NATIVE_CLMATMULNATIVEKERNELCONFIG
+#define SRC_RUNTIME_HEURISTICS_MATMUL_NATIVE_CLMATMULNATIVEKERNELCONFIG
+
+#include "src/runtime/heuristics/matmul_native/ClMatMulNativeDefaultConfigValhall.h"
+#include "src/runtime/heuristics/matmul_native/IClMatMulNativeKernelConfig.h"
+
+#include <memory>
+
+namespace arm_compute
+{
+namespace cl_matmul
+{
+/** ClMatMul configuration factory class */
+class ClMatMulNativeKernelConfigurationFactory final
+{
+public:
+ /** Static method to create the ClMatMul configuration class appropriate for the given GPU target
+ *
+ * @param[in] gpu GPU target
+ *
+ * @return IClMatMulNativeKernelConfig
+ */
+ static std::unique_ptr<IClMatMulNativeKernelConfig> create(GPUTarget gpu)
+ {
+ switch(get_arch_from_target(gpu))
+ {
+ case GPUTarget::MIDGARD:
+ case GPUTarget::BIFROST:
+ case GPUTarget::VALHALL:
+ return std::make_unique<ClMatMulNativeDefaultConfigValhall>(gpu);
+ default:
+ ARM_COMPUTE_ERROR("Not supported GPU target");
+ }
+ }
+};
+} // namespace cl_matmul
+} // namespace arm_compute
+#endif /* SRC_RUNTIME_HEURISTICS_MATMUL_NATIVE_CLMATMULNATIVEKERNELCONFIG */
diff --git a/src/runtime/heuristics/matmul_native/IClMatMulNativeKernelConfig.h b/src/runtime/heuristics/matmul_native/IClMatMulNativeKernelConfig.h
new file mode 100644
index 0000000000..ee9b03e63b
--- /dev/null
+++ b/src/runtime/heuristics/matmul_native/IClMatMulNativeKernelConfig.h
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef SRC_RUNTIME_HEURISTICS_MATMUL_NATIVE_ICLMATMULNATIVEKERNELCONFIG
+#define SRC_RUNTIME_HEURISTICS_MATMUL_NATIVE_ICLMATMULNATIVEKERNELCONFIG
+
+#include "arm_compute/core/GPUTarget.h"
+#include "arm_compute/core/KernelDescriptors.h"
+#include "arm_compute/core/Types.h"
+#include "src/core/common/Macros.h"
+
+namespace arm_compute
+{
+namespace cl_matmul
+{
+/** Basic container for the OpenCL MatMul Native configuration functions */
+template <class T>
+class ClMatMulNativeConfigArray
+{
+public:
+ /** Alias for F32 index */
+ static constexpr size_t DT_F32 = 0;
+ /** Alias for F16 index */
+ static constexpr size_t DT_F16 = 1;
+ /** Alias for Int8 index */
+ static constexpr size_t DT_INT8 = 2;
+
+ /** Constructor
+ *
+ * @param[in] func_f32 Function to call for matmul native F32
+ * @param[in] func_f16 Function to call for matmul native F16
+ * @param[in] func_int8 Function to call for matmul native Int8 (QASYMM8, QASYMM8_SIGNED, QSYMM8_PER_CHANNEL)
+ *
+ */
+ ClMatMulNativeConfigArray(T func_f32, T func_f16, T func_int8)
+ : _configs{ func_f32, func_f16, func_int8 }
+ {
+ }
+
+ /** Method to return the matmul native configuration function based on data type
+ *
+ * @param[in] data_type Input data type
+ *
+ * @return the configuration function for the given data type, or nullptr if the data type is not supported
+ */
+ T get_function(DataType data_type)
+ {
+ switch(data_type)
+ {
+ case DataType::F32:
+ return _configs.at(DT_F32);
+ case DataType::F16:
+ return _configs.at(DT_F16);
+ case DataType::QASYMM8:
+ case DataType::QASYMM8_SIGNED:
+ case DataType::QSYMM8_PER_CHANNEL:
+ return _configs.at(DT_INT8);
+ default:
+ return nullptr;
+ }
+ }
+
+private:
+ std::array<T, 3> _configs;
+};
+
+/** Basic interface for the matmul native kernel configuration */
+class IClMatMulNativeKernelConfig
+{
+public:
+ /** Constructor
+ *
+ * @param[in] arch GPU target
+ */
+ IClMatMulNativeKernelConfig(GPUTarget arch)
+ : _target(arch)
+ {
+ }
+ ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(IClMatMulNativeKernelConfig);
+ /** Virtual destructor */
+ virtual ~IClMatMulNativeKernelConfig() = default;
+ /** This method returns the @ref MatMulKernelInfo for the given inputs
+ *
+ * @param[in] lhs LHS tensor
+ * @param[in] rhs RHS tensor
+ * @param[in] info MatMul info
+ */
+ virtual MatMulKernelInfo configure(const ITensorInfo *lhs, const ITensorInfo *rhs, const MatMulInfo &info) = 0;
+
+protected:
+ GPUTarget _target;
+};
+} // namespace cl_matmul
+} // namespace arm_compute
+#endif /* SRC_RUNTIME_HEURISTICS_MATMUL_NATIVE_ICLMATMULNATIVEKERNELCONFIG */
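For reference, the per-data-type dispatch in ClMatMulNativeConfigArray relies on pointers to member functions, invoked through (this->*func)(...). A self-contained sketch of the pattern with hypothetical names:

    #include <array>
    #include <cstddef>
    #include <iostream>

    class Heuristic
    {
    public:
        int configure(std::size_t type_index)
        {
            using Fn = int (Heuristic::*)();
            // One entry per data type, mirroring DT_F32/DT_F16/DT_INT8 above.
            const std::array<Fn, 3> table{ &Heuristic::f32, &Heuristic::f16, &Heuristic::int8 };
            Fn func = table.at(type_index);
            return (this->*func)(); // same call form as (this->*func)(m, n, k, b, ...) in configure()
        }

    private:
        int f32()  { return 32; }
        int f16()  { return 16; }
        int int8() { return 8; }
    };

    int main()
    {
        Heuristic h;
        std::cout << h.configure(1) << '\n'; // prints 16: the f16 entry
    }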