aboutsummaryrefslogtreecommitdiff
path: root/src/runtime/heuristics/matmul_native
diff options
context:
space:
mode:
Diffstat (limited to 'src/runtime/heuristics/matmul_native')
-rw-r--r--src/runtime/heuristics/matmul_native/ClMatMulNativeDefaultConfigValhall.cpp314
-rw-r--r--src/runtime/heuristics/matmul_native/ClMatMulNativeDefaultConfigValhall.h62
-rw-r--r--src/runtime/heuristics/matmul_native/ClMatMulNativeDefaultVariantValhall.cpp113
-rw-r--r--src/runtime/heuristics/matmul_native/ClMatMulNativeDefaultVariantValhall.h57
-rw-r--r--src/runtime/heuristics/matmul_native/ClMatMulNativeHelpers.cpp134
-rw-r--r--src/runtime/heuristics/matmul_native/ClMatMulNativeHelpers.h83
-rw-r--r--src/runtime/heuristics/matmul_native/ClMatMulNativeKernelConfig.h62
-rw-r--r--src/runtime/heuristics/matmul_native/ClMatMulNativeKernelVariant.h63
-rw-r--r--src/runtime/heuristics/matmul_native/IClMatMulNativeKernelConfig.h117
-rw-r--r--src/runtime/heuristics/matmul_native/IClMatMulNativeKernelVariant.h135
10 files changed, 1140 insertions, 0 deletions
diff --git a/src/runtime/heuristics/matmul_native/ClMatMulNativeDefaultConfigValhall.cpp b/src/runtime/heuristics/matmul_native/ClMatMulNativeDefaultConfigValhall.cpp
new file mode 100644
index 0000000000..3a02a60650
--- /dev/null
+++ b/src/runtime/heuristics/matmul_native/ClMatMulNativeDefaultConfigValhall.cpp
@@ -0,0 +1,314 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "src/runtime/heuristics/matmul_native/ClMatMulNativeDefaultConfigValhall.h"
+
+#include "arm_compute/core/CL/CLHelpers.h"
+#include "arm_compute/core/CL/CLKernelLibrary.h"
+#include "arm_compute/core/GPUTarget.h"
+#include "arm_compute/core/KernelDescriptors.h"
+#include "arm_compute/core/TensorInfo.h"
+
+#include "src/gpu/cl/kernels/ClMatMulNativeKernel.h"
+#include "src/runtime/heuristics/matmul_native/ClMatMulNativeHelpers.h"
+
+#include <utility>
+
+namespace arm_compute
+{
+namespace cl_matmul
+{
+ClMatMulNativeDefaultConfigValhall::ClMatMulNativeDefaultConfigValhall(GPUTarget gpu) : IClMatMulNativeKernelConfig(gpu)
+{
+}
+
+MatMulKernelInfo
+ClMatMulNativeDefaultConfigValhall::configure(const ITensorInfo *lhs, const ITensorInfo *rhs, const MatMulInfo &info)
+{
+ using ConfigurationFunctionExecutorPtr = MatMulKernelInfo (ClMatMulNativeDefaultConfigValhall::*)(
+ unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool rhs_lock_padding, const MatMulInfo &info);
+
+ ClMatMulNativeConfigArray<ConfigurationFunctionExecutorPtr> configs_G710(
+ &ClMatMulNativeDefaultConfigValhall::configure_G710_f32,
+ &ClMatMulNativeDefaultConfigValhall::configure_G710_f16,
+ &ClMatMulNativeDefaultConfigValhall::configure_G710_u8);
+
+ ClMatMulNativeConfigArray<ConfigurationFunctionExecutorPtr> configs_G715(
+ &ClMatMulNativeDefaultConfigValhall::configure_G715_f32,
+ &ClMatMulNativeDefaultConfigValhall::configure_G715_f16,
+ &ClMatMulNativeDefaultConfigValhall::configure_G715_u8);
+
+ ConfigurationFunctionExecutorPtr func = nullptr;
+ switch (_target)
+ {
+ case GPUTarget::G715:
+ case GPUTarget::G615:
+ func = configs_G715.get_function(lhs->data_type());
+ break;
+ case GPUTarget::G710:
+ default:
+ func = configs_G710.get_function(lhs->data_type());
+ break;
+ }
+
+ const bool adj_lhs = info.adj_lhs();
+ const bool adj_rhs = info.adj_rhs();
+
+ TensorShape lhs_shape = lhs->tensor_shape();
+ TensorShape rhs_shape = rhs->tensor_shape();
+
+ const bool is_batched = lhs_shape.num_dimensions() > 2;
+
+ if (is_batched == true)
+ {
+ lhs_shape.collapse_from(2);
+ }
+
+ const unsigned int m = adj_lhs ? lhs_shape.x() : lhs_shape.y();
+ const unsigned int n = adj_rhs ? rhs_shape.y() : rhs_shape.x();
+ const unsigned int k = adj_lhs ? lhs_shape.y() : lhs_shape.x();
+ const unsigned int b = lhs_shape.z();
+
+ ARM_COMPUTE_ERROR_ON_MSG(func == nullptr, "Data type not supported for matmul native");
+ return (this->*func)(m, n, k, b, rhs->lock_paddings(), info);
+}
+
+MatMulKernelInfo ClMatMulNativeDefaultConfigValhall::configure_G715_f32(
+ unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool rhs_lock_padding, const MatMulInfo &info)
+{
+ ARM_COMPUTE_UNUSED(m, n, k, b, rhs_lock_padding);
+ return {info.adj_lhs(), info.adj_rhs(), /* m0 */ 1, /* n0 */ 4, /* k0 */ 1, /* export_to_cl_image */ false};
+}
+
+MatMulKernelInfo ClMatMulNativeDefaultConfigValhall::configure_G715_f16(
+ unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool rhs_lock_padding, const MatMulInfo &info)
+{
+ return configure_G715_f32(m, n, k, b, rhs_lock_padding, info);
+}
+
+MatMulKernelInfo ClMatMulNativeDefaultConfigValhall::configure_G715_u8(
+ unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool rhs_lock_padding, const MatMulInfo &info)
+{
+ ARM_COMPUTE_UNUSED(m, n, k, b, rhs_lock_padding);
+ return {info.adj_lhs(), info.adj_rhs(), /* m0 */ 4, /* n0 */ 16, /* k0 */ 4, /* export_to_cl_image */ false};
+}
+
+MatMulKernelInfo ClMatMulNativeDefaultConfigValhall::configure_G710_f32(
+ unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool rhs_lock_padding, const MatMulInfo &info)
+{
+ const MatMulNativeConfigsMatrix configs_mnkb_best_nt_nt = {
+ {3136, 64, 64, 36, 4, 4, 16, 1}, {4096, 48, 32, 36, 4, 4, 4, 1}, {688, 92, 68, 32, 2, 8, 4, 1},
+ {24, 464, 412, 24, 2, 8, 4, 1}, {112, 184, 144, 28, 4, 4, 16, 1}, {5776, 64, 32, 36, 2, 4, 16, 1},
+ {1568, 64, 40, 36, 2, 8, 8, 1}, {2920, 64, 64, 24, 4, 4, 16, 1}};
+
+ const MatMulNativeConfigsMatrix configs_mnkb_fallback_nt_nt = {
+ {3136, 64, 64, 36, 4, 4, 8, 0}, {4096, 48, 32, 36, 4, 4, 8, 0}, {688, 92, 68, 32, 5, 4, 4, 0},
+ {24, 464, 412, 24, 6, 2, 8, 0}, {112, 184, 144, 28, 6, 4, 4, 0}, {5776, 64, 32, 36, 5, 4, 4, 0},
+ {1568, 64, 40, 36, 4, 4, 8, 0}, {2920, 64, 64, 24, 4, 4, 8, 0}};
+
+ const MatMulNativeConfigsMatrix configs_mnkb_best_nt_t = {
+ {3136, 64, 64, 36, 4, 4, 4, 1}, {4096, 48, 32, 36, 2, 2, 16, 1}, {688, 92, 68, 32, 4, 4, 4, 1},
+ {24, 464, 412, 24, 6, 2, 8, 1}, {112, 184, 144, 28, 4, 2, 16, 1}, {5776, 64, 32, 36, 4, 4, 4, 1},
+ {1568, 64, 40, 36, 4, 4, 8, 1}, {2920, 64, 64, 24, 4, 4, 4, 1}};
+
+ const MatMulNativeConfigsMatrix configs_mnkb_fallback_nt_t = {
+ {3136, 64, 64, 36, 5, 4, 4, 0}, {4096, 48, 32, 36, 5, 4, 4, 0}, {688, 92, 68, 32, 5, 4, 4, 0},
+ {24, 464, 412, 24, 6, 2, 4, 0}, {112, 184, 144, 28, 5, 4, 4, 0}, {5776, 64, 32, 36, 5, 4, 4, 0},
+ {1568, 64, 40, 36, 5, 4, 4, 0}, {2920, 64, 64, 24, 6, 2, 4, 0}};
+
+ const MatMulNativeConfigsMatrix configs_mnkb_best_t_nt = {
+ {3136, 64, 64, 36, 4, 4, 16, 1}, {4096, 48, 32, 36, 4, 4, 4, 1}, {688, 92, 68, 32, 2, 8, 4, 1},
+ {24, 464, 412, 24, 2, 8, 4, 1}, {112, 184, 144, 28, 4, 4, 16, 1}, {5776, 64, 32, 36, 2, 8, 8, 1},
+ {1568, 64, 40, 36, 4, 4, 8, 1}, {2920, 64, 64, 24, 4, 4, 16, 1}};
+
+ const MatMulNativeConfigsMatrix configs_mnkb_fallback_t_nt = {
+ {3136, 64, 64, 36, 4, 4, 4, 0}, {4096, 48, 32, 36, 4, 4, 4, 0}, {688, 92, 68, 32, 4, 4, 4, 0},
+ {24, 464, 412, 24, 4, 4, 4, 0}, {112, 184, 144, 28, 4, 4, 4, 0}, {5776, 64, 32, 36, 4, 4, 8, 0},
+ {1568, 64, 40, 36, 4, 4, 4, 0}, {2920, 64, 64, 24, 4, 4, 4, 0}};
+
+ const MatMulNativeConfigsMatrix configs_mnkb_best_t_t = {
+ {3136, 64, 64, 36, 4, 4, 4, 1}, {4096, 48, 32, 36, 4, 4, 4, 1}, {688, 92, 68, 32, 4, 4, 4, 1},
+ {24, 464, 412, 24, 2, 2, 16, 1}, {112, 184, 144, 28, 4, 4, 4, 1}, {5776, 64, 32, 36, 4, 4, 4, 1},
+ {1568, 64, 40, 36, 4, 4, 4, 1}, {2920, 64, 64, 24, 4, 4, 4, 1}};
+
+ const MatMulNativeConfigsMatrix configs_mnkb_fallback_t_t = {
+ {3136, 64, 64, 36, 4, 4, 4, 0}, {4096, 48, 32, 36, 4, 4, 4, 0}, {688, 92, 68, 32, 4, 4, 4, 0},
+ {24, 464, 412, 24, 4, 2, 8, 0}, {112, 184, 144, 28, 4, 4, 4, 0}, {5776, 64, 32, 36, 4, 4, 4, 0},
+ {1568, 64, 40, 36, 4, 4, 4, 0}, {2920, 64, 64, 24, 4, 4, 4, 0}};
+
+ const bool adj_lhs = info.adj_lhs();
+ const bool adj_rhs = info.adj_rhs();
+
+ const MatMulNativeConfigsMatrix *configs_best_to_use = nullptr;
+ const MatMulNativeConfigsMatrix *configs_fallback_to_use = nullptr;
+
+ if ((adj_lhs == false) && (adj_rhs == false))
+ {
+ configs_best_to_use = &configs_mnkb_best_nt_nt;
+ configs_fallback_to_use = &configs_mnkb_fallback_nt_nt;
+ }
+ else if ((adj_lhs == false) && (adj_rhs == true))
+ {
+ configs_best_to_use = &configs_mnkb_best_nt_t;
+ configs_fallback_to_use = &configs_mnkb_fallback_nt_t;
+ }
+ else if ((adj_lhs == true) && (adj_rhs == false))
+ {
+ configs_best_to_use = &configs_mnkb_best_t_nt;
+ configs_fallback_to_use = &configs_mnkb_fallback_t_nt;
+ }
+ else
+ {
+ configs_best_to_use = &configs_mnkb_best_t_t;
+ configs_fallback_to_use = &configs_mnkb_fallback_t_t;
+ }
+
+ MatMulKernelInfo desc0 = find_info(*configs_best_to_use, adj_lhs, adj_rhs, m, n, k, b);
+ MatMulKernelInfo desc1 = find_info(*configs_fallback_to_use, adj_lhs, adj_rhs, m, n, k, b);
+
+ return select_info(desc0, desc1, m, n, k, b, DataType::F32, rhs_lock_padding);
+}
+
+MatMulKernelInfo ClMatMulNativeDefaultConfigValhall::configure_G710_f16(
+ unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool rhs_lock_padding, const MatMulInfo &info)
+{
+ const MatMulNativeConfigsMatrix configs_mnkb_best_nt_nt = {
+ {3136, 64, 64, 36, 4, 4, 16, 1}, {4096, 48, 32, 36, 4, 4, 8, 1}, {688, 92, 68, 32, 4, 4, 16, 1},
+ {24, 464, 412, 24, 4, 4, 4, 1}, {112, 184, 144, 28, 4, 4, 16, 1}, {5776, 64, 32, 36, 4, 4, 8, 1},
+ {1568, 64, 40, 36, 4, 4, 8, 1}, {2920, 64, 64, 24, 4, 4, 16, 1}};
+
+ const MatMulNativeConfigsMatrix configs_mnkb_fallback_nt_nt = {
+ {3136, 64, 64, 36, 6, 4, 8, 0}, {4096, 48, 32, 36, 6, 4, 8, 0}, {688, 92, 68, 32, 6, 4, 8, 0},
+ {24, 464, 412, 24, 4, 4, 8, 0}, {112, 184, 144, 28, 6, 4, 8, 0}, {5776, 64, 32, 36, 6, 4, 8, 0},
+ {1568, 64, 40, 36, 6, 4, 8, 0}, {2920, 64, 64, 24, 6, 4, 8, 0}};
+
+ const MatMulNativeConfigsMatrix configs_mnkb_best_nt_t = {
+ {3136, 64, 64, 36, 6, 4, 8, 1}, {4096, 48, 32, 36, 6, 4, 8, 1}, {688, 92, 68, 32, 4, 4, 4, 1},
+ {24, 464, 412, 24, 6, 2, 4, 1}, {112, 184, 144, 28, 4, 2, 16, 1}, {5776, 64, 32, 36, 6, 4, 8, 1},
+ {1568, 64, 40, 36, 6, 4, 8, 1}, {2920, 64, 64, 24, 6, 4, 8, 1}};
+
+ const MatMulNativeConfigsMatrix configs_mnkb_fallback_nt_t = {
+ {3136, 64, 64, 36, 6, 2, 16, 0}, {4096, 48, 32, 36, 5, 4, 8, 0}, {688, 92, 68, 32, 6, 2, 16, 0},
+ {24, 464, 412, 24, 6, 2, 16, 0}, {112, 184, 144, 28, 6, 2, 16, 0}, {5776, 64, 32, 36, 5, 4, 8, 0},
+ {1568, 64, 40, 36, 5, 4, 8, 0}, {2920, 64, 64, 24, 6, 2, 16, 0}};
+
+ const MatMulNativeConfigsMatrix configs_mnkb_best_t_nt = {
+ {3136, 64, 64, 36, 4, 4, 16, 1}, {4096, 48, 32, 36, 4, 4, 4, 1}, {688, 92, 68, 32, 4, 4, 4, 1},
+ {24, 464, 412, 24, 4, 4, 4, 1}, {112, 184, 144, 28, 4, 4, 4, 1}, {5776, 64, 32, 36, 4, 4, 4, 1},
+ {1568, 64, 40, 36, 4, 4, 4, 1}, {2920, 64, 64, 24, 4, 4, 4, 1}};
+
+ const MatMulNativeConfigsMatrix configs_mnkb_fallback_t_nt = {
+ {3136, 64, 64, 36, 4, 4, 4, 0}, {4096, 48, 32, 36, 4, 4, 4, 0}, {688, 92, 68, 32, 4, 4, 4, 0},
+ {24, 464, 412, 24, 4, 4, 4, 0}, {112, 184, 144, 28, 4, 4, 4, 0}, {5776, 64, 32, 36, 4, 4, 4, 0},
+ {1568, 64, 40, 36, 4, 4, 4, 0}, {2920, 64, 64, 24, 4, 4, 4, 0}};
+
+ const MatMulNativeConfigsMatrix configs_mnkb_best_t_t = {
+ {3136, 64, 64, 36, 4, 4, 16, 1}, {4096, 48, 32, 36, 4, 4, 8, 1}, {688, 92, 68, 32, 4, 4, 4, 1},
+ {24, 464, 412, 24, 4, 2, 8, 1}, {112, 184, 144, 28, 4, 2, 16, 1}, {5776, 64, 32, 36, 4, 4, 16, 1},
+ {1568, 64, 40, 36, 4, 4, 8, 1}, {2920, 64, 64, 24, 4, 4, 16, 1}};
+
+ const MatMulNativeConfigsMatrix configs_mnkb_fallback_t_t = {
+ {3136, 64, 64, 36, 4, 4, 8, 0}, {4096, 48, 32, 36, 4, 4, 8, 0}, {688, 92, 68, 32, 4, 4, 8, 0},
+ {24, 464, 412, 24, 4, 4, 8, 0}, {112, 184, 144, 28, 4, 4, 8, 0}, {5776, 64, 32, 36, 4, 4, 8, 0},
+ {1568, 64, 40, 36, 4, 4, 8, 0}, {2920, 64, 64, 24, 4, 4, 8, 0}};
+
+ const bool adj_lhs = info.adj_lhs();
+ const bool adj_rhs = info.adj_rhs();
+
+ const MatMulNativeConfigsMatrix *configs_best_to_use = nullptr;
+ const MatMulNativeConfigsMatrix *configs_fallback_to_use = nullptr;
+
+ if ((adj_lhs == false) && (adj_rhs == false))
+ {
+ configs_best_to_use = &configs_mnkb_best_nt_nt;
+ configs_fallback_to_use = &configs_mnkb_fallback_nt_nt;
+ }
+ else if ((adj_lhs == false) && (adj_rhs == true))
+ {
+ configs_best_to_use = &configs_mnkb_best_nt_t;
+ configs_fallback_to_use = &configs_mnkb_fallback_nt_t;
+ }
+ else if ((adj_lhs == true) && (adj_rhs == false))
+ {
+ configs_best_to_use = &configs_mnkb_best_t_nt;
+ configs_fallback_to_use = &configs_mnkb_fallback_t_nt;
+ }
+ else
+ {
+ configs_best_to_use = &configs_mnkb_best_t_t;
+ configs_fallback_to_use = &configs_mnkb_fallback_t_t;
+ }
+
+ MatMulKernelInfo desc0 = find_info(*configs_best_to_use, adj_lhs, adj_rhs, m, n, k, b);
+ MatMulKernelInfo desc1 = find_info(*configs_fallback_to_use, adj_lhs, adj_rhs, m, n, k, b);
+
+ return select_info(desc0, desc1, m, n, k, b, DataType::F16, rhs_lock_padding);
+}
+
+MatMulKernelInfo ClMatMulNativeDefaultConfigValhall::configure_G710_u8(
+ unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool rhs_lock_padding, const MatMulInfo &info)
+{
+ ARM_COMPUTE_UNUSED(rhs_lock_padding);
+
+ const MatMulNativeConfigsMatrix configs_mnkb_best_nt_nt = {
+ {3136, 64, 64, 36, 6, 4, 4, 0}, {4096, 48, 32, 36, 6, 4, 4, 0}, {688, 92, 68, 32, 2, 8, 4, 0},
+ {24, 464, 412, 24, 4, 4, 4, 0}, {112, 184, 144, 28, 6, 4, 4, 0}, {5776, 64, 32, 36, 6, 4, 4, 0},
+ {1568, 64, 40, 36, 6, 4, 4, 0}, {2920, 64, 64, 24, 5, 4, 4, 0}};
+
+ const MatMulNativeConfigsMatrix configs_mnkb_best_nt_t = {
+ {3136, 64, 64, 36, 4, 4, 16, 0}, {4096, 48, 32, 36, 4, 4, 16, 0}, {688, 92, 68, 32, 4, 4, 16, 0},
+ {24, 464, 412, 24, 6, 2, 16, 0}, {112, 184, 144, 28, 4, 4, 16, 0}, {5776, 64, 32, 36, 4, 4, 16, 0},
+ {1568, 64, 40, 36, 6, 4, 4, 0}, {2920, 64, 64, 24, 4, 4, 16, 0}};
+
+ const MatMulNativeConfigsMatrix configs_mnkb_best_t_nt = {
+ {3136, 64, 64, 36, 4, 4, 8, 0}, {4096, 48, 32, 36, 4, 4, 8, 0}, {688, 92, 68, 32, 4, 4, 4, 0},
+ {24, 464, 412, 24, 4, 4, 4, 0}, {112, 184, 144, 28, 4, 4, 8, 0}, {5776, 64, 32, 36, 4, 4, 8, 0},
+ {1568, 64, 40, 36, 4, 4, 8, 0}, {2920, 64, 64, 24, 4, 4, 8, 0}};
+
+ const MatMulNativeConfigsMatrix configs_mnkb_best_t_t = {
+ {3136, 64, 64, 36, 4, 2, 16, 0}, {4096, 48, 32, 36, 4, 4, 4, 0}, {688, 92, 68, 32, 4, 4, 8, 0},
+ {24, 464, 412, 24, 4, 2, 16, 0}, {112, 184, 144, 28, 4, 2, 16, 0}, {5776, 64, 32, 36, 4, 4, 4, 0},
+ {1568, 64, 40, 36, 4, 4, 8, 0}, {2920, 64, 64, 24, 4, 2, 16, 0}};
+
+ const bool adj_lhs = info.adj_lhs();
+ const bool adj_rhs = info.adj_rhs();
+
+ if ((adj_lhs == false) && (adj_rhs == false))
+ {
+ return find_info(configs_mnkb_best_nt_nt, adj_lhs, adj_rhs, m, n, k, b);
+ }
+ else if ((adj_lhs == false) && (adj_rhs == true))
+ {
+ return find_info(configs_mnkb_best_nt_t, adj_lhs, adj_rhs, m, n, k, b);
+ }
+ else if ((adj_lhs == true) && (adj_rhs == false))
+ {
+ return find_info(configs_mnkb_best_t_nt, adj_lhs, adj_rhs, m, n, k, b);
+ }
+ else
+ {
+ return find_info(configs_mnkb_best_t_t, adj_lhs, adj_rhs, m, n, k, b);
+ }
+}
+} // namespace cl_matmul
+} // namespace arm_compute
diff --git a/src/runtime/heuristics/matmul_native/ClMatMulNativeDefaultConfigValhall.h b/src/runtime/heuristics/matmul_native/ClMatMulNativeDefaultConfigValhall.h
new file mode 100644
index 0000000000..5279871057
--- /dev/null
+++ b/src/runtime/heuristics/matmul_native/ClMatMulNativeDefaultConfigValhall.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ACL_SRC_RUNTIME_HEURISTICS_MATMUL_NATIVE_CLMATMULNATIVEDEFAULTCONFIGVALHALL_H
+#define ACL_SRC_RUNTIME_HEURISTICS_MATMUL_NATIVE_CLMATMULNATIVEDEFAULTCONFIGVALHALL_H
+
+#include "src/runtime/heuristics/matmul_native/IClMatMulNativeKernelConfig.h"
+
+namespace arm_compute
+{
+namespace cl_matmul
+{
+/** Valhall based OpenCL matmul configuration */
+class ClMatMulNativeDefaultConfigValhall final : public IClMatMulNativeKernelConfig
+{
+public:
+ /** Constructor
+ *
+ * @param[in] gpu GPU target
+ */
+ ClMatMulNativeDefaultConfigValhall(GPUTarget gpu);
+
+ // Inherited overridden method
+ MatMulKernelInfo configure(const ITensorInfo *lhs, const ITensorInfo *rhs, const MatMulInfo &info) override;
+
+private:
+ MatMulKernelInfo configure_G710_f32(
+ unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool rhs_lock_padding, const MatMulInfo &info);
+ MatMulKernelInfo configure_G710_f16(
+ unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool rhs_lock_padding, const MatMulInfo &info);
+ MatMulKernelInfo configure_G710_u8(
+ unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool rhs_lock_padding, const MatMulInfo &info);
+ MatMulKernelInfo configure_G715_f32(
+ unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool rhs_lock_padding, const MatMulInfo &info);
+ MatMulKernelInfo configure_G715_f16(
+ unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool rhs_lock_padding, const MatMulInfo &info);
+ MatMulKernelInfo configure_G715_u8(
+ unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool rhs_lock_padding, const MatMulInfo &info);
+};
+} // namespace cl_matmul
+} // namespace arm_compute
+#endif // ACL_SRC_RUNTIME_HEURISTICS_MATMUL_NATIVE_CLMATMULNATIVEDEFAULTCONFIGVALHALL_H
diff --git a/src/runtime/heuristics/matmul_native/ClMatMulNativeDefaultVariantValhall.cpp b/src/runtime/heuristics/matmul_native/ClMatMulNativeDefaultVariantValhall.cpp
new file mode 100644
index 0000000000..3878f698fd
--- /dev/null
+++ b/src/runtime/heuristics/matmul_native/ClMatMulNativeDefaultVariantValhall.cpp
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "src/runtime/heuristics/matmul_native/ClMatMulNativeDefaultVariantValhall.h"
+
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/GPUTarget.h"
+#include "arm_compute/core/TensorInfo.h"
+
+namespace arm_compute
+{
+namespace cl_matmul
+{
+ClMatMulNativeDefaultVariantValhall::ClMatMulNativeDefaultVariantValhall(GPUTarget gpu)
+ : IClMatMulNativeKernelVariant(gpu)
+{
+}
+
+MatMulKernelType ClMatMulNativeDefaultVariantValhall::select_kernel(const ITensorInfo *lhs,
+ const ITensorInfo *rhs,
+ const MatMulInfo &info,
+ const ActivationLayerInfo &act_info)
+{
+ ARM_COMPUTE_UNUSED(rhs);
+
+ using VariantFunctionExecutorPtr =
+ MatMulKernelType (ClMatMulNativeDefaultVariantValhall::*)(int k, bool act_enabled);
+
+ ClMatMulNativeVariantArray<VariantFunctionExecutorPtr> configs_G715(
+ &ClMatMulNativeDefaultVariantValhall::configure_G715_float,
+ &ClMatMulNativeDefaultVariantValhall::configure_G715_quantized);
+
+ ClMatMulNativeVariantArray<VariantFunctionExecutorPtr> configs_default(
+ &ClMatMulNativeDefaultVariantValhall::configure_default_float,
+ &ClMatMulNativeDefaultVariantValhall::configure_default_quantized);
+
+ VariantFunctionExecutorPtr func = nullptr;
+ switch (_target)
+ {
+ case GPUTarget::G715:
+ case GPUTarget::G615:
+ func = configs_G715.get_function(lhs->data_type());
+ break;
+ default:
+ func = configs_default.get_function(lhs->data_type());
+ break;
+ }
+
+ const int k = info.adj_lhs() ? lhs->tensor_shape().y() : lhs->tensor_shape().x();
+ const bool act_enabled = act_info.enabled();
+
+ ARM_COMPUTE_ERROR_ON_MSG(func == nullptr, "Data type not supported for matmul native");
+ return (this->*func)(k, act_enabled);
+}
+
+MatMulKernelType ClMatMulNativeDefaultVariantValhall::configure_G715_float(int k, bool act_enabled)
+{
+ // MMUL kernel works only when K is a multiple of 4
+ if (!act_enabled && k % 4 == 0)
+ {
+ return MatMulKernelType::NATIVE_MMUL_FP;
+ }
+
+ return MatMulKernelType::NATIVE_FP;
+}
+
+MatMulKernelType ClMatMulNativeDefaultVariantValhall::configure_G715_quantized(int k, bool act_enabled)
+{
+ // MMUL kernel works only when K is a multiple of 16
+ if (!act_enabled && k % 16 == 0)
+ {
+ return MatMulKernelType::NATIVE_MMUL_QUANTIZED;
+ }
+
+ return MatMulKernelType::NATIVE_QUANTIZED;
+}
+
+MatMulKernelType ClMatMulNativeDefaultVariantValhall::configure_default_float(int k, bool act_enabled)
+{
+ ARM_COMPUTE_UNUSED(k, act_enabled);
+
+ return MatMulKernelType::NATIVE_FP;
+}
+
+MatMulKernelType ClMatMulNativeDefaultVariantValhall::configure_default_quantized(int k, bool act_enabled)
+{
+ ARM_COMPUTE_UNUSED(k, act_enabled);
+
+ return MatMulKernelType::NATIVE_QUANTIZED;
+}
+
+} // namespace cl_matmul
+} // namespace arm_compute
diff --git a/src/runtime/heuristics/matmul_native/ClMatMulNativeDefaultVariantValhall.h b/src/runtime/heuristics/matmul_native/ClMatMulNativeDefaultVariantValhall.h
new file mode 100644
index 0000000000..a202676e98
--- /dev/null
+++ b/src/runtime/heuristics/matmul_native/ClMatMulNativeDefaultVariantValhall.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ACL_SRC_RUNTIME_HEURISTICS_MATMUL_NATIVE_CLMATMULNATIVEDEFAULTVARIANTVALHALL_H
+#define ACL_SRC_RUNTIME_HEURISTICS_MATMUL_NATIVE_CLMATMULNATIVEDEFAULTVARIANTVALHALL_H
+
+#include "src/runtime/heuristics/matmul_native/IClMatMulNativeKernelVariant.h"
+
+namespace arm_compute
+{
+namespace cl_matmul
+{
+/** Valhall based OpenCL matmul configuration */
+class ClMatMulNativeDefaultVariantValhall final : public IClMatMulNativeKernelVariant
+{
+public:
+ /** Constructor
+ *
+ * @param[in] gpu GPU target
+ */
+ ClMatMulNativeDefaultVariantValhall(GPUTarget gpu);
+
+ // Inherited overridden method
+ MatMulKernelType select_kernel(const ITensorInfo *lhs,
+ const ITensorInfo *rhs,
+ const MatMulInfo &info,
+ const ActivationLayerInfo &act_info) override;
+
+private:
+ MatMulKernelType configure_G715_float(int k, bool act_enabled);
+ MatMulKernelType configure_G715_quantized(int k, bool act_enabled);
+ MatMulKernelType configure_default_float(int k, bool act_enabled);
+ MatMulKernelType configure_default_quantized(int k, bool act_enabled);
+};
+} // namespace cl_matmul
+} // namespace arm_compute
+#endif // ACL_SRC_RUNTIME_HEURISTICS_MATMUL_NATIVE_CLMATMULNATIVEDEFAULTVARIANTVALHALL_H
diff --git a/src/runtime/heuristics/matmul_native/ClMatMulNativeHelpers.cpp b/src/runtime/heuristics/matmul_native/ClMatMulNativeHelpers.cpp
new file mode 100644
index 0000000000..89cad30214
--- /dev/null
+++ b/src/runtime/heuristics/matmul_native/ClMatMulNativeHelpers.cpp
@@ -0,0 +1,134 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "src/runtime/heuristics/matmul_native/ClMatMulNativeHelpers.h"
+
+#include "arm_compute/core/KernelDescriptors.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/TensorShape.h"
+
+#include "src/gpu/cl/kernels/ClMatMulNativeKernel.h"
+
+#include <limits>
+#include <utility>
+
+namespace arm_compute
+{
+namespace cl_matmul
+{
+MatMulKernelInfo select_info(const MatMulKernelInfo &info0,
+ const MatMulKernelInfo &info1,
+ unsigned int m,
+ unsigned int n,
+ unsigned int k,
+ unsigned int b,
+ DataType data_type,
+ bool rhs_lock_padding)
+{
+ ARM_COMPUTE_ERROR_ON_MSG(info1.export_rhs_to_cl_image == true,
+ "The fallback MatMul configuration cannot have export_to_cl_image = true");
+ ARM_COMPUTE_ERROR_ON_MSG(info0.adj_lhs != info1.adj_lhs,
+ "The MatMul configurations must have the same adj_lhs value");
+ ARM_COMPUTE_ERROR_ON_MSG(info0.adj_rhs != info1.adj_rhs,
+ "The MatMul configurations must have the same adj_rhs value");
+
+ const bool adj_lhs = info0.adj_lhs;
+ const bool adj_rhs = info0.adj_rhs;
+
+ TensorInfo lhs_info =
+ !adj_lhs ? TensorInfo(TensorShape(k, m, b), 1, data_type) : TensorInfo(TensorShape(m, k, b), 1, data_type);
+ TensorInfo rhs_info =
+ !adj_rhs ? TensorInfo(TensorShape(n, k, b), 1, data_type) : TensorInfo(TensorShape(k, n, b), 1, data_type);
+ TensorInfo dst_info;
+
+ if (rhs_lock_padding == false)
+ {
+ if (bool(opencl::kernels::ClMatMulNativeKernel::validate(&lhs_info, &rhs_info, nullptr, &dst_info, info0)))
+ {
+ return info0;
+ }
+ else
+ {
+ return info1;
+ }
+ }
+ else
+ {
+ return info1;
+ }
+}
+
+MatMulKernelInfo find_info(const MatMulNativeConfigsMatrix &configs,
+ bool adj_lhs,
+ bool adj_rhs,
+ unsigned int m,
+ unsigned int n,
+ unsigned int k,
+ unsigned int b)
+{
+ size_t min_acc = std::numeric_limits<size_t>::max();
+ size_t min_idx = 0;
+
+ ARM_COMPUTE_ERROR_ON(configs.size() == 0);
+ const size_t num_rows = configs.size();
+ const size_t num_cols = configs[0].size();
+
+ ARM_COMPUTE_ERROR_ON_MSG(num_cols != 8U,
+ "The entry should have 8 integer values representing: M, N, K, B, M0, N0. K0, IMG_RHS");
+ ARM_COMPUTE_UNUSED(num_cols);
+
+ // Find nearest GeMM workload
+ // Note: the workload does not depend on the K dimension
+ for (size_t y = 0; y < num_rows; ++y)
+ {
+ size_t mc0 = static_cast<size_t>(configs[y][0]);
+ size_t nc0 = static_cast<size_t>(configs[y][1]);
+ size_t kc0 = static_cast<size_t>(configs[y][2]);
+ size_t bc0 = static_cast<size_t>(configs[y][3]);
+
+ size_t acc = 0;
+ acc += (m - mc0) * (m - mc0);
+ acc += (n - nc0) * (n - nc0);
+ acc += (k - kc0) * (k - kc0);
+ acc += (b - bc0) * (b - bc0);
+ acc = std::sqrt(acc);
+ if (acc < min_acc)
+ {
+ min_acc = acc;
+ min_idx = y;
+ }
+ }
+
+ // Get the configuration from the nearest GeMM shape
+ MatMulKernelInfo desc;
+ desc.adj_lhs = adj_lhs;
+ desc.adj_rhs = adj_rhs;
+ desc.m0 = configs[min_idx][4];
+ desc.n0 = configs[min_idx][5];
+ desc.k0 = configs[min_idx][6];
+ desc.export_rhs_to_cl_image = configs[min_idx][7];
+
+ return desc;
+}
+} // namespace cl_matmul
+} // namespace arm_compute
diff --git a/src/runtime/heuristics/matmul_native/ClMatMulNativeHelpers.h b/src/runtime/heuristics/matmul_native/ClMatMulNativeHelpers.h
new file mode 100644
index 0000000000..699f5fe8c1
--- /dev/null
+++ b/src/runtime/heuristics/matmul_native/ClMatMulNativeHelpers.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ACL_SRC_RUNTIME_HEURISTICS_MATMUL_NATIVE_CLMATMULNATIVEHELPERS_H
+#define ACL_SRC_RUNTIME_HEURISTICS_MATMUL_NATIVE_CLMATMULNATIVEHELPERS_H
+
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+// Forward declaration
+struct MatMulKernelInfo;
+
+namespace cl_matmul
+{
+using MatMulNativeConfigsMatrix = std::vector<std::vector<int32_t>>;
+
+/** This function accepts two MatMulKernelInfo objects where only the first can be with cl_image2d support enabled.
+ * The aim of this function is to check whether the first MatMulKernelInfo object is valid. If not, the function will
+ * return the second MatMulKernelInfo object. Otherwise, the first one.
+ *
+ * @param[in] info0 MatMulKernelInfo with cl_image2d support
+ * @param[in] info1 MatMulKernelInfo to fall-back if cl_image2d cannot be used
+ * @param[in] m Number of rows (M) of the LHS matrix
+ * @param[in] n Number of columns (N) in the RHS matrix not reshaped
+ * @param[in] k Number of rows (K) in the RHS matrix not reshaped
+ * @param[in] b Batch size
+ * @param[in] data_type Data type
+ * @param[in] rhs_lock_padding Flag used to know whether the RHS paddings are locked
+ *
+ * @return @ref MatMulKernelInfo
+ */
+MatMulKernelInfo select_info(const MatMulKernelInfo &info0,
+ const MatMulKernelInfo &info1,
+ unsigned int m,
+ unsigned int n,
+ unsigned int k,
+ unsigned int b,
+ DataType data_type,
+ bool rhs_lock_padding);
+
+/** Find the preferred configurations for the MatMul Native kernel using the MatMulNativeConfigsMatrix provided by the user
+ *
+ * @param[in] configs List of best configurations for a limited number of MatMul shapes
+ * @param[in] adj_lhs Adjoint LHS flag value
+ * @param[in] adj_rhs Adjoint RHS flag value
+ * @param[in] m Number of rows (M) of the LHS matrix
+ * @param[in] n Number of columns (N) in the RHS matrix not reshaped
+ * @param[in] k Number of rows (K) in the RHS matrix not reshaped
+ * @param[in] b Batch size
+ *
+ * @return @ref MatMulKernelInfo
+ */
+MatMulKernelInfo find_info(const MatMulNativeConfigsMatrix &configs,
+ bool adj_lhs,
+ bool adj_rhs,
+ unsigned int m,
+ unsigned int n,
+ unsigned int k,
+ unsigned int b);
+} // namespace cl_matmul
+} // namespace arm_compute
+#endif // ACL_SRC_RUNTIME_HEURISTICS_MATMUL_NATIVE_CLMATMULNATIVEHELPERS_H
diff --git a/src/runtime/heuristics/matmul_native/ClMatMulNativeKernelConfig.h b/src/runtime/heuristics/matmul_native/ClMatMulNativeKernelConfig.h
new file mode 100644
index 0000000000..e7485bca81
--- /dev/null
+++ b/src/runtime/heuristics/matmul_native/ClMatMulNativeKernelConfig.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ACL_SRC_RUNTIME_HEURISTICS_MATMUL_NATIVE_CLMATMULNATIVEKERNELCONFIG_H
+#define ACL_SRC_RUNTIME_HEURISTICS_MATMUL_NATIVE_CLMATMULNATIVEKERNELCONFIG_H
+
+#include "src/runtime/heuristics/matmul_native/ClMatMulNativeDefaultConfigValhall.h"
+#include "src/runtime/heuristics/matmul_native/IClMatMulNativeKernelConfig.h"
+
+#include <memory>
+
+namespace arm_compute
+{
+namespace cl_matmul
+{
+/** ClMatMul configuration factory class */
+class ClMatMulNativeKernelConfigurationFactory final
+{
+public:
+ /** Static method to call the ClMatMul configuration class accordingly with the GPU target
+ *
+ * @param[in] gpu GPU target
+ *
+ * @return IClMatMulNativeKernelConfig
+ */
+ static std::unique_ptr<IClMatMulNativeKernelConfig> create(GPUTarget gpu)
+ {
+ switch (get_arch_from_target(gpu))
+ {
+ case GPUTarget::MIDGARD:
+ case GPUTarget::BIFROST:
+ case GPUTarget::VALHALL:
+ case GPUTarget::FIFTHGEN:
+ return std::make_unique<ClMatMulNativeDefaultConfigValhall>(gpu);
+ default:
+ ARM_COMPUTE_ERROR("Not supported GPU target");
+ }
+ }
+};
+} // namespace cl_matmul
+} // namespace arm_compute
+#endif // ACL_SRC_RUNTIME_HEURISTICS_MATMUL_NATIVE_CLMATMULNATIVEKERNELCONFIG_H
diff --git a/src/runtime/heuristics/matmul_native/ClMatMulNativeKernelVariant.h b/src/runtime/heuristics/matmul_native/ClMatMulNativeKernelVariant.h
new file mode 100644
index 0000000000..c2895b8919
--- /dev/null
+++ b/src/runtime/heuristics/matmul_native/ClMatMulNativeKernelVariant.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ACL_SRC_RUNTIME_HEURISTICS_MATMUL_NATIVE_CLMATMULNATIVEKERNELVARIANT_H
+#define ACL_SRC_RUNTIME_HEURISTICS_MATMUL_NATIVE_CLMATMULNATIVEKERNELVARIANT_H
+
+#include "src/runtime/heuristics/matmul_native/ClMatMulNativeDefaultVariantValhall.h"
+#include "src/runtime/heuristics/matmul_native/IClMatMulNativeKernelVariant.h"
+
+#include <memory>
+
+namespace arm_compute
+{
+namespace cl_matmul
+{
+
+/** ClMatMul variant factory class */
+class ClMatMulNativeKernelVariantFactory final
+{
+public:
+ /** Static method to call the ClMatMul configuration class accordingly with the GPU target
+ *
+ * @param[in] gpu GPU target
+ *
+ * @return IClMatMulNativeKernelVariant
+ */
+ static std::unique_ptr<IClMatMulNativeKernelVariant> create(GPUTarget gpu)
+ {
+ switch (get_arch_from_target(gpu))
+ {
+ case GPUTarget::MIDGARD:
+ case GPUTarget::BIFROST:
+ case GPUTarget::VALHALL:
+ case GPUTarget::FIFTHGEN:
+ return std::make_unique<ClMatMulNativeDefaultVariantValhall>(gpu);
+ default:
+ ARM_COMPUTE_ERROR("Not supported GPU target");
+ }
+ }
+};
+} // namespace cl_matmul
+} // namespace arm_compute
+#endif // ACL_SRC_RUNTIME_HEURISTICS_MATMUL_NATIVE_CLMATMULNATIVEKERNELVARIANT_H
diff --git a/src/runtime/heuristics/matmul_native/IClMatMulNativeKernelConfig.h b/src/runtime/heuristics/matmul_native/IClMatMulNativeKernelConfig.h
new file mode 100644
index 0000000000..00ba3641d5
--- /dev/null
+++ b/src/runtime/heuristics/matmul_native/IClMatMulNativeKernelConfig.h
@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ACL_SRC_RUNTIME_HEURISTICS_MATMUL_NATIVE_ICLMATMULNATIVEKERNELCONFIG_H
+#define ACL_SRC_RUNTIME_HEURISTICS_MATMUL_NATIVE_ICLMATMULNATIVEKERNELCONFIG_H
+
+#include "arm_compute/core/GPUTarget.h"
+#include "arm_compute/core/KernelDescriptors.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/function_info/MatMulInfo.h"
+
+#include "src/core/common/Macros.h"
+
+namespace arm_compute
+{
+namespace cl_matmul
+{
+/** Basic container for the OpenCL MatMul Native configuration functions */
+template <class T>
+class ClMatMulNativeConfigArray
+{
+public:
+ /** Alias for F32 index */
+ static constexpr size_t DT_F32 = 0;
+ /** Alias for F16 index */
+ static constexpr size_t DT_F16 = 1;
+ /** Alias for Int8 index */
+ static constexpr size_t DT_INT8 = 2;
+
+ /** Constructor
+ *
+ * @param[in] func_f32 Function to call for matmul native F32
+ * @param[in] func_f16 Function to call for matmul native F16
+ * @param[in] func_int8 Function to call for matmul native Int8 (QASYMM8, QASYMM8_SIGNED, QSYMM8_PER_CHANNEL)
+ *
+ */
+ ClMatMulNativeConfigArray(T func_f32, T func_f16, T func_int8) : _configs{func_f32, func_f16, func_int8}
+ {
+ }
+
+ /** Method to return the matmul native configuration function based on data type
+ *
+ * @param[in] data_type Input data type
+ *
+ * @return the valid function otherwise it returns nullptr if the data type is not valid
+ */
+ T get_function(DataType data_type)
+ {
+ switch (data_type)
+ {
+ case DataType::F32:
+ return _configs.at(DT_F32);
+ case DataType::F16:
+ return _configs.at(DT_F16);
+ case DataType::QASYMM8:
+ case DataType::QASYMM8_SIGNED:
+ case DataType::QSYMM8_PER_CHANNEL:
+ return _configs.at(DT_INT8);
+ default:
+ return nullptr;
+ }
+ }
+
+private:
+ std::array<T, 3> _configs;
+};
+
+/** Basic interface for the matmul native kernel configuration
+ * This is the base class that chooses architecture specific kernel configurations.
+*/
+class IClMatMulNativeKernelConfig
+{
+public:
+ /** Constructor
+ *
+ * @param[in] arch GPU target
+ */
+ IClMatMulNativeKernelConfig(GPUTarget arch) : _target(arch)
+ {
+ }
+ ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(IClMatMulNativeKernelConfig);
+ /** Virtual destructor */
+ virtual ~IClMatMulNativeKernelConfig() = default;
+ /** This method returns the @ref MatMulKernelInfo for the given inputs
+ *
+ * @param[in] lhs LHS tensor
+ * @param[in] rhs RHS tensor
+ * @param[in] info MatMul info
+ */
+ virtual MatMulKernelInfo configure(const ITensorInfo *lhs, const ITensorInfo *rhs, const MatMulInfo &info) = 0;
+
+protected:
+ GPUTarget _target;
+};
+} // namespace cl_matmul
+} // namespace arm_compute
+#endif // ACL_SRC_RUNTIME_HEURISTICS_MATMUL_NATIVE_ICLMATMULNATIVEKERNELCONFIG_H
diff --git a/src/runtime/heuristics/matmul_native/IClMatMulNativeKernelVariant.h b/src/runtime/heuristics/matmul_native/IClMatMulNativeKernelVariant.h
new file mode 100644
index 0000000000..eac41dd6a3
--- /dev/null
+++ b/src/runtime/heuristics/matmul_native/IClMatMulNativeKernelVariant.h
@@ -0,0 +1,135 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ACL_SRC_RUNTIME_HEURISTICS_MATMUL_NATIVE_ICLMATMULNATIVEKERNELVARIANT_H
+#define ACL_SRC_RUNTIME_HEURISTICS_MATMUL_NATIVE_ICLMATMULNATIVEKERNELVARIANT_H
+
+#include "arm_compute/core/CoreTypes.h" // DataType
+#include "arm_compute/core/GPUTarget.h"
+#include "arm_compute/core/ITensorInfo.h"
+#include "arm_compute/function_info/ActivationLayerInfo.h"
+#include "arm_compute/function_info/MatMulInfo.h"
+
+#include "src/core/common/Macros.h"
+
+#include <array>
+
+namespace arm_compute
+{
+namespace cl_matmul
+{
+enum class MatMulKernelType
+{
+ /** Native matrix multiplication for FP types */
+ NATIVE_FP,
+
+ /** Native matrix multiplication for quantized types */
+ NATIVE_QUANTIZED,
+
+ /** Native matrix multiplication using MMUL extension for FP types */
+ NATIVE_MMUL_FP,
+
+ /** Native matrix multiplication using MMUL extension for Quantized types */
+ NATIVE_MMUL_QUANTIZED
+};
+
+/** Basic container for the OpenCL MatMul Native variant functions */
+template <class T>
+class ClMatMulNativeVariantArray
+{
+public:
+ /** Alias for Float index */
+ static constexpr size_t DT_FLOAT = 0;
+ /** Alias for Quantized type index */
+ static constexpr size_t DT_QUANTIZED = 1;
+
+ /** Constructor
+ *
+ * @param[in] func_float Function to call for matmul native float (F32, F16)
+ * @param[in] func_quantized Function to call for matmul native quantized (QASYMM8, QASYMM8_SIGNED, QSYMM8_PER_CHANNEL)
+ *
+ */
+ ClMatMulNativeVariantArray(T func_float, T func_quantized) : _configs{func_float, func_quantized}
+ {
+ }
+
+ /** Method to return the matmul native variant function based on data type
+ *
+ * @param[in] data_type Input data type
+ *
+ * @return the valid function otherwise it returns nullptr if the data type is not valid
+ */
+ T get_function(DataType data_type)
+ {
+ switch (data_type)
+ {
+ case DataType::F32:
+ case DataType::F16:
+ return _configs.at(DT_FLOAT);
+ case DataType::QASYMM8:
+ case DataType::QASYMM8_SIGNED:
+ case DataType::QSYMM8_PER_CHANNEL:
+ return _configs.at(DT_QUANTIZED);
+ default:
+ return nullptr;
+ }
+ }
+
+private:
+ std::array<T, 2> _configs;
+};
+
+/** Basic interface for the matmul native kernel variant
+ * This is the base class that chooses architecture specific kernel variants.
+*/
+class IClMatMulNativeKernelVariant
+{
+public:
+ /** Constructor
+ *
+ * @param[in] arch GPU target
+ */
+ IClMatMulNativeKernelVariant(GPUTarget arch) : _target(arch)
+ {
+ }
+ ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(IClMatMulNativeKernelVariant);
+ /** Virtual destructor */
+ virtual ~IClMatMulNativeKernelVariant() = default;
+ /** This method returns the @ref MatMulKernelType for the given inputs
+ *
+ * @param[in] lhs LHS tensor
+ * @param[in] rhs RHS tensor
+ * @param[in] info MatMul info
+ * @param[in] act_info Activation layer info
+ */
+ virtual MatMulKernelType select_kernel(const ITensorInfo *lhs,
+ const ITensorInfo *rhs,
+ const MatMulInfo &info,
+ const ActivationLayerInfo &act_info) = 0;
+
+protected:
+ GPUTarget _target;
+};
+} // namespace cl_matmul
+} // namespace arm_compute
+#endif // ACL_SRC_RUNTIME_HEURISTICS_MATMUL_NATIVE_ICLMATMULNATIVEKERNELVARIANT_H