From 85cafff0dd99b6f94a77a7d7933682fa7c6a4a70 Mon Sep 17 00:00:00 2001 From: Gunes Bayir Date: Mon, 18 Dec 2023 13:29:31 +0000 Subject: =?UTF-8?q?Add=20Mali=E2=84=A2-G720=20and=20Mali=E2=84=A2-G620=20a?= =?UTF-8?q?s=20GpuTargets?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds the latest Gpus as Gpu Target and sets up kernel selection heuristics for MatMul to address some nightly issues. Resolves: COMPMID-6766 Change-Id: I29dbb08c5ecfb3fcd63230b0b1675ab557074aca Signed-off-by: Gunes Bayir Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10902 Tested-by: Arm Jenkins Comments-Addressed: Arm Jenkins Reviewed-by: Jakub Sujak Benchmark: Arm Jenkins --- Android.bp | 1 + arm_compute/core/GPUTarget.h | 11 +- docs/user_guide/release_version_and_change_log.dox | 1 + filelist.json | 3 +- src/core/GPUTarget.cpp | 50 ++++++-- .../kernels/gemm/native/ClGemmNativeKernelConfig.h | 9 +- .../gemm/reshaped/ClGemmReshapedKernelConfig.h | 9 +- .../ClGemmReshapedOnlyRhsKernelConfig.h | 9 +- src/gpu/cl/operators/ClMatMul.cpp | 62 ++-------- src/runtime/CL/gemm/CLGEMMKernelSelection.h | 9 +- .../direct_conv/ClDirectConvKernelConfig.h | 9 +- .../dwc_native/ClDWCNativeKernelConfig.h | 9 +- .../indirect_conv/ClIndirectConvKernelConfig.h | 9 +- .../ClMatMulNativeDefaultConfigValhall.cpp | 1 + .../ClMatMulNativeDefaultVariantValhall.cpp | 113 +++++++++++++++++ .../ClMatMulNativeDefaultVariantValhall.h | 57 +++++++++ .../matmul_native/ClMatMulNativeHelpers.h | 6 +- .../matmul_native/ClMatMulNativeKernelConfig.h | 7 +- .../matmul_native/ClMatMulNativeKernelVariant.h | 63 ++++++++++ .../matmul_native/IClMatMulNativeKernelConfig.h | 10 +- .../matmul_native/IClMatMulNativeKernelVariant.h | 135 +++++++++++++++++++++ tests/validation/UNIT/GPUTarget.cpp | 4 +- utils/TypePrinter.h | 9 ++ 23 files changed, 486 insertions(+), 110 deletions(-) create mode 100644 
src/runtime/heuristics/matmul_native/ClMatMulNativeDefaultVariantValhall.cpp create mode 100644 src/runtime/heuristics/matmul_native/ClMatMulNativeDefaultVariantValhall.h create mode 100644 src/runtime/heuristics/matmul_native/ClMatMulNativeKernelVariant.h create mode 100644 src/runtime/heuristics/matmul_native/IClMatMulNativeKernelVariant.h diff --git a/Android.bp b/Android.bp index 0502e2c954..e2f86a4f46 100644 --- a/Android.bp +++ b/Android.bp @@ -1041,6 +1041,7 @@ cc_library_static { "src/runtime/heuristics/dwc_native/ClDWCNativeHeuristicsHelpers.cpp", "src/runtime/heuristics/indirect_conv/ClIndirectConvDefaultConfigValhall.cpp", "src/runtime/heuristics/matmul_native/ClMatMulNativeDefaultConfigValhall.cpp", + "src/runtime/heuristics/matmul_native/ClMatMulNativeDefaultVariantValhall.cpp", "src/runtime/heuristics/matmul_native/ClMatMulNativeHelpers.cpp", "utils/CommonGraphOptions.cpp", "utils/GraphUtils.cpp", diff --git a/arm_compute/core/GPUTarget.h b/arm_compute/core/GPUTarget.h index affa79a89e..b107a52d9f 100644 --- a/arm_compute/core/GPUTarget.h +++ b/arm_compute/core/GPUTarget.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2022 Arm Limited. + * Copyright (c) 2018-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef ARM_COMPUTE_GPUTARGET_H -#define ARM_COMPUTE_GPUTARGET_H +#ifndef ACL_ARM_COMPUTE_CORE_GPUTARGET_H +#define ACL_ARM_COMPUTE_CORE_GPUTARGET_H #include "support/Traits.h" @@ -39,6 +39,7 @@ enum class GPUTarget MIDGARD = 0x100, BIFROST = 0x200, VALHALL = 0x300, + FIFTHGEN = 0X400, T600 = 0x110, T700 = 0x120, T800 = 0x130, @@ -62,6 +63,8 @@ enum class GPUTarget G310 = 0x343, G715 = 0x350, G615 = 0x351, + G720 = 0x410, + G620 = 0X411 }; /** Enable bitwise operations on GPUTarget enumerations */ @@ -114,4 +117,4 @@ inline bool gpu_target_is_in(GPUTarget target_to_check, GPUTarget target) return target_to_check == target; } } // namespace arm_compute -#endif /* ARM_COMPUTE_GPUTARGET_H */ +#endif // ACL_ARM_COMPUTE_CORE_GPUTARGET_H diff --git a/docs/user_guide/release_version_and_change_log.dox b/docs/user_guide/release_version_and_change_log.dox index f1d3b26c0c..894a6078ba 100644 --- a/docs/user_guide/release_version_and_change_log.dox +++ b/docs/user_guide/release_version_and_change_log.dox @@ -44,6 +44,7 @@ If there is more than one release in a month then an extra sequential number is v24.01 Public major release - Remove the legacy 'libarm_compute_core' library. This library is an artifact of Compute Library's legacy library architecture and no longer serves any purpose. You should link only to the main `libarm_compute` library for core functionality. + - Expand GPUTarget list with Maliā„¢ G720 and G620. - New features - Add support for FP16 in all multi_isa builds. 
- Performance optimizations: diff --git a/filelist.json b/filelist.json index 617b01648f..0c9550905e 100644 --- a/filelist.json +++ b/filelist.json @@ -532,7 +532,8 @@ "src/gpu/cl/operators/ClMatMul.cpp", "src/runtime/CL/functions/CLMatMul.cpp", "src/runtime/heuristics/matmul_native/ClMatMulNativeDefaultConfigValhall.cpp", - "src/runtime/heuristics/matmul_native/ClMatMulNativeHelpers.cpp" + "src/runtime/heuristics/matmul_native/ClMatMulNativeHelpers.cpp", + "src/runtime/heuristics/matmul_native/ClMatMulNativeDefaultVariantValhall.cpp" ] } }, diff --git a/src/core/GPUTarget.cpp b/src/core/GPUTarget.cpp index 2d1a13cb33..5904e1a06f 100644 --- a/src/core/GPUTarget.cpp +++ b/src/core/GPUTarget.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2022 Arm Limited. + * Copyright (c) 2018-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -30,6 +30,23 @@ namespace { + +arm_compute::GPUTarget get_fifth_gen_target(const std::string &version) +{ + if (version.find("G720") != std::string::npos) + { + return arm_compute::GPUTarget::G720; + } + else if (version.find("G620") != std::string::npos) + { + return arm_compute::GPUTarget::G620; + } + else + { + return arm_compute::GPUTarget::UNKNOWN; + } +} + arm_compute::GPUTarget get_valhall_target(const std::string &version) { if (version.find("G77") != std::string::npos) @@ -152,16 +169,18 @@ namespace arm_compute const std::string &string_from_target(GPUTarget target) { static std::map gpu_target_map = { - {GPUTarget::MIDGARD, "midgard"}, {GPUTarget::BIFROST, "bifrost"}, {GPUTarget::VALHALL, "valhall"}, - {GPUTarget::T600, "t600"}, {GPUTarget::T700, "t700"}, {GPUTarget::T800, "t800"}, - {GPUTarget::G71, "g71"}, {GPUTarget::G72, "g72"}, {GPUTarget::G51, "g51"}, - {GPUTarget::G51BIG, "g51big"}, {GPUTarget::G51LIT, "g51lit"}, {GPUTarget::G31, "g31"}, - {GPUTarget::G76, "g76"}, {GPUTarget::G52, "g52"}, {GPUTarget::G52LIT, "g52lit"}, - {GPUTarget::G77, "g77"}, {GPUTarget::G57, "g57"}, {GPUTarget::G78, "g78"}, - {GPUTarget::G68, "g68"}, 
{GPUTarget::G78AE, "g78ae"}, {GPUTarget::G710, "g710"}, - {GPUTarget::G610, "g610"}, {GPUTarget::G510, "g510"}, {GPUTarget::G310, "g310"}, - {GPUTarget::G715, "g715"}, {GPUTarget::G615, "g615"}, - }; + {GPUTarget::MIDGARD, "midgard"}, {GPUTarget::BIFROST, "bifrost"}, {GPUTarget::VALHALL, "valhall"}, + {GPUTarget::FIFTHGEN, "fifthgen"}, + + {GPUTarget::T600, "t600"}, {GPUTarget::T700, "t700"}, {GPUTarget::T800, "t800"}, + {GPUTarget::G71, "g71"}, {GPUTarget::G72, "g72"}, {GPUTarget::G51, "g51"}, + {GPUTarget::G51BIG, "g51big"}, {GPUTarget::G51LIT, "g51lit"}, {GPUTarget::G31, "g31"}, + {GPUTarget::G76, "g76"}, {GPUTarget::G52, "g52"}, {GPUTarget::G52LIT, "g52lit"}, + {GPUTarget::G77, "g77"}, {GPUTarget::G57, "g57"}, {GPUTarget::G78, "g78"}, + {GPUTarget::G68, "g68"}, {GPUTarget::G78AE, "g78ae"}, {GPUTarget::G710, "g710"}, + {GPUTarget::G610, "g610"}, {GPUTarget::G510, "g510"}, {GPUTarget::G310, "g310"}, + {GPUTarget::G715, "g715"}, {GPUTarget::G615, "g615"}, {GPUTarget::G720, "g720"}, + {GPUTarget::G620, "g620"}}; return gpu_target_map[target]; } @@ -188,8 +207,13 @@ GPUTarget get_target_from_name(const std::string &device_name) GPUTarget gpu_target; if (target == 'G' || is_future_gpu) { - // Check for Valhall or Bifrost - gpu_target = get_valhall_target(version); + // Check for Valhall, Bifrost or 5-th Gen + gpu_target = get_fifth_gen_target(version); + if (gpu_target == GPUTarget::UNKNOWN) + { + gpu_target = get_valhall_target(version); + } + if (gpu_target == GPUTarget::UNKNOWN) { gpu_target = get_bifrost_target(version); diff --git a/src/gpu/cl/kernels/gemm/native/ClGemmNativeKernelConfig.h b/src/gpu/cl/kernels/gemm/native/ClGemmNativeKernelConfig.h index 955bb3c01a..22aa1e2034 100644 --- a/src/gpu/cl/kernels/gemm/native/ClGemmNativeKernelConfig.h +++ b/src/gpu/cl/kernels/gemm/native/ClGemmNativeKernelConfig.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021 Arm Limited. + * Copyright (c) 2019-2021, 2023 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_CL_GEMM_NATIVE_KERNEL_CONFIGURATION_H -#define ARM_COMPUTE_CL_GEMM_NATIVE_KERNEL_CONFIGURATION_H +#ifndef ACL_SRC_GPU_CL_KERNELS_GEMM_NATIVE_CLGEMMNATIVEKERNELCONFIG_H +#define ACL_SRC_GPU_CL_KERNELS_GEMM_NATIVE_CLGEMMNATIVEKERNELCONFIG_H #include "src/gpu/cl/kernels/gemm/IClGemmKernelConfig.h" #include "src/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeBifrost.h" @@ -58,6 +58,7 @@ public: case GPUTarget::BIFROST: return std::make_unique(gpu); case GPUTarget::VALHALL: + case GPUTarget::FIFTHGEN: return std::make_unique(gpu); default: ARM_COMPUTE_ERROR("Not supported GPU target"); @@ -68,4 +69,4 @@ public: } // namespace kernels } // namespace opencl } // namespace arm_compute -#endif /*ARM_COMPUTE_CL_GEMM_NATIVE_KERNEL_CONFIGURATION_H */ +#endif // ACL_SRC_GPU_CL_KERNELS_GEMM_NATIVE_CLGEMMNATIVEKERNELCONFIG_H diff --git a/src/gpu/cl/kernels/gemm/reshaped/ClGemmReshapedKernelConfig.h b/src/gpu/cl/kernels/gemm/reshaped/ClGemmReshapedKernelConfig.h index 83928b3f4f..6327ee3027 100644 --- a/src/gpu/cl/kernels/gemm/reshaped/ClGemmReshapedKernelConfig.h +++ b/src/gpu/cl/kernels/gemm/reshaped/ClGemmReshapedKernelConfig.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021 Arm Limited. + * Copyright (c) 2019-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef ARM_COMPUTE_CL_GEMM_RESHAPED_KERNEL_CONFIGURATION_H -#define ARM_COMPUTE_CL_GEMM_RESHAPED_KERNEL_CONFIGURATION_H +#ifndef ACL_SRC_GPU_CL_KERNELS_GEMM_RESHAPED_CLGEMMRESHAPEDKERNELCONFIG_H +#define ACL_SRC_GPU_CL_KERNELS_GEMM_RESHAPED_CLGEMMRESHAPEDKERNELCONFIG_H #include "src/gpu/cl/kernels/gemm/IClGemmKernelConfig.h" #include "src/gpu/cl/kernels/gemm/reshaped/ClGemmDefaultConfigReshapedBifrost.h" @@ -56,6 +56,7 @@ public: case GPUTarget::BIFROST: return std::make_unique(gpu); case GPUTarget::VALHALL: + case GPUTarget::FIFTHGEN: return std::make_unique(gpu); default: ARM_COMPUTE_ERROR("Not supported GPU target"); @@ -66,4 +67,4 @@ public: } // namespace kernels } // namespace opencl } // namespace arm_compute -#endif /* ARM_COMPUTE_CL_GEMM_RESHAPED_KERNEL_CONFIGURATION_H */ +#endif // ACL_SRC_GPU_CL_KERNELS_GEMM_RESHAPED_CLGEMMRESHAPEDKERNELCONFIG_H diff --git a/src/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmReshapedOnlyRhsKernelConfig.h b/src/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmReshapedOnlyRhsKernelConfig.h index e07ad993ed..1f0c5c2d87 100644 --- a/src/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmReshapedOnlyRhsKernelConfig.h +++ b/src/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmReshapedOnlyRhsKernelConfig.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021 Arm Limited. + * Copyright (c) 2019-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef ARM_COMPUTE_CL_GEMM_RESHAPED_ONLY_RHS_KERNEL_CONFIGURATION_H -#define ARM_COMPUTE_CL_GEMM_RESHAPED_ONLY_RHS_KERNEL_CONFIGURATION_H +#ifndef ACL_SRC_GPU_CL_KERNELS_GEMM_RESHAPED_ONLY_RHS_CLGEMMRESHAPEDONLYRHSKERNELCONFIG_H +#define ACL_SRC_GPU_CL_KERNELS_GEMM_RESHAPED_ONLY_RHS_CLGEMMRESHAPEDONLYRHSKERNELCONFIG_H #include "src/gpu/cl/kernels/gemm/IClGemmKernelConfig.h" #include "src/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmDefaultConfigReshapedRhsOnlyBifrost.h" @@ -56,6 +56,7 @@ public: case GPUTarget::BIFROST: return std::make_unique(gpu); case GPUTarget::VALHALL: + case GPUTarget::FIFTHGEN: return std::make_unique(gpu); default: ARM_COMPUTE_ERROR("Not supported GPU target"); @@ -66,4 +67,4 @@ public: } // namespace kernels } // namespace opencl } // namespace arm_compute -#endif /* ARM_COMPUTE_CL_GEMM_RESHAPED_ONLY_RHS_KERNEL_CONFIGURATION_H */ +#endif // ACL_SRC_GPU_CL_KERNELS_GEMM_RESHAPED_ONLY_RHS_CLGEMMRESHAPEDONLYRHSKERNELCONFIG_H diff --git a/src/gpu/cl/operators/ClMatMul.cpp b/src/gpu/cl/operators/ClMatMul.cpp index 43303001d0..28a2aa2540 100644 --- a/src/gpu/cl/operators/ClMatMul.cpp +++ b/src/gpu/cl/operators/ClMatMul.cpp @@ -34,6 +34,7 @@ #include "src/gpu/cl/kernels/ClMatMulNativeMMULKernel.h" #include "src/runtime/heuristics/matmul_native/ClMatMulNativeDefaultConfigValhall.h" #include "src/runtime/heuristics/matmul_native/ClMatMulNativeKernelConfig.h" +#include "src/runtime/heuristics/matmul_native/ClMatMulNativeKernelVariant.h" #include "src/runtime/heuristics/matmul_native/IClMatMulNativeKernelConfig.h" using namespace arm_compute::cl_matmul; @@ -42,57 +43,6 @@ namespace arm_compute { namespace opencl { -namespace -{ -enum class MatMulKernelType -{ - /** Native matrix multiplication for FP types */ - NATIVE_FP, - - /** Native matrix multiplication for quantized types */ - NATIVE_QUANTIZED, - - /** Native matrix multiplication using MMUL extension for FP types */ - NATIVE_MMUL_FP, - - /** Native matrix multiplication using MMUL extension 
for Quantized types */ - NATIVE_MMUL_QUANTIZED -}; - -MatMulKernelType get_matmul_kernel(const ITensorInfo *lhs, - const ITensorInfo *rhs, - const MatMulInfo &matmul_info, - const ActivationLayerInfo &act_info) -{ - ARM_COMPUTE_UNUSED(lhs, rhs, matmul_info, act_info); - - const bool is_quantized = is_data_type_quantized_asymmetric(lhs->data_type()); - const bool is_mmul_supported = arm_matrix_multiply_supported(CLKernelLibrary::get().get_device()); - - const int k = matmul_info.adj_lhs() ? lhs->tensor_shape().y() : lhs->tensor_shape().x(); - - if (is_quantized) - { - // MMUL kernel works only when K is a multiple of 16 - if (is_mmul_supported && !act_info.enabled() && k % 16 == 0) - { - return MatMulKernelType::NATIVE_MMUL_QUANTIZED; - } - - return MatMulKernelType::NATIVE_QUANTIZED; - } - else - { - // MMUL kernel works only when K is a multiple of 4 - if (is_mmul_supported && !act_info.enabled() && k % 4 == 0) - { - return MatMulKernelType::NATIVE_MMUL_FP; - } - - return MatMulKernelType::NATIVE_FP; - } -} -} // namespace using namespace arm_compute::opencl::kernels; ClMatMul::ClMatMul() @@ -117,7 +67,10 @@ Status ClMatMul::validate(const ITensorInfo *lhs, const MatMulKernelInfo kernel_info = t->configure(lhs, rhs, matmul_info); - switch (get_matmul_kernel(lhs, rhs, matmul_info, act_info)) + const auto kernel_selector = ClMatMulNativeKernelVariantFactory::create(gpu_target); + const MatMulKernelType kernel_type = kernel_selector->select_kernel(lhs, rhs, matmul_info, act_info); + + switch (kernel_type) { case MatMulKernelType::NATIVE_FP: return ClMatMulNativeKernel::validate(lhs, rhs, nullptr /* bias */, dst, kernel_info, act_info); @@ -149,7 +102,10 @@ void ClMatMul::configure(const CLCompileContext &compile_context, const auto kernel_config = ClMatMulNativeKernelConfigurationFactory::create(gpu_target); const MatMulKernelInfo kernel_info = kernel_config->configure(lhs, rhs, matmul_info); - switch (get_matmul_kernel(lhs, rhs, matmul_info, act_info)) + const auto 
kernel_selector = ClMatMulNativeKernelVariantFactory::create(gpu_target); + const MatMulKernelType kernel_type = kernel_selector->select_kernel(lhs, rhs, matmul_info, act_info); + + switch (kernel_type) { case MatMulKernelType::NATIVE_FP: { diff --git a/src/runtime/CL/gemm/CLGEMMKernelSelection.h b/src/runtime/CL/gemm/CLGEMMKernelSelection.h index c528dbcac4..98dd44b1bf 100644 --- a/src/runtime/CL/gemm/CLGEMMKernelSelection.h +++ b/src/runtime/CL/gemm/CLGEMMKernelSelection.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020 Arm Limited. + * Copyright (c) 2020, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef SRC_CLGEMMKERNELSELECTION_H -#define SRC_CLGEMMKERNELSELECTION_H +#ifndef ACL_SRC_RUNTIME_CL_GEMM_CLGEMMKERNELSELECTION_H +#define ACL_SRC_RUNTIME_CL_GEMM_CLGEMMKERNELSELECTION_H #include "arm_compute/runtime/CL/ICLGEMMKernelSelection.h" @@ -53,6 +53,7 @@ public: case GPUTarget::BIFROST: return std::make_unique(gpu); case GPUTarget::VALHALL: + case GPUTarget::FIFTHGEN: return std::make_unique(gpu); default: ARM_COMPUTE_ERROR("Not supported GPU target"); @@ -61,4 +62,4 @@ public: }; } // namespace cl_gemm } // namespace arm_compute -#endif /* SRC_CLGEMMKERNELSELECTION_H */ +#endif // ACL_SRC_RUNTIME_CL_GEMM_CLGEMMKERNELSELECTION_H diff --git a/src/runtime/heuristics/direct_conv/ClDirectConvKernelConfig.h b/src/runtime/heuristics/direct_conv/ClDirectConvKernelConfig.h index 2c2509f70b..215b17ef79 100644 --- a/src/runtime/heuristics/direct_conv/ClDirectConvKernelConfig.h +++ b/src/runtime/heuristics/direct_conv/ClDirectConvKernelConfig.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 Arm Limited. + * Copyright (c) 2022-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef SRC_RUNTIME_HEURISTICS_DIRECT_CONV_CLDIRECTCONVKERNELCONFIG -#define SRC_RUNTIME_HEURISTICS_DIRECT_CONV_CLDIRECTCONVKERNELCONFIG +#ifndef ACL_SRC_RUNTIME_HEURISTICS_DIRECT_CONV_CLDIRECTCONVKERNELCONFIG_H +#define ACL_SRC_RUNTIME_HEURISTICS_DIRECT_CONV_CLDIRECTCONVKERNELCONFIG_H #include "src/runtime/heuristics/direct_conv/ClDirectConvDefaultConfigBifrost.h" #include "src/runtime/heuristics/direct_conv/ClDirectConvDefaultConfigValhall.h" @@ -53,6 +53,7 @@ public: case GPUTarget::BIFROST: return std::make_unique(gpu); case GPUTarget::VALHALL: + case GPUTarget::FIFTHGEN: return std::make_unique(gpu); default: ARM_COMPUTE_ERROR("Not supported GPU target"); @@ -61,4 +62,4 @@ public: }; } // namespace cl_direct_conv } // namespace arm_compute -#endif /* SRC_RUNTIME_HEURISTICS_DIRECT_CONV_CLDIRECTCONVKERNELCONFIG */ +#endif // ACL_SRC_RUNTIME_HEURISTICS_DIRECT_CONV_CLDIRECTCONVKERNELCONFIG_H diff --git a/src/runtime/heuristics/dwc_native/ClDWCNativeKernelConfig.h b/src/runtime/heuristics/dwc_native/ClDWCNativeKernelConfig.h index 49ce6ff479..031cf1859a 100644 --- a/src/runtime/heuristics/dwc_native/ClDWCNativeKernelConfig.h +++ b/src/runtime/heuristics/dwc_native/ClDWCNativeKernelConfig.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 Arm Limited. + * Copyright (c) 2022-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef SRC_RUNTIME_HEURISTICS_DWC_NATIVE_CLDWCNATIVEKERNELCONFIG -#define SRC_RUNTIME_HEURISTICS_DWC_NATIVE_CLDWCNATIVEKERNELCONFIG +#ifndef ACL_SRC_RUNTIME_HEURISTICS_DWC_NATIVE_CLDWCNATIVEKERNELCONFIG_H +#define ACL_SRC_RUNTIME_HEURISTICS_DWC_NATIVE_CLDWCNATIVEKERNELCONFIG_H #include "src/runtime/heuristics/dwc_native/ClDWCNativeDefaultConfigBifrost.h" #include "src/runtime/heuristics/dwc_native/ClDWCNativeDefaultConfigValhall.h" @@ -54,6 +54,7 @@ public: case GPUTarget::BIFROST: return std::make_unique(gpu); case GPUTarget::VALHALL: + case GPUTarget::FIFTHGEN: return std::make_unique(gpu); default: ARM_COMPUTE_ERROR("Not supported GPU target"); @@ -62,4 +63,4 @@ public: }; } // namespace cl_dwc } // namespace arm_compute -#endif /* SRC_RUNTIME_HEURISTICS_DWC_NATIVE_CLDWCNATIVEKERNELCONFIG */ +#endif // ACL_SRC_RUNTIME_HEURISTICS_DWC_NATIVE_CLDWCNATIVEKERNELCONFIG_H diff --git a/src/runtime/heuristics/indirect_conv/ClIndirectConvKernelConfig.h b/src/runtime/heuristics/indirect_conv/ClIndirectConvKernelConfig.h index dd614e1f68..5e7ba6f8e9 100644 --- a/src/runtime/heuristics/indirect_conv/ClIndirectConvKernelConfig.h +++ b/src/runtime/heuristics/indirect_conv/ClIndirectConvKernelConfig.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 Arm Limited. + * Copyright (c) 2022-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef SRC_RUNTIME_HEURISTICS_INDIRECT_CONV_CLINDIRECTCONVKERNELCONFIG -#define SRC_RUNTIME_HEURISTICS_INDIRECT_CONV_CLINDIRECTCONVKERNELCONFIG +#ifndef ACL_SRC_RUNTIME_HEURISTICS_INDIRECT_CONV_CLINDIRECTCONVKERNELCONFIG_H +#define ACL_SRC_RUNTIME_HEURISTICS_INDIRECT_CONV_CLINDIRECTCONVKERNELCONFIG_H #include "src/runtime/heuristics/indirect_conv/ClIndirectConvDefaultConfigValhall.h" #include "src/runtime/heuristics/indirect_conv/IClIndirectConvKernelConfig.h" @@ -50,6 +50,7 @@ public: case GPUTarget::MIDGARD: case GPUTarget::BIFROST: case GPUTarget::VALHALL: + case GPUTarget::FIFTHGEN: return std::make_unique(gpu); default: ARM_COMPUTE_ERROR("Not supported GPU target"); @@ -58,4 +59,4 @@ public: }; } // namespace cl_indirect_conv } // namespace arm_compute -#endif /* SRC_RUNTIME_HEURISTICS_INDIRECT_CONV_CLINDIRECTCONVKERNELCONFIG */ +#endif // ACL_SRC_RUNTIME_HEURISTICS_INDIRECT_CONV_CLINDIRECTCONVKERNELCONFIG_H diff --git a/src/runtime/heuristics/matmul_native/ClMatMulNativeDefaultConfigValhall.cpp b/src/runtime/heuristics/matmul_native/ClMatMulNativeDefaultConfigValhall.cpp index 4b923547c4..3a02a60650 100644 --- a/src/runtime/heuristics/matmul_native/ClMatMulNativeDefaultConfigValhall.cpp +++ b/src/runtime/heuristics/matmul_native/ClMatMulNativeDefaultConfigValhall.cpp @@ -62,6 +62,7 @@ ClMatMulNativeDefaultConfigValhall::configure(const ITensorInfo *lhs, const ITen switch (_target) { case GPUTarget::G715: + case GPUTarget::G615: func = configs_G715.get_function(lhs->data_type()); break; case GPUTarget::G710: diff --git a/src/runtime/heuristics/matmul_native/ClMatMulNativeDefaultVariantValhall.cpp b/src/runtime/heuristics/matmul_native/ClMatMulNativeDefaultVariantValhall.cpp new file mode 100644 index 0000000000..3878f698fd --- /dev/null +++ b/src/runtime/heuristics/matmul_native/ClMatMulNativeDefaultVariantValhall.cpp @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2023 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "src/runtime/heuristics/matmul_native/ClMatMulNativeDefaultVariantValhall.h" + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/GPUTarget.h" +#include "arm_compute/core/TensorInfo.h" + +namespace arm_compute +{ +namespace cl_matmul +{ +ClMatMulNativeDefaultVariantValhall::ClMatMulNativeDefaultVariantValhall(GPUTarget gpu) + : IClMatMulNativeKernelVariant(gpu) +{ +} + +MatMulKernelType ClMatMulNativeDefaultVariantValhall::select_kernel(const ITensorInfo *lhs, + const ITensorInfo *rhs, + const MatMulInfo &info, + const ActivationLayerInfo &act_info) +{ + ARM_COMPUTE_UNUSED(rhs); + + using VariantFunctionExecutorPtr = + MatMulKernelType (ClMatMulNativeDefaultVariantValhall::*)(int k, bool act_enabled); + + ClMatMulNativeVariantArray configs_G715( + &ClMatMulNativeDefaultVariantValhall::configure_G715_float, + &ClMatMulNativeDefaultVariantValhall::configure_G715_quantized); + + ClMatMulNativeVariantArray configs_default( + &ClMatMulNativeDefaultVariantValhall::configure_default_float, + &ClMatMulNativeDefaultVariantValhall::configure_default_quantized); + + VariantFunctionExecutorPtr func = nullptr; + switch (_target) + { + case GPUTarget::G715: + case GPUTarget::G615: + func = configs_G715.get_function(lhs->data_type()); + break; + default: + func = configs_default.get_function(lhs->data_type()); + break; + } + + const int k = info.adj_lhs() ? 
lhs->tensor_shape().y() : lhs->tensor_shape().x(); + const bool act_enabled = act_info.enabled(); + + ARM_COMPUTE_ERROR_ON_MSG(func == nullptr, "Data type not supported for matmul native"); + return (this->*func)(k, act_enabled); +} + +MatMulKernelType ClMatMulNativeDefaultVariantValhall::configure_G715_float(int k, bool act_enabled) +{ + // MMUL kernel works only when K is a multiple of 4 + if (!act_enabled && k % 4 == 0) + { + return MatMulKernelType::NATIVE_MMUL_FP; + } + + return MatMulKernelType::NATIVE_FP; +} + +MatMulKernelType ClMatMulNativeDefaultVariantValhall::configure_G715_quantized(int k, bool act_enabled) +{ + // MMUL kernel works only when K is a multiple of 16 + if (!act_enabled && k % 16 == 0) + { + return MatMulKernelType::NATIVE_MMUL_QUANTIZED; + } + + return MatMulKernelType::NATIVE_QUANTIZED; +} + +MatMulKernelType ClMatMulNativeDefaultVariantValhall::configure_default_float(int k, bool act_enabled) +{ + ARM_COMPUTE_UNUSED(k, act_enabled); + + return MatMulKernelType::NATIVE_FP; +} + +MatMulKernelType ClMatMulNativeDefaultVariantValhall::configure_default_quantized(int k, bool act_enabled) +{ + ARM_COMPUTE_UNUSED(k, act_enabled); + + return MatMulKernelType::NATIVE_QUANTIZED; +} + +} // namespace cl_matmul +} // namespace arm_compute diff --git a/src/runtime/heuristics/matmul_native/ClMatMulNativeDefaultVariantValhall.h b/src/runtime/heuristics/matmul_native/ClMatMulNativeDefaultVariantValhall.h new file mode 100644 index 0000000000..a202676e98 --- /dev/null +++ b/src/runtime/heuristics/matmul_native/ClMatMulNativeDefaultVariantValhall.h @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2023 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ACL_SRC_RUNTIME_HEURISTICS_MATMUL_NATIVE_CLMATMULNATIVEDEFAULTVARIANTVALHALL_H +#define ACL_SRC_RUNTIME_HEURISTICS_MATMUL_NATIVE_CLMATMULNATIVEDEFAULTVARIANTVALHALL_H + +#include "src/runtime/heuristics/matmul_native/IClMatMulNativeKernelVariant.h" + +namespace arm_compute +{ +namespace cl_matmul +{ +/** Valhall based OpenCL matmul configuration */ +class ClMatMulNativeDefaultVariantValhall final : public IClMatMulNativeKernelVariant +{ +public: + /** Constructor + * + * @param[in] gpu GPU target + */ + ClMatMulNativeDefaultVariantValhall(GPUTarget gpu); + + // Inherited overridden method + MatMulKernelType select_kernel(const ITensorInfo *lhs, + const ITensorInfo *rhs, + const MatMulInfo &info, + const ActivationLayerInfo &act_info) override; + +private: + MatMulKernelType configure_G715_float(int k, bool act_enabled); + MatMulKernelType configure_G715_quantized(int k, bool act_enabled); + MatMulKernelType configure_default_float(int k, bool act_enabled); + MatMulKernelType configure_default_quantized(int k, bool act_enabled); +}; +} // namespace cl_matmul +} // namespace arm_compute +#endif // ACL_SRC_RUNTIME_HEURISTICS_MATMUL_NATIVE_CLMATMULNATIVEDEFAULTVARIANTVALHALL_H diff --git a/src/runtime/heuristics/matmul_native/ClMatMulNativeHelpers.h b/src/runtime/heuristics/matmul_native/ClMatMulNativeHelpers.h index a114fffa68..699f5fe8c1 100644 --- a/src/runtime/heuristics/matmul_native/ClMatMulNativeHelpers.h +++ b/src/runtime/heuristics/matmul_native/ClMatMulNativeHelpers.h @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef SRC_RUNTIME_HEURISTICS_MATMUL_NATIVE_CLMATMULNATIVEHELPERS -#define SRC_RUNTIME_HEURISTICS_MATMUL_NATIVE_CLMATMULNATIVEHELPERS +#ifndef ACL_SRC_RUNTIME_HEURISTICS_MATMUL_NATIVE_CLMATMULNATIVEHELPERS_H +#define ACL_SRC_RUNTIME_HEURISTICS_MATMUL_NATIVE_CLMATMULNATIVEHELPERS_H #include "arm_compute/core/Types.h" @@ -80,4 +80,4 @@ MatMulKernelInfo find_info(const MatMulNativeConfigsMatrix &configs, unsigned int b); } // namespace cl_matmul } // namespace arm_compute -#endif /* SRC_RUNTIME_HEURISTICS_MATMUL_NATIVE_CLMATMULNATIVEHELPERS */ +#endif // ACL_SRC_RUNTIME_HEURISTICS_MATMUL_NATIVE_CLMATMULNATIVEHELPERS_H diff --git a/src/runtime/heuristics/matmul_native/ClMatMulNativeKernelConfig.h b/src/runtime/heuristics/matmul_native/ClMatMulNativeKernelConfig.h index b10018a6d2..e7485bca81 100644 --- a/src/runtime/heuristics/matmul_native/ClMatMulNativeKernelConfig.h +++ b/src/runtime/heuristics/matmul_native/ClMatMulNativeKernelConfig.h @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef SRC_RUNTIME_HEURISTICS_MATMUL_NATIVE_CLMATMULNATIVEKERNELCONFIG -#define SRC_RUNTIME_HEURISTICS_MATMUL_NATIVE_CLMATMULNATIVEKERNELCONFIG +#ifndef ACL_SRC_RUNTIME_HEURISTICS_MATMUL_NATIVE_CLMATMULNATIVEKERNELCONFIG_H +#define ACL_SRC_RUNTIME_HEURISTICS_MATMUL_NATIVE_CLMATMULNATIVEKERNELCONFIG_H #include "src/runtime/heuristics/matmul_native/ClMatMulNativeDefaultConfigValhall.h" #include "src/runtime/heuristics/matmul_native/IClMatMulNativeKernelConfig.h" @@ -50,6 +50,7 @@ public: case GPUTarget::MIDGARD: case GPUTarget::BIFROST: case GPUTarget::VALHALL: + case GPUTarget::FIFTHGEN: return std::make_unique(gpu); default: ARM_COMPUTE_ERROR("Not supported GPU target"); @@ -58,4 +59,4 @@ public: }; } // namespace cl_matmul } // namespace arm_compute -#endif /* SRC_RUNTIME_HEURISTICS_MATMUL_NATIVE_CLMATMULNATIVEKERNELCONFIG */ +#endif // ACL_SRC_RUNTIME_HEURISTICS_MATMUL_NATIVE_CLMATMULNATIVEKERNELCONFIG_H diff --git a/src/runtime/heuristics/matmul_native/ClMatMulNativeKernelVariant.h b/src/runtime/heuristics/matmul_native/ClMatMulNativeKernelVariant.h new file mode 100644 index 0000000000..c2895b8919 --- /dev/null +++ b/src/runtime/heuristics/matmul_native/ClMatMulNativeKernelVariant.h @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ACL_SRC_RUNTIME_HEURISTICS_MATMUL_NATIVE_CLMATMULNATIVEKERNELVARIANT_H +#define ACL_SRC_RUNTIME_HEURISTICS_MATMUL_NATIVE_CLMATMULNATIVEKERNELVARIANT_H + +#include "src/runtime/heuristics/matmul_native/ClMatMulNativeDefaultVariantValhall.h" +#include "src/runtime/heuristics/matmul_native/IClMatMulNativeKernelVariant.h" + +#include + +namespace arm_compute +{ +namespace cl_matmul +{ + +/** ClMatMul variant factory class */ +class ClMatMulNativeKernelVariantFactory final +{ +public: + /** Static method to call the ClMatMul variant class according to the GPU target + * + * @param[in] gpu GPU target + * + * @return IClMatMulNativeKernelVariant + */ + static std::unique_ptr create(GPUTarget gpu) + { + switch (get_arch_from_target(gpu)) + { + case GPUTarget::MIDGARD: + case GPUTarget::BIFROST: + case GPUTarget::VALHALL: + case GPUTarget::FIFTHGEN: + return std::make_unique(gpu); + default: + ARM_COMPUTE_ERROR("Not supported GPU target"); + } + } +}; +} // namespace cl_matmul +} // namespace arm_compute +#endif // ACL_SRC_RUNTIME_HEURISTICS_MATMUL_NATIVE_CLMATMULNATIVEKERNELVARIANT_H diff --git a/src/runtime/heuristics/matmul_native/IClMatMulNativeKernelConfig.h b/src/runtime/heuristics/matmul_native/IClMatMulNativeKernelConfig.h index b9b091100c..00ba3641d5 100644 --- a/src/runtime/heuristics/matmul_native/IClMatMulNativeKernelConfig.h +++ b/src/runtime/heuristics/matmul_native/IClMatMulNativeKernelConfig.h @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef SRC_RUNTIME_HEURISTICS_MATMUL_NATIVE_ICLMATMULNATIVEKERNELCONFIG -#define SRC_RUNTIME_HEURISTICS_MATMUL_NATIVE_ICLMATMULNATIVEKERNELCONFIG +#ifndef ACL_SRC_RUNTIME_HEURISTICS_MATMUL_NATIVE_ICLMATMULNATIVEKERNELCONFIG_H +#define ACL_SRC_RUNTIME_HEURISTICS_MATMUL_NATIVE_ICLMATMULNATIVEKERNELCONFIG_H #include "arm_compute/core/GPUTarget.h" #include "arm_compute/core/KernelDescriptors.h" @@ -85,7 +85,9 @@ private: std::array _configs; }; -/** Basic interface for the matmul native kernel configuration */ +/** Basic interface for the matmul native kernel configuration + * This is the base class that chooses architecture specific kernel configurations. +*/ class IClMatMulNativeKernelConfig { public: @@ -112,4 +114,4 @@ protected: }; } // namespace cl_matmul } // namespace arm_compute -#endif /* SRC_RUNTIME_HEURISTICS_MATMUL_NATIVE_ICLMATMULNATIVEKERNELCONFIG */ +#endif // ACL_SRC_RUNTIME_HEURISTICS_MATMUL_NATIVE_ICLMATMULNATIVEKERNELCONFIG_H diff --git a/src/runtime/heuristics/matmul_native/IClMatMulNativeKernelVariant.h b/src/runtime/heuristics/matmul_native/IClMatMulNativeKernelVariant.h new file mode 100644 index 0000000000..eac41dd6a3 --- /dev/null +++ b/src/runtime/heuristics/matmul_native/IClMatMulNativeKernelVariant.h @@ -0,0 +1,135 @@ +/* + * Copyright (c) 2023 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ACL_SRC_RUNTIME_HEURISTICS_MATMUL_NATIVE_ICLMATMULNATIVEKERNELVARIANT_H +#define ACL_SRC_RUNTIME_HEURISTICS_MATMUL_NATIVE_ICLMATMULNATIVEKERNELVARIANT_H + +#include "arm_compute/core/CoreTypes.h" // DataType +#include "arm_compute/core/GPUTarget.h" +#include "arm_compute/core/ITensorInfo.h" +#include "arm_compute/function_info/ActivationLayerInfo.h" +#include "arm_compute/function_info/MatMulInfo.h" + +#include "src/core/common/Macros.h" + +#include + +namespace arm_compute +{ +namespace cl_matmul +{ +enum class MatMulKernelType +{ + /** Native matrix multiplication for FP types */ + NATIVE_FP, + + /** Native matrix multiplication for quantized types */ + NATIVE_QUANTIZED, + + /** Native matrix multiplication using MMUL extension for FP types */ + NATIVE_MMUL_FP, + + /** Native matrix multiplication using MMUL extension for Quantized types */ + NATIVE_MMUL_QUANTIZED +}; + +/** Basic container for the OpenCL MatMul Native variant functions */ +template +class ClMatMulNativeVariantArray +{ +public: + /** Alias for Float index */ + static constexpr size_t DT_FLOAT = 0; + /** Alias for Quantized type index */ + static constexpr size_t DT_QUANTIZED = 1; + + /** Constructor + * + * @param[in] func_float Function to call for matmul native float (F32, F16) + * @param[in] func_quantized Function to call for matmul native quantized (QASYMM8, QASYMM8_SIGNED, QSYMM8_PER_CHANNEL) + * + */ + ClMatMulNativeVariantArray(T func_float, T func_quantized) : _configs{func_float, func_quantized} + { + } + + /** Method to return the matmul native variant function based on data type + * + * @param[in] data_type Input data type + * + * @return the valid function otherwise it returns nullptr if the data type is not valid + */ + T get_function(DataType data_type) + { + switch (data_type) + { + case DataType::F32: + case DataType::F16: + return _configs.at(DT_FLOAT); + case DataType::QASYMM8: + case DataType::QASYMM8_SIGNED: + case DataType::QSYMM8_PER_CHANNEL: + return 
_configs.at(DT_QUANTIZED); + default: + return nullptr; + } + } + +private: + std::array _configs; +}; + +/** Basic interface for the matmul native kernel variant + * This is the base class that chooses architecture specific kernel variants. +*/ +class IClMatMulNativeKernelVariant +{ +public: + /** Constructor + * + * @param[in] arch GPU target + */ + IClMatMulNativeKernelVariant(GPUTarget arch) : _target(arch) + { + } + ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(IClMatMulNativeKernelVariant); + /** Virtual destructor */ + virtual ~IClMatMulNativeKernelVariant() = default; + /** This method returns the @ref MatMulKernelType for the given inputs + * + * @param[in] lhs LHS tensor + * @param[in] rhs RHS tensor + * @param[in] info MatMul info + * @param[in] act_info Activation layer info + */ + virtual MatMulKernelType select_kernel(const ITensorInfo *lhs, + const ITensorInfo *rhs, + const MatMulInfo &info, + const ActivationLayerInfo &act_info) = 0; + +protected: + GPUTarget _target; +}; +} // namespace cl_matmul +} // namespace arm_compute +#endif // ACL_SRC_RUNTIME_HEURISTICS_MATMUL_NATIVE_ICLMATMULNATIVEKERNELVARIANT_H diff --git a/tests/validation/UNIT/GPUTarget.cpp b/tests/validation/UNIT/GPUTarget.cpp index 5ec2592f00..2e64635b7a 100644 --- a/tests/validation/UNIT/GPUTarget.cpp +++ b/tests/validation/UNIT/GPUTarget.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2022 Arm Limited. + * Copyright (c) 2018-2023 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -62,6 +62,8 @@ TEST_CASE(GetGPUTargetFromName, framework::DatasetMode::ALL) ARM_COMPUTE_EXPECT(get_target_from_name("Mali-G310") == GPUTarget::G310, framework::LogLevel::ERRORS); ARM_COMPUTE_EXPECT(get_target_from_name("Mali-G715") == GPUTarget::G715, framework::LogLevel::ERRORS); ARM_COMPUTE_EXPECT(get_target_from_name("Mali-G615") == GPUTarget::G615, framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(get_target_from_name("Mali-G720") == GPUTarget::G720, framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(get_target_from_name("Mali-G620") == GPUTarget::G620, framework::LogLevel::ERRORS); } TEST_CASE(GPUTargetIsIn, framework::DatasetMode::ALL) diff --git a/utils/TypePrinter.h b/utils/TypePrinter.h index 4f14d985af..e8831a354c 100644 --- a/utils/TypePrinter.h +++ b/utils/TypePrinter.h @@ -2230,6 +2230,9 @@ inline ::std::ostream &operator<<(::std::ostream &os, const GPUTarget &gpu_targe case GPUTarget::VALHALL: os << "VALHALL"; break; + case GPUTarget::FIFTHGEN: + os << "FIFTHGEN"; + break; case GPUTarget::T600: os << "T600"; break; @@ -2299,6 +2302,12 @@ inline ::std::ostream &operator<<(::std::ostream &os, const GPUTarget &gpu_targe case GPUTarget::G615: os << "G615"; break; + case GPUTarget::G720: + os << "G720"; + break; + case GPUTarget::G620: + os << "G620"; + break; default: ARM_COMPUTE_ERROR("NOT_SUPPORTED!"); } -- cgit v1.2.1