From d9c1d44185e4fea72ce4d37d76021237fe64eeaf Mon Sep 17 00:00:00 2001
From: Adnan AlSinan <adnan.alsinan@arm.com>
Date: Fri, 6 Oct 2023 19:07:48 +0100
Subject: Port MatMul to Dynamic Fusion + CKW boilerplate code

- Port MatMul to Dynamic Fusion.
- Prepare CKW boilerplate code.
- Implement the following classes:
 - MatMulAttributes
 - GPUMatMulSettings
 - GpuMatMul
 - ClComponentMatMul
 - GpuCkwMatMul

Signed-off-by: Adnan AlSinan <adnan.alsinan@arm.com>
Change-Id: I5a7c183b293973e8a4233b554b2affe0bb28f44d
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10453
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Jakub Sujak <jakub.sujak@arm.com>
Reviewed-by: Viet-Hoa Do <viet-hoa.do@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
---
 .../sketch/attributes/MatMulAttributes.cpp         |  52 +++++
 .../gpu/ckw_driver/components/GpuCkwMatMul.cpp     |  71 ++++++
 .../gpu/ckw_driver/components/GpuCkwMatMul.h       |  85 +++++++
 .../sketch/gpu/components/cl/ClComponentMatMul.cpp | 151 +++++++++++++
 .../sketch/gpu/components/cl/ClComponentMatMul.h   | 123 ++++++++++
 .../sketch/gpu/operators/GpuMatMul.cpp             | 249 +++++++++++++++++++++
 6 files changed, 731 insertions(+)
 create mode 100644 src/dynamic_fusion/sketch/attributes/MatMulAttributes.cpp
 create mode 100644 src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwMatMul.cpp
 create mode 100644 src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwMatMul.h
 create mode 100644 src/dynamic_fusion/sketch/gpu/components/cl/ClComponentMatMul.cpp
 create mode 100644 src/dynamic_fusion/sketch/gpu/components/cl/ClComponentMatMul.h
 create mode 100644 src/dynamic_fusion/sketch/gpu/operators/GpuMatMul.cpp

(limited to 'src')

diff --git a/src/dynamic_fusion/sketch/attributes/MatMulAttributes.cpp b/src/dynamic_fusion/sketch/attributes/MatMulAttributes.cpp
new file mode 100644
index 0000000000..027b550377
--- /dev/null
+++ b/src/dynamic_fusion/sketch/attributes/MatMulAttributes.cpp
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/dynamic_fusion/sketch/attributes/MatMulAttributes.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+MatMulAttributes MatMulAttributes::adj_lhs(bool adj_lhs)
+{
+    _adj_lhs = adj_lhs; // Store the adj_lhs flag on this object
+    return *this;       // Return type is a value, so the caller receives a copy of the updated attributes
+}
+MatMulAttributes MatMulAttributes::adj_rhs(bool adj_rhs)
+{
+    _adj_rhs = adj_rhs; // Store the adj_rhs flag on this object
+    return *this;       // Return type is a value, so the caller receives a copy of the updated attributes
+}
+bool MatMulAttributes::adj_lhs() const
+{
+    return _adj_lhs; // Current value of the adj_lhs flag
+}
+bool MatMulAttributes::adj_rhs() const
+{
+    return _adj_rhs; // Current value of the adj_rhs flag
+}
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwMatMul.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwMatMul.cpp
new file mode 100644
index 0000000000..77e5f7af01
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwMatMul.cpp
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwMatMul.h"
+
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+
+GpuCkwMatMul::GpuCkwMatMul(ComponentId                      id,
+                           const ArgumentPack<ITensorInfo> &tensors,
+                           const Attributes                &attributes,
+                           const Settings                  &settings)
+    : IGpuCkwComponentDriver{id, tensors},
+      _lhs{this->tensors().get_const_tensor(TensorType::ACL_SRC_0)},
+      _rhs{this->tensors().get_const_tensor(TensorType::ACL_SRC_1)},
+      _dst{this->tensors().get_const_tensor(TensorType::ACL_DST_0)}, _attributes{attributes}, _settings{settings}
+{
+    ARM_COMPUTE_ERROR_ON_NULLPTR(_lhs, _rhs, _dst); // LHS, RHS and destination must all be in the pack
+}
+
+void GpuCkwMatMul::write_component_code(const ComponentGroup    &comp_group,
+                                        GpuCkwVariableTable     &vtable,
+                                        GpuCkwScopedKernelWriter writer) const
+{
+    ARM_COMPUTE_UNUSED(comp_group, vtable, writer); // Boilerplate only: no kernel code is written yet
+}
+
+Window GpuCkwMatMul::get_window() const
+{
+    ARM_COMPUTE_ERROR_ON_MSG(_dst->tensor_shape().total_size() == 0U, "Destination tensor is not initialized");
+    return Window(); // Placeholder: a default-constructed window is returned for now
+}
+
+std::string GpuCkwMatMul::get_name(const ComponentGroup &comp_group) const
+{
+    ARM_COMPUTE_UNUSED(comp_group); // The name does not depend on the component group
+
+    return "MatMul";
+}
+
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwMatMul.h b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwMatMul.h
new file mode 100644
index 0000000000..ae2ea09f05
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwMatMul.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_COMPONENTS_GPUCKWMATMUL_H
+#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_COMPONENTS_GPUCKWMATMUL_H
+
+#include "arm_compute/dynamic_fusion/sketch/attributes/MatMulAttributes.h"
+
+#include "src/core/common/Macros.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/IGpuCkwComponentDriver.h"
+#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentMatMul.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+class GpuCkwMatMul final : public IGpuCkwComponentDriver
+{
+public:
+    using Attributes = ClComponentMatMul::Attributes;
+    using Settings   = ClComponentMatMul::Settings;
+
+public:
+    /** Constructor
+     *
+     * The supported configurations are those accepted by @ref ClComponentMatMul::validate()
+     *
+     * @param[in] id         Component id
+     * @param[in] tensors    Tensor arguments to the component (ACL_SRC_0, ACL_SRC_1 and ACL_DST_0 are read)
+     * @param[in] attributes Component attributes. Attributes are a set of parameters that define what a component does
+     * @param[in] settings   Component settings. Settings are a set of parameters that influence the implementation of a component
+     */
+    GpuCkwMatMul(ComponentId                      id,
+                 const ArgumentPack<ITensorInfo> &tensors,
+                 const Attributes                &attributes,
+                 const Settings                  &settings);
+
+    ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(GpuCkwMatMul);
+
+    /** Destructor */
+    ~GpuCkwMatMul() override = default;
+
+    // Inherited methods overridden
+    void        write_component_code(const ComponentGroup    &comp_group,
+                                     GpuCkwVariableTable     &vtable,
+                                     GpuCkwScopedKernelWriter writer) const override;
+    Window      get_window() const override;
+    std::string get_name(const ComponentGroup &comp_group) const override;
+
+private:
+    const ITensorInfo *_lhs;
+    const ITensorInfo *_rhs;
+    const ITensorInfo *_dst;
+    // NOTE(review): const members presumably make the defaulted move assignment deleted - confirm intent
+    const Attributes _attributes;
+    const Settings   _settings;
+};
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+
+#endif // ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_COMPONENTS_GPUCKWMATMUL_H
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentMatMul.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentMatMul.cpp
new file mode 100644
index 0000000000..eada61e1b3
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentMatMul.cpp
@@ -0,0 +1,151 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifdef ACL_INTERNAL_TEST_CKW_IN_DF
+
+#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentMatMul.h"
+
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/dynamic_fusion/sketch/attributes/MatMulAttributes.h"
+
+#include "src/core/CL/CLValidate.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwMatMul.h"
+#include "src/gpu/cl/kernels/helpers/MatMulKernelHelpers.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+namespace
+{
+using Attributes = MatMulAttributes;
+using Settings   = GpuMatMulSettings;
+
+Status validate_matmul_kernel_info(Attributes attributes, Settings settings)
+{
+    // True when a (positive) block size is none of the supported values 1,2,3,4,8,16
+    const auto is_unsupported = [](int v) { return (v > 16) || ((v != 3) && ((v & (v - 1)) != 0)); };
+
+    const bool lhs_transposed = attributes.adj_lhs();
+    const bool rhs_transposed = attributes.adj_rhs();
+
+    // M0: must be positive; limited to the supported set only when Lhs is transposed
+    const int m0 = settings.m0();
+    ARM_COMPUTE_RETURN_ERROR_ON_MSG(m0 < 1, "Only positive integers are supported for M0");
+    if (lhs_transposed)
+    {
+        ARM_COMPUTE_RETURN_ERROR_ON_MSG(is_unsupported(m0),
+                                        "Only 1,2,3,4,8,16 are supported for M0 for Lhs transposed");
+    }
+
+    // N0: must be positive and always within the supported set
+    const int n0 = settings.n0();
+    ARM_COMPUTE_RETURN_ERROR_ON_MSG(n0 < 1, "Only positive integers are supported for N0");
+    ARM_COMPUTE_RETURN_ERROR_ON_MSG(is_unsupported(n0), "Only 1,2,3,4,8,16 are supported for N0");
+
+    // K0: must be positive; limited to the supported set unless only Lhs is transposed
+    const int k0 = settings.k0();
+    ARM_COMPUTE_RETURN_ERROR_ON_MSG(k0 < 1, "Only positive integers are supported for K0");
+    if (!lhs_transposed || rhs_transposed)
+    {
+        ARM_COMPUTE_RETURN_ERROR_ON_MSG(is_unsupported(k0), "Only 1,2,3,4,8,16 are supported for K0");
+    }
+
+    return Status{};
+}
+
+} // namespace
+
+Status ClComponentMatMul::validate(const Properties                &properties,
+                                   const ArgumentPack<ITensorInfo> &tensors,
+                                   const Attributes                &attributes,
+                                   const Settings                  &settings)
+{
+    ARM_COMPUTE_UNUSED(properties);
+
+    const auto lhs = tensors.get_const_tensor(TensorType::ACL_SRC_0);
+    const auto rhs = tensors.get_const_tensor(TensorType::ACL_SRC_1);
+    const auto dst = tensors.get_const_tensor(TensorType::ACL_DST_0);
+    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(lhs, rhs, dst); // Explicit guard: all three are dereferenced below
+
+    // Check if Matching data type
+    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(lhs, rhs);
+    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(lhs, dst);
+
+    // Data type (rhs and dst were checked to match lhs above)
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lhs, 1, DataType::F16, DataType::F32);
+    // Data layout
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_LAYOUT_NOT_IN(lhs, DataLayout::NHWC);
+
+    // All tensor infos are initialized
+    ARM_COMPUTE_RETURN_ERROR_ON(lhs->tensor_shape().total_size() == 0);
+    ARM_COMPUTE_RETURN_ERROR_ON(rhs->tensor_shape().total_size() == 0);
+    ARM_COMPUTE_RETURN_ERROR_ON(dst->tensor_shape().total_size() == 0);
+
+    // Device requirements are met
+    ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(lhs);
+
+    // Check if dst shape is the one computed from lhs/rhs with the requested attributes and settings
+    MatMulKernelInfo matmul_kernel_info =
+        MatMulKernelInfo(attributes.adj_lhs(), attributes.adj_rhs(), settings.m0(), settings.n0(), settings.k0());
+    const auto expected_dst_shape =
+        misc::shape_calculator::compute_matmul_shape(lhs->tensor_shape(), rhs->tensor_shape(), matmul_kernel_info);
+
+    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(dst->tensor_shape(), expected_dst_shape);
+
+    // Check if block sizes are supported
+    ARM_COMPUTE_RETURN_ON_ERROR(validate_matmul_kernel_info(attributes, settings));
+
+    ARM_COMPUTE_RETURN_ON_ERROR(
+        opencl::kernels::validate_matmul_input_shapes(lhs->tensor_shape(), rhs->tensor_shape(), matmul_kernel_info));
+
+    return Status{};
+}
+
+ClComponentMatMul::ClComponentMatMul(ComponentId                      id,
+                                     const Properties                &properties,
+                                     const ArgumentPack<ITensorInfo> &tensors,
+                                     const Attributes                &attributes,
+                                     const Settings                  &settings)
+    : IGpuKernelComponent{id, properties, tensors}, // forwarded to the base component
+      _component_writer{std::make_unique<GpuCkwMatMul>(id, tensors, attributes, settings)} // owned CKW driver
+{
+}
+
+// Defined out-of-line: the header only forward-declares GpuCkwMatMul, so the
+// std::unique_ptr member must be destroyed here, where the type is complete
+ClComponentMatMul::~ClComponentMatMul() = default;
+
+const IGpuCkwComponentDriver *ClComponentMatMul::ckw_component_driver() const
+{
+    return _component_writer.get(); // Non-owning pointer; the unique_ptr member keeps ownership
+}
+
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+
+#endif // ACL_INTERNAL_TEST_CKW_IN_DF
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentMatMul.h b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentMatMul.h
new file mode 100644
index 0000000000..41833e4adb
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentMatMul.h
@@ -0,0 +1,123 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTMATMUL_H
+#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTMATMUL_H
+
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/KernelDescriptors.h"
+#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuMatMul.h"
+
+#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h"
+
+#include <memory>
+
+namespace arm_compute
+{
+/** Forward declaration */
+class ITensorInfo;
+namespace experimental
+{
+namespace dynamic_fusion
+{
+/** Forward declaration */
+template <typename T>
+class ArgumentPack;
+class MatMulAttributes;
+class GpuCkwMatMul;
+
+class ClComponentMatMul final : public IGpuKernelComponent
+{
+public:
+    /** Attributes are a set of backend-agnostic parameters that define what a component does */
+    using Attributes = MatMulAttributes;
+    /** Settings are a set of backend-specific parameters that influence the implementation of a component */
+    using Settings = GpuMatMulSettings;
+
+    /** Validate the component
+     *
+     * @param[in] properties Component properties
+     * @param[in] tensors    Tensor arguments to the component
+     * @param[in] attributes Component attributes
+     * @param[in] settings   Component settings
+     *
+     * @return Status       Validation results
+     *
+     * Tensor argument names:
+     * - ACL_SRC_0: LHS
+     * - ACL_SRC_1: RHS
+     * - ACL_DST_0: Output
+     *
+     * Tensor argument constness:
+     * - ACL_SRC_0: Const
+     * - ACL_SRC_1: Const
+     * - ACL_DST_0: Const
+     *
+     * Valid data layouts:
+     * - NHWC
+     *
+     * Valid data type configurations:
+     * |ACL_SRC_0      |ACL_SRC_1      |ACL_DST_0      |
+     * |:--------------|:--------------|:--------------|
+     * |F16            |F16            |F16            |
+     * |F32            |F32            |F32            |
+     */
+    static Status validate(const Properties                &properties,
+                           const ArgumentPack<ITensorInfo> &tensors,
+                           const Attributes                &attributes,
+                           const Settings                  &settings);
+
+    /** Constructor
+     *
+     * Takes the component id plus the same arguments as @ref ClComponentMatMul::validate()
+     */
+    ClComponentMatMul(ComponentId                      id,
+                      const Properties                &properties,
+                      const ArgumentPack<ITensorInfo> &tensors,
+                      const Attributes                &attributes,
+                      const Settings                  &settings);
+    /** Destructor (declared here, defined in the source file) */
+    ~ClComponentMatMul() override;
+    /** Prevent instances of this class from being copy constructed */
+    ClComponentMatMul(const ClComponentMatMul &component) = delete;
+    /** Prevent instances of this class from being copied */
+    ClComponentMatMul &operator=(const ClComponentMatMul &component) = delete;
+    /** Allow instances of this class to be move constructed */
+    ClComponentMatMul(ClComponentMatMul &&component) = default;
+    /** Allow instances of this class to be moved */
+    ClComponentMatMul &operator=(ClComponentMatMul &&component) = default;
+    /** Get the CKW component driver (writer) owned by this component */
+    const IGpuCkwComponentDriver *ckw_component_driver() const override;
+    /** Get component type; MatMul always reports GpuComponentType::Complex */
+    GpuComponentType type() const override
+    {
+        return GpuComponentType::Complex;
+    }
+
+private:
+    std::unique_ptr<GpuCkwMatMul> _component_writer; // CKW driver created in the constructor
+};
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+#endif // ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTMATMUL_H
diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuMatMul.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuMatMul.cpp
new file mode 100644
index 0000000000..ee27b5ea47
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/operators/GpuMatMul.cpp
@@ -0,0 +1,249 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifdef ACL_INTERNAL_TEST_CKW_IN_DF
+
+#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuMatMul.h"
+
+#include "arm_compute/core/KernelDescriptors.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/runtime/CL/CLScheduler.h"
+
+#include "src/common/utils/Log.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/dynamic_fusion/sketch/ArgumentPack.h"
+#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentMatMul.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+namespace
+{
+/* Auto-initialize dst when it is still empty: the shape comes from compute_matmul_shape()
+*  on lhs/rhs with the given attributes/settings, every other field is cloned from lhs */
+void calculate_and_init_dst_if_empty(ITensorInfo             *dst,
+                                     const ITensorInfo       *lhs,
+                                     const ITensorInfo       *rhs,
+                                     const MatMulAttributes  &attributes,
+                                     const GpuMatMulSettings &settings)
+{
+    if (dst->total_size() == 0U)
+    {
+        const auto dst_shape = misc::shape_calculator::compute_matmul_shape(
+            lhs->tensor_shape(), rhs->tensor_shape(),
+            MatMulKernelInfo(attributes.adj_lhs(), attributes.adj_rhs(), settings.m0(), settings.n0(), settings.k0()));
+
+        auto_init_if_empty(*dst, lhs->clone()->set_tensor_shape(dst_shape));
+    }
+}
+
+/* Shared validation path for is_supported_op() and validate_op(): picks the dst info
+*  to validate (the caller's, or one inferred from lhs/rhs) and validates the component
+*/
+Status is_supported_op_helper(const GpuWorkloadContext &context,
+                              const ITensorInfo        *lhs,
+                              const ITensorInfo        *rhs,
+                              const ITensorInfo        *dst,
+                              const MatMulAttributes   &attributes,
+                              const GpuMatMulSettings  &settings)
+{
+    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(lhs, rhs);
+
+    // Validate against the caller's dst when one is given; only infer a destination
+    // into the local TensorInfo when the caller passed none
+    TensorInfo         dst_info_to_validate;
+    const ITensorInfo *dst_info_to_validate_ptr = dst;
+    if (dst == nullptr)
+    {
+        calculate_and_init_dst_if_empty(&dst_info_to_validate, lhs, rhs, attributes, settings);
+        dst_info_to_validate_ptr = &dst_info_to_validate;
+    }
+
+    // Check support level
+    // Data type
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lhs, 1, DataType::F16, DataType::F32);
+    // Data layout
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_LAYOUT_NOT_IN(lhs, DataLayout::NHWC);
+
+    // Check components
+    if (context.gpu_language() == GpuLanguage::OpenCL)
+    {
+        const auto cl_compile_ctx = context.cl_compile_context();
+        ARM_COMPUTE_RETURN_ERROR_ON(cl_compile_ctx == nullptr);
+        // Validate MatMul Component
+        {
+            const auto properties =
+                IGpuKernelComponent::Properties().stage(UnitWorkloadStage{UnitWorkloadStage::Stage::Run});
+
+            ArgumentPack<ITensorInfo> arguments;
+            arguments.add_const_tensor(ACL_SRC_0, lhs);
+            arguments.add_const_tensor(ACL_SRC_1, rhs);
+            arguments.add_const_tensor(ACL_DST_0, dst_info_to_validate_ptr);
+
+            ARM_COMPUTE_RETURN_ON_ERROR(ClComponentMatMul::validate(properties, arguments, attributes, settings));
+        }
+    }
+    else
+    {
+        ARM_COMPUTE_RETURN_ERROR_MSG("Unimplemented Gpu language");
+    }
+    return Status{};
+}
+
+constexpr GpuOperatorType operator_type = GpuOperatorType::Complex; // passed to new_operator() below
+} // namespace
+
+int GpuMatMulSettings::n0() const
+{
+    return _n0; // N0 block size currently stored in the settings
+}
+
+GpuMatMulSettings &GpuMatMulSettings::n0(int n0)
+{
+    _n0 = n0;     // No range check here; block sizes are checked during component validation
+    return *this; // Reference to self so that setter calls can be chained
+}
+
+int GpuMatMulSettings::m0() const
+{
+    return _m0; // M0 block size currently stored in the settings
+}
+
+GpuMatMulSettings &GpuMatMulSettings::m0(int m0)
+{
+    _m0 = m0;     // No range check here; block sizes are checked during component validation
+    return *this; // Reference to self so that setter calls can be chained
+}
+
+int GpuMatMulSettings::k0() const
+{
+    return _k0; // K0 block size currently stored in the settings
+}
+
+GpuMatMulSettings &GpuMatMulSettings::k0(int k0)
+{
+    _k0 = k0;     // No range check here; block sizes are checked during component validation
+    return *this; // Reference to self so that setter calls can be chained
+}
+
+Status GpuMatMul::is_supported_op(const GpuWorkloadContext &context,
+                                  const ITensorInfo        *lhs,
+                                  const ITensorInfo        *rhs,
+                                  const MatMulAttributes   &attributes,
+                                  const GpuMatMulSettings  &settings)
+{
+    return is_supported_op_helper(context, lhs, rhs, nullptr, attributes, settings); // no dst: inferred by helper
+}
+
+Status GpuMatMul::validate_op(const GpuWorkloadSketch &sketch,
+                              const ITensorInfo       *lhs,
+                              const ITensorInfo       *rhs,
+                              const MatMulAttributes  &attributes,
+                              const GpuMatMulSettings &settings)
+{
+    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(lhs, rhs);
+
+    // Both inputs must carry a valid id, i.e. they were created from a sketch
+    ARM_COMPUTE_RETURN_ERROR_ON(!(lhs->has_valid_id() && rhs->has_valid_id()));
+
+    // Scratch destination info used for validation only; it is auto-initialized
+    // from lhs and rhs right below
+    TensorInfo scratch_dst;
+    calculate_and_init_dst_if_empty(&scratch_dst, lhs, rhs, attributes, settings);
+
+    // Fusion test: the operator must meet the fusion constraints of the workload
+    ArgumentPack<ITensorInfo> pack;
+    pack.add_const_tensor(ACL_SRC_0, lhs);
+    pack.add_const_tensor(ACL_SRC_1, rhs);
+    pack.add_const_tensor(ACL_DST_0, &scratch_dst);
+
+    const auto &op_group  = sketch.implementation().operator_group();
+    const auto  candidate = op_group.new_operator(operator_type, pack);
+    ARM_COMPUTE_RETURN_ERROR_ON_MSG(!op_group.try_add_operator(candidate),
+                                    "Operator fusion test failed. This operator cannot be fused into the workload");
+
+    // Finally check that the configuration itself is supported
+    return is_supported_op_helper(*sketch.gpu_context(), lhs, rhs, &scratch_dst, attributes, settings);
+}
+
+ITensorInfo *GpuMatMul::create_op(GpuWorkloadSketch &sketch,
+                                  ITensorInfo       *lhs,
+                                  ITensorInfo       *rhs,
+                                  const Attributes  &attributes,
+                                  const Settings    &settings)
+{
+    ARM_COMPUTE_LOG_PARAMS(lhs, rhs, attributes, settings);
+
+    ITensorInfo *dst = sketch.implementation().create_virtual_tensor(); // destination is a sketch virtual tensor
+
+    // Assert validation: a failing validate_op() is raised as an error instead of being returned
+    ARM_COMPUTE_ERROR_THROW_ON(GpuMatMul::validate_op(sketch, lhs, rhs, attributes, settings));
+    ARM_COMPUTE_ERROR_ON_NULLPTR(lhs, rhs, dst);
+
+    // Auto initialize dst tensor (shape inferred from lhs/rhs, everything else cloned from lhs)
+    calculate_and_init_dst_if_empty(dst, lhs, rhs, attributes, settings);
+
+    // Translate into components and add to component graph
+    auto      &comp_graph = sketch.implementation().component_graph();
+    const auto sketch_ctx = sketch.implementation().context();
+
+    if (sketch_ctx->gpu_language() == GpuLanguage::OpenCL)
+    {
+        auto properties = IGpuKernelComponent::Properties();
+        properties.stage(UnitWorkloadStage{UnitWorkloadStage::Stage::Run});
+
+        ArgumentPack<ITensorInfo> arguments;
+        arguments.add_const_tensor(ACL_SRC_0, lhs);
+        arguments.add_const_tensor(ACL_SRC_1, rhs);
+        arguments.add_const_tensor(ACL_DST_0, dst);
+        comp_graph.add_new_component<ClComponentMatMul>(properties, arguments, attributes, settings);
+    }
+    else
+    {
+        ARM_COMPUTE_ERROR("Unimplemented Gpu language");
+    }
+
+    // Set up fusion test by adding to the Operator Group
+    // Note this has to be performed after all the components have been successfully added to the component graph
+
+    // Pack tensor infos (same SRC_0/SRC_1/DST_0 roles as the component arguments above)
+    ArgumentPack<ITensorInfo> tensors;
+    tensors.add_const_tensor(ACL_SRC_0, lhs);
+    tensors.add_const_tensor(ACL_SRC_1, rhs);
+    tensors.add_const_tensor(ACL_DST_0, dst);
+
+    const auto op = sketch.implementation().operator_group().new_operator(operator_type, tensors);
+    sketch.implementation().operator_group().add_operator(op);
+
+    return dst;
+}
+
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+#endif // ACL_INTERNAL_TEST_CKW_IN_DF
-- 
cgit v1.2.1