Port MatMul to Dynamic Fusion + CKW boilerplate code

- Port MatMul to Dynamic Fusion. - Prepare a CKW boilerplate code. - Implement the following classes: - MatMulAttributes - GpuMatMulSettings - GpuMatMul - ClComponentMatMul - GpuCkwMatMul Signed-off-by: Adnan AlSinan <adnan.alsinan@arm.com> Change-Id: I5a7c183b293973e8a4233b554b2affe0bb28f44d Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10453 Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Jakub Sujak <jakub.sujak@arm.com> Reviewed-by: Viet-Hoa Do <viet-hoa.do@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com> Benchmark: Arm Jenkins <bsgcomp@arm.com>
author: Adnan AlSinan <adnan.alsinan@arm.com> 2023-10-06 19:07:48 +0100
committer: Adnan AlSinan <adnan.alsinan@arm.com> 2023-10-10 10:33:15 +0000
commit: d9c1d44185e4fea72ce4d37d76021237fe64eeaf (patch)
tree: 3b9f569f2286cfd91c9370fe6cef094779fc460b
parent: 0b72aa4b2abdba7ab48aaa8a45c624ba1e27a411 (diff)
download: ComputeLibrary-d9c1d44185e4fea72ce4d37d76021237fe64eeaf.tar.gz
10 files changed, 933 insertions, 0 deletions
diff --git a/Android.bp b/Android.bp
index 5760b7d08e..9da5657172 100644
--- a/Android.bp
+++ b/Android.bp
@@ -618,6 +618,7 @@ cc_library_static {
         "src/dynamic_fusion/sketch/attributes/ClampAttributes.cpp",
         "src/dynamic_fusion/sketch/attributes/Conv2dAttributes.cpp",
         "src/dynamic_fusion/sketch/attributes/DepthwiseConv2dAttributes.cpp",
+        "src/dynamic_fusion/sketch/attributes/MatMulAttributes.cpp",
         "src/dynamic_fusion/sketch/attributes/Pool2dAttributes.cpp",
         "src/dynamic_fusion/sketch/attributes/ReshapeAttributes.cpp",
         "src/dynamic_fusion/sketch/attributes/ResizeAttributes.cpp",
@@ -640,6 +641,7 @@ cc_library_static {
         "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwDepthwiseConv2d.cpp",
         "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwDirectConv2d.cpp",
         "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.cpp",
+        "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwMatMul.cpp",
         "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwPool2d.cpp",
         "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwResize.cpp",
         "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.cpp",
@@ -650,6 +652,7 @@ cc_library_static {
         "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp",
         "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DMaxShiftExpSum.cpp",
         "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DNorm.cpp",
+        "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentMatMul.cpp",
         "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.cpp",
         "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentReshape.cpp",
         "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentResize.cpp",
@@ -659,6 +662,7 @@ cc_library_static {
         "src/dynamic_fusion/sketch/gpu/operators/GpuClamp.cpp",
         "src/dynamic_fusion/sketch/gpu/operators/GpuConv2d.cpp",
         "src/dynamic_fusion/sketch/gpu/operators/GpuDepthwiseConv2d.cpp",
+        "src/dynamic_fusion/sketch/gpu/operators/GpuMatMul.cpp",
         "src/dynamic_fusion/sketch/gpu/operators/GpuMul.cpp",
         "src/dynamic_fusion/sketch/gpu/operators/GpuOutput.cpp",
         "src/dynamic_fusion/sketch/gpu/operators/GpuPool2d.cpp",
diff --git a/arm_compute/dynamic_fusion/sketch/attributes/MatMulAttributes.h b/arm_compute/dynamic_fusion/sketch/attributes/MatMulAttributes.h
new file mode 100644
index 0000000000..fc512e9ff9
--- /dev/null
+++ b/arm_compute/dynamic_fusion/sketch/attributes/MatMulAttributes.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ACL_ARM_COMPUTE_DYNAMIC_FUSION_SKETCH_ATTRIBUTES_MATMULATTRIBUTES_H
+#define ACL_ARM_COMPUTE_DYNAMIC_FUSION_SKETCH_ATTRIBUTES_MATMULATTRIBUTES_H
+
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+/** Attributes are backend-agnostic parameters (in addition to the input/output tensors) of an operator.
+ */
+
+/** MatMul attributes: the adjoint (transpose) flags of the LHS and RHS matrices; both default to false */
+class MatMulAttributes
+{
+public:
+    /** Get adjoint LHS flag value (transpose LHS before multiplication) */
+    bool adj_lhs() const;
+
+    /** Get adjoint RHS flag value (transpose RHS before multiplication) */
+    bool adj_rhs() const;
+
+    /** Set adjoint LHS flag value (transpose LHS before multiplication). NOTE(review): returns MatMulAttributes by value, not by reference */
+    MatMulAttributes adj_lhs(bool adj_lhs);
+
+    /** Set adjoint RHS flag value (transpose RHS before multiplication). NOTE(review): returns MatMulAttributes by value, not by reference */
+    MatMulAttributes adj_rhs(bool adj_rhs);
+
+private:
+    bool _adj_lhs{false}; /**< Adjoint LHS flag, initialized to false */
+    bool _adj_rhs{false}; /**< Adjoint RHS flag, initialized to false */
+};
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+
+#endif // ACL_ARM_COMPUTE_DYNAMIC_FUSION_SKETCH_ATTRIBUTES_MATMULATTRIBUTES_H
diff --git a/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuMatMul.h b/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuMatMul.h
new file mode 100644
index 0000000000..6ba5b1db93
--- /dev/null
+++ b/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuMatMul.h
@@ -0,0 +1,132 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ACL_ARM_COMPUTE_DYNAMIC_FUSION_SKETCH_GPU_OPERATORS_GPUMATMUL_H
+#define ACL_ARM_COMPUTE_DYNAMIC_FUSION_SKETCH_GPU_OPERATORS_GPUMATMUL_H
+
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/KernelDescriptors.h"
+#include "arm_compute/dynamic_fusion/sketch/attributes/MatMulAttributes.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+/** Forward declaration */
+class GpuWorkloadContext;
+class GpuWorkloadSketch;
+
+/** Operator backend specific settings: the M0, N0 and K0 values described on the accessors below
+*/
+class GpuMatMulSettings
+{
+public:
+    /** Set N0: number of columns processed by each work-item */
+    GpuMatMulSettings &n0(int n0);
+    /** Get N0: number of columns processed by each work-item */
+    int n0() const;
+
+    /** Set M0: number of rows processed by each work-item */
+    GpuMatMulSettings &m0(int m0);
+    /** Get M0: number of rows processed by each work-item */
+    int m0() const;
+
+    /** Set K0: number of inner accumulations */
+    GpuMatMulSettings &k0(int k0);
+    /** Get K0: number of inner accumulations */
+    int k0() const;
+
+private:
+    int _n0{0}; /**< Number of columns processed by each work-item (initialized to 0) */
+    int _m0{0}; /**< Number of rows processed by each work-item (initialized to 0) */
+    int _k0{0}; /**< Number of inner accumulations (initialized to 0) */
+};
+
+/** Operator interface. */
+class GpuMatMul final
+{
+public:
+    /** Attributes are a set of backend-agnostic parameters that define what an operator does */
+    using Attributes = MatMulAttributes;
+    /** Settings are a set of backend-specific parameters that influence the implementation of an operator */
+    using Settings = GpuMatMulSettings;
+
+    /** Create an operator and fuse it into the workload sketch.
+     *    @note If @ref validate_op() fails, the creation also fails and may throw an error.
+     *    @note If @ref validate_op() fails, @p sketch remains unchanged and valid.
+     *
+     * Valid data type configurations:
+     * |LHS            |RHS            |dst            |
+     * |:--------------|:--------------|:--------------|
+     * |F16            |F16            |F16            |
+     * |F32            |F32            |F32            |
+     *
+     *
+     * @param[in,out] sketch     Workload sketch into which the operator will be fused
+     * @param[in]     lhs        Input tensor info for the LHS matrix. Data type supported: F32/F16. Dimensions above 2 are collapsed onto dimension 2 and represent the batch.
+     * @param[in]     rhs        Input tensor info for the RHS matrix. Data type supported: same as @p lhs. Dimensions above 2 are collapsed onto dimension 2 and represent the batch.
+     * @param[in]     attributes Operator attributes
+     * @param[in]     settings   Operator settings
+     *
+     * @return Pointer for the destination tensor info
+     */
+    static ITensorInfo *create_op(GpuWorkloadSketch &sketch,
+                                  ITensorInfo       *lhs,
+                                  ITensorInfo       *rhs,
+                                  const Attributes  &attributes,
+                                  const Settings    &settings);
+
+    /** Check if the operator configuration is supported, irrespective of fusion
+     *
+     * @param[in] context    Workload context within which the operator is running
+     * @param[in] lhs        Input tensor info for the LHS matrix.
+     * @param[in] rhs        Input tensor info for the RHS matrix.
+     * @param[in] attributes Operator attributes
+     * @param[in] settings   Operator settings
+     *
+     * @return Status
+     */
+    static Status is_supported_op(const GpuWorkloadContext &context,
+                                  const ITensorInfo        *lhs,
+                                  const ITensorInfo        *rhs,
+                                  const Attributes         &attributes,
+                                  const Settings           &settings);
+
+    /** Check if the operator configuration is supported and if it can be fused into the workload sketch.
+     *
+     * Parameters are similar to @ref GpuMatMul::create_op()
+     *
+     * @return Status
+     */
+    static Status validate_op(const GpuWorkloadSketch &sketch,
+                              const ITensorInfo       *lhs,
+                              const ITensorInfo       *rhs,
+                              const Attributes        &attributes,
+                              const Settings          &settings);
+};
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+#endif // ACL_ARM_COMPUTE_DYNAMIC_FUSION_SKETCH_GPU_OPERATORS_GPUMATMUL_H
diff --git a/filelist.json b/filelist.json
index aeb54c6d9d..557c2b12cb 100644
--- a/filelist.json
+++ b/filelist.json
@@ -2294,6 +2294,7 @@
         "src/dynamic_fusion/sketch/attributes/ClampAttributes.cpp",
         "src/dynamic_fusion/sketch/attributes/Conv2dAttributes.cpp",
         "src/dynamic_fusion/sketch/attributes/DepthwiseConv2dAttributes.cpp",
+        "src/dynamic_fusion/sketch/attributes/MatMulAttributes.cpp",
         "src/dynamic_fusion/sketch/attributes/Pool2dAttributes.cpp",
         "src/dynamic_fusion/sketch/attributes/ResizeAttributes.cpp",
         "src/dynamic_fusion/sketch/attributes/SoftmaxAttributes.cpp",
@@ -2310,6 +2311,7 @@
         "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.cpp",
         "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDepthwiseConv2d.cpp",
         "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.cpp",
+        "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentMatMul.cpp",
         "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.cpp",
         "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp",
         "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DMaxShiftExpSum.cpp",
@@ -2322,6 +2324,7 @@
         "src/dynamic_fusion/sketch/gpu/operators/GpuClamp.cpp",
         "src/dynamic_fusion/sketch/gpu/operators/GpuConv2d.cpp",
         "src/dynamic_fusion/sketch/gpu/operators/GpuDepthwiseConv2d.cpp",
+        "src/dynamic_fusion/sketch/gpu/operators/GpuMatMul.cpp",
         "src/dynamic_fusion/sketch/gpu/operators/GpuMul.cpp",
         "src/dynamic_fusion/sketch/gpu/operators/GpuReshape.cpp",
         "src/dynamic_fusion/sketch/gpu/operators/GpuPool2d.cpp",
@@ -2357,6 +2360,7 @@
         "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwDirectConv2d.cpp",
         "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.cpp",
         "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwPool2d.cpp",
+        "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwMatMul.cpp",
         "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwResize.cpp",
         "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.cpp",
         "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.cpp",
diff --git a/src/dynamic_fusion/sketch/attributes/MatMulAttributes.cpp b/src/dynamic_fusion/sketch/attributes/MatMulAttributes.cpp
new file mode 100644
index 0000000000..027b550377
--- /dev/null
+++ b/src/dynamic_fusion/sketch/attributes/MatMulAttributes.cpp
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/dynamic_fusion/sketch/attributes/MatMulAttributes.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+MatMulAttributes MatMulAttributes::adj_lhs(bool adj_lhs)
+{
+    _adj_lhs = adj_lhs; // Store the new adjoint-LHS flag on this object
+    return *this;       // Return type is by value, so the caller receives a copy of the updated object
+}
+MatMulAttributes MatMulAttributes::adj_rhs(bool adj_rhs)
+{
+    _adj_rhs = adj_rhs; // Store the new adjoint-RHS flag on this object
+    return *this;       // Return type is by value, so the caller receives a copy of the updated object
+}
+bool MatMulAttributes::adj_lhs() const
+{
+    return _adj_lhs; // Stored adjoint-LHS flag
+}
+bool MatMulAttributes::adj_rhs() const
+{
+    return _adj_rhs; // Stored adjoint-RHS flag
+}
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwMatMul.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwMatMul.cpp
new file mode 100644
index 0000000000..77e5f7af01
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwMatMul.cpp
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwMatMul.h"
+
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+
+GpuCkwMatMul::GpuCkwMatMul(ComponentId                      id,
+                           const ArgumentPack<ITensorInfo> &tensors,
+                           const Attributes                &attributes,
+                           const Settings                  &settings)
+    : IGpuCkwComponentDriver{id, tensors}, _lhs{}, _rhs{}, _dst{}, _attributes{attributes}, _settings{settings}
+{
+    _lhs = this->tensors().get_const_tensor(TensorType::ACL_SRC_0); // LHS is read from the ACL_SRC_0 slot via the base-class accessor
+    _rhs = this->tensors().get_const_tensor(TensorType::ACL_SRC_1); // RHS is read from the ACL_SRC_1 slot
+    _dst = this->tensors().get_const_tensor(TensorType::ACL_DST_0); // Destination is read from the ACL_DST_0 slot
+    ARM_COMPUTE_ERROR_ON_NULLPTR(_lhs, _rhs, _dst);                 // All three tensor infos are required
+}
+
+void GpuCkwMatMul::write_component_code(const ComponentGroup    &comp_group,
+                                        GpuCkwVariableTable     &vtable,
+                                        GpuCkwScopedKernelWriter writer) const
+{
+    ARM_COMPUTE_UNUSED(comp_group, vtable, writer); // Boilerplate stub: no arguments are used and no kernel code is written
+}
+
+Window GpuCkwMatMul::get_window() const
+{
+    ARM_COMPUTE_ERROR_ON_MSG(_dst->tensor_shape().total_size() == 0U, "Destination tensor is not initialized");
+    return Window(); // Placeholder: a default-constructed Window; nothing is derived from _dst here
+}
+
+std::string GpuCkwMatMul::get_name(const ComponentGroup &comp_group) const
+{
+    ARM_COMPUTE_UNUSED(comp_group);
+
+    return "MatMul"; // Fixed name; comp_group does not influence it
+}
+
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwMatMul.h b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwMatMul.h
new file mode 100644
index 0000000000..ae2ea09f05
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwMatMul.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_COMPONENTS_GPUCKWMATMUL_H
+#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_COMPONENTS_GPUCKWMATMUL_H
+
+#include "arm_compute/dynamic_fusion/sketch/attributes/MatMulAttributes.h"
+
+#include "src/core/common/Macros.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/IGpuCkwComponentDriver.h"
+#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentMatMul.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+class GpuCkwMatMul final : public IGpuCkwComponentDriver // CKW component driver for the MatMul component
+{
+public:
+    using Attributes = ClComponentMatMul::Attributes;
+    using Settings   = ClComponentMatMul::Settings;
+
+public:
+    /** Constructor
+     *
+     * For supported configurations please refer to @ref ClComponentMatMul::validate()
+     *
+     * @param[in] id         Component id
+     * @param[in] tensors    Tensor arguments to the component
+     * @param[in] attributes Component attributes. Attributes are a set of parameters that define what a component does
+     * @param[in] settings   Component settings. Settings are a set of parameters that influence the implementation of a component
+     */
+    GpuCkwMatMul(ComponentId                      id,
+                 const ArgumentPack<ITensorInfo> &tensors,
+                 const Attributes                &attributes,
+                 const Settings                  &settings);
+
+    ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(GpuCkwMatMul);
+
+    /** Destructor */
+    ~GpuCkwMatMul() override = default;
+
+    // Inherited methods overridden
+    virtual void write_component_code(const ComponentGroup    &comp_group,
+                                      GpuCkwVariableTable     &vtable,
+                                      GpuCkwScopedKernelWriter writer) const override;
+    Window       get_window() const override;
+    std::string  get_name(const ComponentGroup &comp_group) const override;
+
+private:
+    const ITensorInfo *_lhs; /**< LHS tensor info (ACL_SRC_0) */
+    const ITensorInfo *_rhs; /**< RHS tensor info (ACL_SRC_1) */
+    const ITensorInfo *_dst; /**< Destination tensor info (ACL_DST_0) */
+
+    const Attributes _attributes; /**< Copy of the component attributes */
+    const Settings   _settings;   /**< Copy of the component settings */
+};
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+
+#endif // ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_COMPONENTS_GPUCKWMATMUL_H
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentMatMul.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentMatMul.cpp
new file mode 100644
index 0000000000..eada61e1b3
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentMatMul.cpp
@@ -0,0 +1,151 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifdef ACL_INTERNAL_TEST_CKW_IN_DF
+
+#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentMatMul.h"
+
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/dynamic_fusion/sketch/attributes/MatMulAttributes.h"
+
+#include "src/core/CL/CLValidate.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwMatMul.h"
+#include "src/gpu/cl/kernels/helpers/MatMulKernelHelpers.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+namespace
+{
+using Attributes = MatMulAttributes;
+using Settings   = GpuMatMulSettings;
+
+Status validate_matmul_kernel_info(Attributes attributes, Settings settings) // Checks M0/N0/K0 against the transpose flags
+{
+    const bool adj_lhs = attributes.adj_lhs();
+    const bool adj_rhs = attributes.adj_rhs();
+    const int  m0      = settings.m0();
+    const int  n0      = settings.n0();
+    const int  k0      = settings.k0();
+
+    // Validate M0: must be >= 1; the 1,2,3,4,8,16 restriction applies only when the LHS is transposed
+    ARM_COMPUTE_RETURN_ERROR_ON_MSG(m0 < 1, "Only positive integers are supported for M0");
+
+    if (adj_lhs)
+    {
+        ARM_COMPUTE_RETURN_ERROR_ON_MSG(((m0 & (m0 - 1)) && (m0 != 3)) || (m0 > 16),
+                                        "Only 1,2,3,4,8,16 are supported for M0 for Lhs transposed");
+    }
+
+    // Validate N0: (n0 & (n0 - 1)) is non-zero unless n0 is a power of two, so only 1,2,3,4,8,16 pass
+    ARM_COMPUTE_RETURN_ERROR_ON_MSG(n0 < 1, "Only positive integers are supported for N0");
+    ARM_COMPUTE_RETURN_ERROR_ON_MSG(((n0 & (n0 - 1)) && (n0 != 3)) || (n0 > 16),
+                                    "Only 1,2,3,4,8,16 are supported for N0");
+
+    // Validate K0: must be >= 1; restricted to 1,2,3,4,8,16 unless the LHS is transposed and the RHS is not
+    ARM_COMPUTE_RETURN_ERROR_ON_MSG(k0 < 1, "Only positive integers are supported for K0");
+    if (!adj_lhs || adj_rhs)
+    {
+        ARM_COMPUTE_RETURN_ERROR_ON_MSG(((k0 & (k0 - 1)) && (k0 != 3)) || (k0 > 16),
+                                        "Only 1,2,3,4,8,16 are supported for K0");
+    }
+
+    return Status{};
+}
+
+} // namespace
+
+Status ClComponentMatMul::validate(const Properties                &properties,
+                                   const ArgumentPack<ITensorInfo> &tensors,
+                                   const Attributes                &attributes,
+                                   const Settings                  &settings)
+{
+    // Only properties is unused: attributes and settings are both read by the checks below
+    ARM_COMPUTE_UNUSED(properties);
+
+    const auto lhs = tensors.get_const_tensor(TensorType::ACL_SRC_0);
+    const auto rhs = tensors.get_const_tensor(TensorType::ACL_SRC_1);
+    const auto dst = tensors.get_const_tensor(TensorType::ACL_DST_0);
+
+    // LHS, RHS and dst must all share the same data type
+    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(lhs, rhs);
+    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(lhs, dst);
+
+    // Data type: single-channel F16 or F32 (checked on lhs)
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lhs, 1, DataType::F16, DataType::F32);
+    // Data layout: NHWC (checked on lhs)
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_LAYOUT_NOT_IN(lhs, DataLayout::NHWC);
+
+    // All tensor infos are initialized (non-empty shapes)
+    ARM_COMPUTE_RETURN_ERROR_ON(lhs->tensor_shape().total_size() == 0);
+    ARM_COMPUTE_RETURN_ERROR_ON(rhs->tensor_shape().total_size() == 0);
+    ARM_COMPUTE_RETURN_ERROR_ON(dst->tensor_shape().total_size() == 0);
+
+    // Device requirements are met
+    ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(lhs);
+
+    // Check if dst shape matches the shape computed from lhs, rhs and the transpose flags / block sizes
+    MatMulKernelInfo matmul_kernel_info =
+        MatMulKernelInfo(attributes.adj_lhs(), attributes.adj_rhs(), settings.m0(), settings.n0(), settings.k0());
+    const auto expected_dst_shape =
+        misc::shape_calculator::compute_matmul_shape(lhs->tensor_shape(), rhs->tensor_shape(), matmul_kernel_info);
+
+    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(dst->tensor_shape(), expected_dst_shape);
+
+    // Check if block sizes (M0, N0, K0) are supported
+    ARM_COMPUTE_RETURN_ON_ERROR(validate_matmul_kernel_info(attributes, settings));
+
+    ARM_COMPUTE_RETURN_ON_ERROR(
+        opencl::kernels::validate_matmul_input_shapes(lhs->tensor_shape(), rhs->tensor_shape(), matmul_kernel_info));
+
+    return Status{};
+}
+
+ClComponentMatMul::ClComponentMatMul(ComponentId                      id,
+                                     const Properties                &properties,
+                                     const ArgumentPack<ITensorInfo> &tensors,
+                                     const Attributes                &attributes,
+                                     const Settings                  &settings)
+    : IGpuKernelComponent{id, properties, tensors},
+      _component_writer{std::make_unique<GpuCkwMatMul>(id, tensors, attributes, settings)} // CKW driver is built with the same id, tensors, attributes and settings
+{
+}
+
+ClComponentMatMul::~ClComponentMatMul() // NOTE(review): presumably out-of-line so GpuCkwMatMul is a complete type where _component_writer is destroyed - confirm against the header
+{
+}
+
+const IGpuCkwComponentDriver *ClComponentMatMul::ckw_component_driver() const
+{
+    return _component_writer.get(); // Non-owning pointer to the GpuCkwMatMul driver created in the constructor
+}
+
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+
+#endif // ACL_INTERNAL_TEST_CKW_IN_DF
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentMatMul.h b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentMatMul.h
new file mode 100644
index 0000000000..41833e4adb
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentMatMul.h
@@ -0,0 +1,123 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTMATMUL_H
+#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTMATMUL_H
+
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/KernelDescriptors.h"
+#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuMatMul.h"
+
+#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h"
+
+#include <memory>
+
+namespace arm_compute
+{
+/** Forward declaration */
+class ITensorInfo;
+namespace experimental
+{
+namespace dynamic_fusion
+{
+/** Forward declaration */
+template <typename T>
+class ArgumentPack;
+class MatMulAttributes;
+class GpuCkwMatMul;
+
+/** MatMul kernel component
+ *
+ * Holds a GpuCkwMatMul writer, exposes it through ckw_component_driver() and reports its type as
+ * GpuComponentType::Complex. Instances are movable but not copyable.
+ */
+class ClComponentMatMul final : public IGpuKernelComponent
+{
+public:
+    /** Attributes are a set of backend-agnostic parameters that define what a component does */
+    using Attributes = MatMulAttributes;
+    /** Settings are a set of backend-specific parameters that influence the implementation of a component */
+    using Settings = GpuMatMulSettings;
+
+    /** Validate the component
+     *
+     * @param[in] properties Component properties
+     * @param[in] tensors    Tensor arguments to the component
+     * @param[in] attributes Component attributes
+     * @param[in] settings   Component settings
+     *
+     * @return Status       Validation results
+     *
+     * Tensor argument names:
+     * - ACL_SRC_0: LHS
+     * - ACL_SRC_1: RHS
+     * - ACL_DST_0: Output
+     *
+     * Tensor argument constness:
+     * - ACL_SRC_0: Const
+     * - ACL_SRC_1: Const
+     * - ACL_DST_0: Const
+     *
+     * Valid data layouts:
+     * - NHWC
+     *
+     * Valid data type configurations:
+     * |ACL_SRC_0      |ACL_SRC_1      |ACL_DST_0      |
+     * |:--------------|:--------------|:--------------|
+     * |F16            |F16            |F16            |
+     * |F32            |F32            |F32            |
+     */
+    static Status validate(const Properties                &properties,
+                           const ArgumentPack<ITensorInfo> &tensors,
+                           const Attributes                &attributes,
+                           const Settings                  &settings);
+
+    /** Constructor
+     *
+     * Similar to @ref ClComponentMatMul::validate()
+     *
+     * @param[in] id         Component id
+     * @param[in] properties Component properties
+     * @param[in] tensors    Tensor arguments to the component
+     * @param[in] attributes Component attributes
+     * @param[in] settings   Component settings
+     */
+    ClComponentMatMul(ComponentId                      id,
+                      const Properties                &properties,
+                      const ArgumentPack<ITensorInfo> &tensors,
+                      const Attributes                &attributes,
+                      const Settings                  &settings);
+    /** Destructor */
+    ~ClComponentMatMul() override;
+    /** Prevent instances of this class from being copy constructed */
+    ClComponentMatMul(const ClComponentMatMul &component) = delete;
+    /** Prevent instances of this class from being copied */
+    ClComponentMatMul &operator=(const ClComponentMatMul &component) = delete;
+    /** Allow instances of this class to be move constructed */
+    ClComponentMatMul(ClComponentMatMul &&component) = default;
+    /** Allow instances of this class to be moved */
+    ClComponentMatMul &operator=(ClComponentMatMul &&component) = default;
+    /** Get writer for the component */
+    const IGpuCkwComponentDriver *ckw_component_driver() const override;
+    /** Get component type */
+    GpuComponentType type() const override
+    {
+        return GpuComponentType::Complex;
+    }
+
+private:
+    /** Writer returned by ckw_component_driver(). GpuCkwMatMul is only forward-declared in this header */
+    std::unique_ptr<GpuCkwMatMul> _component_writer;
+};
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+#endif // ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTMATMUL_H
diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuMatMul.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuMatMul.cpp
new file mode 100644
index 0000000000..ee27b5ea47
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/operators/GpuMatMul.cpp
@@ -0,0 +1,249 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifdef ACL_INTERNAL_TEST_CKW_IN_DF
+
+#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuMatMul.h"
+
+#include "arm_compute/core/KernelDescriptors.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/runtime/CL/CLScheduler.h"
+
+#include "src/common/utils/Log.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/dynamic_fusion/sketch/ArgumentPack.h"
+#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentMatMul.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+namespace
+{
+/** Initialise @p dst from @p lhs and @p rhs if it has not been initialised yet
+ *
+ * A @p dst whose total size is non-zero is left untouched. Otherwise @p dst is initialised from a clone of
+ * @p lhs whose shape is replaced by the result of misc::shape_calculator::compute_matmul_shape().
+ *
+ * @param[in,out] dst        Destination tensor info; only modified when its total size is zero
+ * @param[in]     lhs        Left-hand side tensor info; cloned as the template for @p dst
+ * @param[in]     rhs        Right-hand side tensor info
+ * @param[in]     attributes Operator attributes; provides adj_lhs and adj_rhs
+ * @param[in]     settings   Operator settings; provides m0, n0 and k0
+ */
+void calculate_and_init_dst_if_empty(ITensorInfo             *dst,
+                                     const ITensorInfo       *lhs,
+                                     const ITensorInfo       *rhs,
+                                     const MatMulAttributes  &attributes,
+                                     const GpuMatMulSettings &settings)
+{
+    if (dst->total_size() == 0U)
+    {
+        const MatMulKernelInfo kernel_info(attributes.adj_lhs(), attributes.adj_rhs(), settings.m0(), settings.n0(),
+                                           settings.k0());
+
+        const auto dst_shape =
+            misc::shape_calculator::compute_matmul_shape(lhs->tensor_shape(), rhs->tensor_shape(), kernel_info);
+
+        auto_init_if_empty(*dst, lhs->clone()->set_tensor_shape(dst_shape));
+    }
+}
+
+/** Validation shared by GpuMatMul::is_supported_op() and GpuMatMul::validate_op()
+ *
+ * Keeps the dst tensor info initialisation in one place instead of duplicating it in both callers.
+ *
+ * @param[in] context    Workload context; provides the GPU language and the CL compile context
+ * @param[in] lhs        Left-hand side tensor info. Must not be nullptr
+ * @param[in] rhs        Right-hand side tensor info. Must not be nullptr
+ * @param[in] dst        Destination tensor info to validate, or nullptr to validate one derived from
+ *                       @p lhs and @p rhs
+ * @param[in] attributes Operator attributes
+ * @param[in] settings   Operator settings
+ *
+ * @return Status
+ */
+Status is_supported_op_helper(const GpuWorkloadContext &context,
+                              const ITensorInfo        *lhs,
+                              const ITensorInfo        *rhs,
+                              const ITensorInfo        *dst,
+                              const MatMulAttributes   &attributes,
+                              const GpuMatMulSettings  &settings)
+{
+    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(lhs, rhs);
+
+    // Only derive a local dst info when the caller did not supply one; a supplied dst is validated as is
+    TensorInfo dst_info_to_validate;
+    if (dst == nullptr)
+    {
+        calculate_and_init_dst_if_empty(&dst_info_to_validate, lhs, rhs, attributes, settings);
+    }
+    const ITensorInfo *dst_info_to_validate_ptr = (dst != nullptr) ? dst : &dst_info_to_validate;
+
+    // Check support level
+    // Data type
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lhs, 1, DataType::F16, DataType::F32);
+    // Data layout
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_LAYOUT_NOT_IN(lhs, DataLayout::NHWC);
+
+    // Check components: only OpenCL is handled here
+    if (context.gpu_language() != GpuLanguage::OpenCL)
+    {
+        ARM_COMPUTE_RETURN_ERROR_MSG("Unimplemented Gpu language");
+    }
+
+    const auto cl_compile_ctx = context.cl_compile_context();
+    ARM_COMPUTE_RETURN_ERROR_ON(cl_compile_ctx == nullptr);
+
+    // Validate MatMul Component
+    const auto properties = IGpuKernelComponent::Properties().stage(UnitWorkloadStage{UnitWorkloadStage::Stage::Run});
+
+    ArgumentPack<ITensorInfo> arguments;
+    arguments.add_const_tensor(ACL_SRC_0, lhs);
+    arguments.add_const_tensor(ACL_SRC_1, rhs);
+    arguments.add_const_tensor(ACL_DST_0, dst_info_to_validate_ptr);
+
+    ARM_COMPUTE_RETURN_ON_ERROR(ClComponentMatMul::validate(properties, arguments, attributes, settings));
+
+    return Status{};
+}
+
+/** Operator type passed to the sketch's operator group when creating the MatMul operator */
+constexpr GpuOperatorType operator_type = GpuOperatorType::Complex;
+} // namespace
+
+/** Get the n0 value held by these settings */
+int GpuMatMulSettings::n0() const
+{
+    return _n0;
+}
+
+/** Set n0. Returns *this so that setter calls can be chained */
+GpuMatMulSettings &GpuMatMulSettings::n0(int n0)
+{
+    _n0 = n0;
+    return *this;
+}
+
+/** Get the m0 value held by these settings */
+int GpuMatMulSettings::m0() const
+{
+    return _m0;
+}
+
+/** Set m0. Returns *this so that setter calls can be chained */
+GpuMatMulSettings &GpuMatMulSettings::m0(int m0)
+{
+    _m0 = m0;
+    return *this;
+}
+
+/** Get the k0 value held by these settings */
+int GpuMatMulSettings::k0() const
+{
+    return _k0;
+}
+
+/** Set k0. Returns *this so that setter calls can be chained */
+GpuMatMulSettings &GpuMatMulSettings::k0(int k0)
+{
+    _k0 = k0;
+    return *this;
+}
+
+/** Check whether the MatMul configuration is supported, independently of any sketch
+ *
+ * No dst tensor info is passed on (nullptr), so the helper validates one it derives from @p lhs and @p rhs.
+ */
+Status GpuMatMul::is_supported_op(const GpuWorkloadContext &context,
+                                  const ITensorInfo        *lhs,
+                                  const ITensorInfo        *rhs,
+                                  const MatMulAttributes   &attributes,
+                                  const GpuMatMulSettings  &settings)
+{
+    return is_supported_op_helper(context, lhs, rhs, nullptr, attributes, settings);
+}
+
+/** Validate that a MatMul operator on @p lhs and @p rhs can be added to @p sketch
+ *
+ * Checks, in order: non-null inputs, valid tensor ids, the operator group fusion test, and finally the
+ * configuration support check shared with is_supported_op().
+ */
+Status GpuMatMul::validate_op(const GpuWorkloadSketch &sketch,
+                              const ITensorInfo       *lhs,
+                              const ITensorInfo       *rhs,
+                              const MatMulAttributes  &attributes,
+                              const GpuMatMulSettings &settings)
+{
+    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(lhs, rhs);
+
+    // Check if tensors have valid id. I.e. they are created from a sketch
+    ARM_COMPUTE_RETURN_ERROR_ON(!lhs->has_valid_id() || !rhs->has_valid_id());
+
+    // Local stand-in for dst, used for validation only. The dst handed back to the user is obtained from the
+    // sketch in GpuMatMul::create_op() via create_virtual_tensor()
+    TensorInfo dst_info_to_validate;
+
+    // Auto initialize dst tensor info
+    calculate_and_init_dst_if_empty(&dst_info_to_validate, lhs, rhs, attributes, settings);
+
+    // Perform fusion test
+    // Check if operator meets fusion constraints
+    ArgumentPack<ITensorInfo> tensors;
+    tensors.add_const_tensor(ACL_SRC_0, lhs);
+    tensors.add_const_tensor(ACL_SRC_1, rhs);
+    tensors.add_const_tensor(ACL_DST_0, &dst_info_to_validate);
+    const auto op = sketch.implementation().operator_group().new_operator(operator_type, tensors);
+    ARM_COMPUTE_RETURN_ERROR_ON_MSG(!sketch.implementation().operator_group().try_add_operator(op),
+                                    "Operator fusion test failed. This operator cannot be fused into the workload");
+
+    // Check if configuration is supported
+    return is_supported_op_helper(*sketch.gpu_context(), lhs, rhs, &dst_info_to_validate, attributes, settings);
+}
+
+/** Add a MatMul operator on @p lhs and @p rhs to @p sketch and return its destination tensor info
+ *
+ * The destination is obtained from the sketch implementation via create_virtual_tensor(), initialised from
+ * @p lhs and @p rhs, and returned to the caller. The configuration is validated with validate_op() first;
+ * a failed validation is raised through ARM_COMPUTE_ERROR_THROW_ON.
+ */
+ITensorInfo *GpuMatMul::create_op(GpuWorkloadSketch &sketch,
+                                  ITensorInfo       *lhs,
+                                  ITensorInfo       *rhs,
+                                  const Attributes  &attributes,
+                                  const Settings    &settings)
+{
+    ARM_COMPUTE_LOG_PARAMS(lhs, rhs, attributes, settings);
+
+    // The destination tensor info comes from the sketch, not from the caller
+    ITensorInfo *dst = sketch.implementation().create_virtual_tensor();
+
+    // Assert validation
+    ARM_COMPUTE_ERROR_THROW_ON(GpuMatMul::validate_op(sketch, lhs, rhs, attributes, settings));
+    ARM_COMPUTE_ERROR_ON_NULLPTR(lhs, rhs, dst);
+
+    // Auto initialize dst tensor
+    calculate_and_init_dst_if_empty(dst, lhs, rhs, attributes, settings);
+
+    // Translate into components and add to component graph
+    auto      &comp_graph = sketch.implementation().component_graph();
+    const auto sketch_ctx = sketch.implementation().context();
+
+    if (sketch_ctx->gpu_language() == GpuLanguage::OpenCL)
+    {
+        auto properties = IGpuKernelComponent::Properties();
+        properties.stage(UnitWorkloadStage{UnitWorkloadStage::Stage::Run});
+
+        ArgumentPack<ITensorInfo> arguments;
+        arguments.add_const_tensor(ACL_SRC_0, lhs);
+        arguments.add_const_tensor(ACL_SRC_1, rhs);
+        arguments.add_const_tensor(ACL_DST_0, dst);
+        comp_graph.add_new_component<ClComponentMatMul>(properties, arguments, attributes, settings);
+    }
+    else
+    {
+        ARM_COMPUTE_ERROR("Unimplemented Gpu language");
+    }
+
+    // Set up fusion test by adding to the Operator Group
+    // Note this has to be performed after all the components have been successfully added to the component graph
+
+    // Pack tensor infos
+    ArgumentPack<ITensorInfo> tensors;
+    tensors.add_const_tensor(ACL_SRC_0, lhs);
+    tensors.add_const_tensor(ACL_SRC_1, rhs);
+    tensors.add_const_tensor(ACL_DST_0, dst);
+
+    // Unlike validate_op(), which only tries the addition, the operator is really added to the group here
+    const auto op = sketch.implementation().operator_group().new_operator(operator_type, tensors);
+    sketch.implementation().operator_group().add_operator(op);
+
+    return dst;
+}
+
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+#endif // ACL_INTERNAL_TEST_CKW_IN_DF
author	Adnan AlSinan <adnan.alsinan@arm.com>	2023-10-06 19:07:48 +0100
committer	Adnan AlSinan <adnan.alsinan@arm.com>	2023-10-10 10:33:15 +0000
commit	d9c1d44185e4fea72ce4d37d76021237fe64eeaf (patch)
tree	3b9f569f2286cfd91c9370fe6cef094779fc460b
parent	0b72aa4b2abdba7ab48aaa8a45c624ba1e27a411 (diff)
download	ComputeLibrary-d9c1d44185e4fea72ce4d37d76021237fe64eeaf.tar.gz