aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAdnan AlSinan <adnan.alsinan@arm.com>2023-10-06 19:07:48 +0100
committerAdnan AlSinan <adnan.alsinan@arm.com>2023-10-10 10:33:15 +0000
commitd9c1d44185e4fea72ce4d37d76021237fe64eeaf (patch)
tree3b9f569f2286cfd91c9370fe6cef094779fc460b
parent0b72aa4b2abdba7ab48aaa8a45c624ba1e27a411 (diff)
downloadComputeLibrary-d9c1d44185e4fea72ce4d37d76021237fe64eeaf.tar.gz
Port MatMul to Dynamic Fusion + CKW boilerplate code
- Port Matmaul to to Dynamic Fusion. - Prepare a CKW boilerplate code. - Implement the following classes: - MatMulAttributes - GPUMatMulSettings - GpuMatMul - ClComponentMatMul - GpuCkwMatMul Signed-off-by: Adnan AlSinan <adnan.alsinan@arm.com> Change-Id: I5a7c183b293973e8a4233b554b2affe0bb28f44d Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10453 Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Jakub Sujak <jakub.sujak@arm.com> Reviewed-by: Viet-Hoa Do <viet-hoa.do@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com> Benchmark: Arm Jenkins <bsgcomp@arm.com>
-rw-r--r--Android.bp4
-rw-r--r--arm_compute/dynamic_fusion/sketch/attributes/MatMulAttributes.h62
-rw-r--r--arm_compute/dynamic_fusion/sketch/gpu/operators/GpuMatMul.h132
-rw-r--r--filelist.json4
-rw-r--r--src/dynamic_fusion/sketch/attributes/MatMulAttributes.cpp52
-rw-r--r--src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwMatMul.cpp71
-rw-r--r--src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwMatMul.h85
-rw-r--r--src/dynamic_fusion/sketch/gpu/components/cl/ClComponentMatMul.cpp151
-rw-r--r--src/dynamic_fusion/sketch/gpu/components/cl/ClComponentMatMul.h123
-rw-r--r--src/dynamic_fusion/sketch/gpu/operators/GpuMatMul.cpp249
10 files changed, 933 insertions, 0 deletions
diff --git a/Android.bp b/Android.bp
index 5760b7d08e..9da5657172 100644
--- a/Android.bp
+++ b/Android.bp
@@ -618,6 +618,7 @@ cc_library_static {
"src/dynamic_fusion/sketch/attributes/ClampAttributes.cpp",
"src/dynamic_fusion/sketch/attributes/Conv2dAttributes.cpp",
"src/dynamic_fusion/sketch/attributes/DepthwiseConv2dAttributes.cpp",
+ "src/dynamic_fusion/sketch/attributes/MatMulAttributes.cpp",
"src/dynamic_fusion/sketch/attributes/Pool2dAttributes.cpp",
"src/dynamic_fusion/sketch/attributes/ReshapeAttributes.cpp",
"src/dynamic_fusion/sketch/attributes/ResizeAttributes.cpp",
@@ -640,6 +641,7 @@ cc_library_static {
"src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwDepthwiseConv2d.cpp",
"src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwDirectConv2d.cpp",
"src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.cpp",
+ "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwMatMul.cpp",
"src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwPool2d.cpp",
"src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwResize.cpp",
"src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.cpp",
@@ -650,6 +652,7 @@ cc_library_static {
"src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp",
"src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DMaxShiftExpSum.cpp",
"src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DNorm.cpp",
+ "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentMatMul.cpp",
"src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.cpp",
"src/dynamic_fusion/sketch/gpu/components/cl/ClComponentReshape.cpp",
"src/dynamic_fusion/sketch/gpu/components/cl/ClComponentResize.cpp",
@@ -659,6 +662,7 @@ cc_library_static {
"src/dynamic_fusion/sketch/gpu/operators/GpuClamp.cpp",
"src/dynamic_fusion/sketch/gpu/operators/GpuConv2d.cpp",
"src/dynamic_fusion/sketch/gpu/operators/GpuDepthwiseConv2d.cpp",
+ "src/dynamic_fusion/sketch/gpu/operators/GpuMatMul.cpp",
"src/dynamic_fusion/sketch/gpu/operators/GpuMul.cpp",
"src/dynamic_fusion/sketch/gpu/operators/GpuOutput.cpp",
"src/dynamic_fusion/sketch/gpu/operators/GpuPool2d.cpp",
diff --git a/arm_compute/dynamic_fusion/sketch/attributes/MatMulAttributes.h b/arm_compute/dynamic_fusion/sketch/attributes/MatMulAttributes.h
new file mode 100644
index 0000000000..fc512e9ff9
--- /dev/null
+++ b/arm_compute/dynamic_fusion/sketch/attributes/MatMulAttributes.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ACL_ARM_COMPUTE_DYNAMIC_FUSION_SKETCH_ATTRIBUTES_MATMULATTRIBUTES_H
+#define ACL_ARM_COMPUTE_DYNAMIC_FUSION_SKETCH_ATTRIBUTES_MATMULATTRIBUTES_H
+
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+/** Attributes are backend-agnostic parameters (in addition to the input/output tensors) of an operator.
+ */
+
+/** MatMul attributes */
+class MatMulAttributes
+{
+public:
+ /* Get adjoint LHS flag value (transpose LHS before multiplication) */
+ bool adj_lhs() const;
+
+ /* Get adjoint RHS flag value (transpose RHS before multiplication) */
+ bool adj_rhs() const;
+
+ /* Set adjoint LHS flag value (transpose LHS before multiplication) */
+ MatMulAttributes adj_lhs(bool adj_lhs);
+
+ /* Set adjoint RHS flag value (transpose RHS before multiplication) */
+ MatMulAttributes adj_rhs(bool adj_rhs);
+
+private:
+ bool _adj_lhs{false};
+ bool _adj_rhs{false};
+};
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+
+#endif // ACL_ARM_COMPUTE_DYNAMIC_FUSION_SKETCH_ATTRIBUTES_MATMULATTRIBUTES_H
diff --git a/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuMatMul.h b/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuMatMul.h
new file mode 100644
index 0000000000..6ba5b1db93
--- /dev/null
+++ b/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuMatMul.h
@@ -0,0 +1,132 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ACL_ARM_COMPUTE_DYNAMIC_FUSION_SKETCH_GPU_OPERATORS_GPUMATMUL_H
+#define ACL_ARM_COMPUTE_DYNAMIC_FUSION_SKETCH_GPU_OPERATORS_GPUMATMUL_H
+
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/KernelDescriptors.h"
+#include "arm_compute/dynamic_fusion/sketch/attributes/MatMulAttributes.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+/** Forward declaration */
+class GpuWorkloadContext;
+class GpuWorkloadSketch;
+
+/** Operator backend specific settings
+*/
+class GpuMatMulSettings
+{
+public:
+ /** Set N0: number of columns processed by each work-item */
+ GpuMatMulSettings &n0(int n0);
+ /** Get N0: number of columns processed by each work-item */
+ int n0() const;
+
+ /** Set M0: number of rows processed by each work-item */
+ GpuMatMulSettings &m0(int m0);
+ /** Get M0: number of rows processed by each work-item */
+ int m0() const;
+
+ /** Set K0: number of inner accumulations */
+ GpuMatMulSettings &k0(int k0);
+ /** Get K0: number of inner accumulations */
+ int k0() const;
+
+private:
+ int _n0{0}; /**< Number of columns processed by each work-item */
+ int _m0{0}; /**< Number of rows processed by each work-item */
+ int _k0{0}; /**< Number of inner accumulations */
+};
+
+/** Operator interface. */
+class GpuMatMul final
+{
+public:
+ /* Attributes are a set of backend-agnostic parameters that define what an operator does */
+ using Attributes = MatMulAttributes;
+ /* Settings are a set of backend-specific parameters that influence the implementation of a operator */
+ using Settings = GpuMatMulSettings;
+
+ /* Create an operator and fuse it into the workload sketch.
+ * @note If @ref validate_op() fails, the creation also fails and may throw an error.
+ * @note If @ref validate_op() fails, @p sketch remains unchanged and valid.
+ *
+ * Valid data type configurations:
+ * |LHS |RHS |dst |
+ * |:--------------|:--------------|:--------------|
+ * |F16 |F16 |F16 |
+ * |F32 |F32 |F32 |
+ *
+ *
+ * @param[in,out] sketch Workload sketch into which the operator will be fused
+ * @param[in] lhs Input tensor info for the LHS matrix. Data type supported: F32/F16. Dimensions above 2 are collapsed onto dimension 2 and represent the batch.
+ * @param[in] rhs Input tensor info for the RHS matrix. Data type supported: same as @p lhs. Dimensions above 2 are collapsed onto dimension 2 and represent the batch.
+ * @param[in] attributes Operator attributes
+ * @param[in] settings Operator settings
+ *
+ * @return Pointer for the destination tensor info
+ */
+ static ITensorInfo *create_op(GpuWorkloadSketch &sketch,
+ ITensorInfo *lhs,
+ ITensorInfo *rhs,
+ const Attributes &attributes,
+ const Settings &settings);
+
+ /* Check if the operator configuration is supported, irrespective of fusion
+ *
+ * @param[in] context Workload context within which the operator is running
+ * @param[in] lhs Input tensor info for the LHS matrix.
+ * @param[in] rhs Input tensor info for the RHS matrix.
+ * @param[in] attributes Operator attributes
+ * @param[in] settings Operator settings
+ *
+ * @return Status
+ */
+ static Status is_supported_op(const GpuWorkloadContext &context,
+ const ITensorInfo *lhs,
+ const ITensorInfo *rhs,
+ const Attributes &attributes,
+ const Settings &settings);
+
+ /* Check if the operator configuration is supported and if it can be fused into the workload sketch.
+ *
+ * Parameters are similar to @ref GpuMatMul::create_op()
+ *
+ * @return Status
+ */
+ static Status validate_op(const GpuWorkloadSketch &sketch,
+ const ITensorInfo *lhs,
+ const ITensorInfo *rhs,
+ const Attributes &attributes,
+ const Settings &settings);
+};
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+#endif // ACL_ARM_COMPUTE_DYNAMIC_FUSION_SKETCH_GPU_OPERATORS_GPUMATMUL_H
diff --git a/filelist.json b/filelist.json
index aeb54c6d9d..557c2b12cb 100644
--- a/filelist.json
+++ b/filelist.json
@@ -2294,6 +2294,7 @@
"src/dynamic_fusion/sketch/attributes/ClampAttributes.cpp",
"src/dynamic_fusion/sketch/attributes/Conv2dAttributes.cpp",
"src/dynamic_fusion/sketch/attributes/DepthwiseConv2dAttributes.cpp",
+ "src/dynamic_fusion/sketch/attributes/MatMulAttributes.cpp",
"src/dynamic_fusion/sketch/attributes/Pool2dAttributes.cpp",
"src/dynamic_fusion/sketch/attributes/ResizeAttributes.cpp",
"src/dynamic_fusion/sketch/attributes/SoftmaxAttributes.cpp",
@@ -2310,6 +2311,7 @@
"src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.cpp",
"src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDepthwiseConv2d.cpp",
"src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.cpp",
+ "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentMatMul.cpp",
"src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.cpp",
"src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp",
"src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DMaxShiftExpSum.cpp",
@@ -2322,6 +2324,7 @@
"src/dynamic_fusion/sketch/gpu/operators/GpuClamp.cpp",
"src/dynamic_fusion/sketch/gpu/operators/GpuConv2d.cpp",
"src/dynamic_fusion/sketch/gpu/operators/GpuDepthwiseConv2d.cpp",
+ "src/dynamic_fusion/sketch/gpu/operators/GpuMatMul.cpp",
"src/dynamic_fusion/sketch/gpu/operators/GpuMul.cpp",
"src/dynamic_fusion/sketch/gpu/operators/GpuReshape.cpp",
"src/dynamic_fusion/sketch/gpu/operators/GpuPool2d.cpp",
@@ -2357,6 +2360,7 @@
"src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwDirectConv2d.cpp",
"src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.cpp",
"src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwPool2d.cpp",
+ "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwMatMul.cpp",
"src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwResize.cpp",
"src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.cpp",
"src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.cpp",
diff --git a/src/dynamic_fusion/sketch/attributes/MatMulAttributes.cpp b/src/dynamic_fusion/sketch/attributes/MatMulAttributes.cpp
new file mode 100644
index 0000000000..027b550377
--- /dev/null
+++ b/src/dynamic_fusion/sketch/attributes/MatMulAttributes.cpp
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/dynamic_fusion/sketch/attributes/MatMulAttributes.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+MatMulAttributes MatMulAttributes::adj_lhs(bool adj_lhs)
+{
+ _adj_lhs = adj_lhs;
+ return *this;
+}
+MatMulAttributes MatMulAttributes::adj_rhs(bool adj_rhs)
+{
+ _adj_rhs = adj_rhs;
+ return *this;
+}
+bool MatMulAttributes::adj_lhs() const
+{
+ return _adj_lhs;
+}
+bool MatMulAttributes::adj_rhs() const
+{
+ return _adj_rhs;
+}
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwMatMul.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwMatMul.cpp
new file mode 100644
index 0000000000..77e5f7af01
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwMatMul.cpp
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwMatMul.h"
+
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+
+GpuCkwMatMul::GpuCkwMatMul(ComponentId id,
+ const ArgumentPack<ITensorInfo> &tensors,
+ const Attributes &attributes,
+ const Settings &settings)
+ : IGpuCkwComponentDriver{id, tensors}, _lhs{}, _rhs{}, _dst{}, _attributes{attributes}, _settings{settings}
+{
+ _lhs = this->tensors().get_const_tensor(TensorType::ACL_SRC_0);
+ _rhs = this->tensors().get_const_tensor(TensorType::ACL_SRC_1);
+ _dst = this->tensors().get_const_tensor(TensorType::ACL_DST_0);
+ ARM_COMPUTE_ERROR_ON_NULLPTR(_lhs, _rhs, _dst);
+}
+
+void GpuCkwMatMul::write_component_code(const ComponentGroup &comp_group,
+ GpuCkwVariableTable &vtable,
+ GpuCkwScopedKernelWriter writer) const
+{
+ ARM_COMPUTE_UNUSED(comp_group, vtable, writer);
+}
+
+Window GpuCkwMatMul::get_window() const
+{
+ ARM_COMPUTE_ERROR_ON_MSG(_dst->tensor_shape().total_size() == 0U, "Destination tensor is not initialized");
+ return Window();
+}
+
+std::string GpuCkwMatMul::get_name(const ComponentGroup &comp_group) const
+{
+ ARM_COMPUTE_UNUSED(comp_group);
+
+ return "MatMul";
+}
+
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwMatMul.h b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwMatMul.h
new file mode 100644
index 0000000000..ae2ea09f05
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwMatMul.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_COMPONENTS_GPUCKWMATMUL_H
+#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_COMPONENTS_GPUCKWMATMUL_H
+
+#include "arm_compute/dynamic_fusion/sketch/attributes/MatMulAttributes.h"
+
+#include "src/core/common/Macros.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/IGpuCkwComponentDriver.h"
+#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentMatMul.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+class GpuCkwMatMul final : public IGpuCkwComponentDriver
+{
+public:
+ using Attributes = ClComponentMatMul::Attributes;
+ using Settings = ClComponentMatMul::Settings;
+
+public:
+ /** Constructor
+ *
+ * For supported configurations please refer to @ref ClComponentMatMul::validate()
+ *
+ * @param[in] id Component id
+ * @param[in] tensors Tensor arguments to the component
+ * @param[in] attributes Component attributes. Attributes are a set of parameters that define what a component does
+ * @param[in] settings Component settings. Settings are a set of parameters that influence the implementation of a component
+ */
+ GpuCkwMatMul(ComponentId id,
+ const ArgumentPack<ITensorInfo> &tensors,
+ const Attributes &attributes,
+ const Settings &settings);
+
+ ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(GpuCkwMatMul);
+
+ /** Destructor */
+ ~GpuCkwMatMul() override = default;
+
+ // Inherited methods overriden
+ virtual void write_component_code(const ComponentGroup &comp_group,
+ GpuCkwVariableTable &vtable,
+ GpuCkwScopedKernelWriter writer) const override;
+ Window get_window() const override;
+ std::string get_name(const ComponentGroup &comp_group) const override;
+
+private:
+ const ITensorInfo *_lhs;
+ const ITensorInfo *_rhs;
+ const ITensorInfo *_dst;
+
+ const Attributes _attributes;
+ const Settings _settings;
+};
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+
+#endif // ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_COMPONENTS_GPUCKWMATMUL_H
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentMatMul.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentMatMul.cpp
new file mode 100644
index 0000000000..eada61e1b3
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentMatMul.cpp
@@ -0,0 +1,151 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifdef ACL_INTERNAL_TEST_CKW_IN_DF
+
+#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentMatMul.h"
+
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/dynamic_fusion/sketch/attributes/MatMulAttributes.h"
+
+#include "src/core/CL/CLValidate.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwMatMul.h"
+#include "src/gpu/cl/kernels/helpers/MatMulKernelHelpers.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+namespace
+{
+using Attributes = MatMulAttributes;
+using Settings = GpuMatMulSettings;
+
+Status validate_matmul_kernel_info(Attributes attributes, Settings settings)
+{
+ const bool adj_lhs = attributes.adj_lhs();
+ const bool adj_rhs = attributes.adj_rhs();
+ const int m0 = settings.m0();
+ const int n0 = settings.n0();
+ const int k0 = settings.k0();
+
+ // Validate M0
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(m0 < 1, "Only positive integers are supported for M0");
+
+ if (adj_lhs)
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(((m0 & (m0 - 1)) && (m0 != 3)) || (m0 > 16),
+ "Only 1,2,3,4,8,16 are supported for M0 for Lhs transposed");
+ }
+
+ // Validate N0
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(n0 < 1, "Only positive integers are supported for N0");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(((n0 & (n0 - 1)) && (n0 != 3)) || (n0 > 16),
+ "Only 1,2,3,4,8,16 are supported for N0");
+
+ // Validate K0
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(k0 < 1, "Only positive integers are supported for K0");
+ if (!adj_lhs || adj_rhs)
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(((k0 & (k0 - 1)) && (k0 != 3)) || (k0 > 16),
+ "Only 1,2,3,4,8,16 are supported for K0");
+ }
+
+ return Status{};
+}
+
+} // namespace
+
+Status ClComponentMatMul::validate(const Properties &properties,
+ const ArgumentPack<ITensorInfo> &tensors,
+ const Attributes &attributes,
+ const Settings &settings)
+{
+ ARM_COMPUTE_UNUSED(properties);
+ ARM_COMPUTE_UNUSED(attributes);
+
+ const auto lhs = tensors.get_const_tensor(TensorType::ACL_SRC_0);
+ const auto rhs = tensors.get_const_tensor(TensorType::ACL_SRC_1);
+ const auto dst = tensors.get_const_tensor(TensorType::ACL_DST_0);
+
+ // Check if Matching data type
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(lhs, rhs);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(lhs, dst);
+
+ // Data type
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lhs, 1, DataType::F16, DataType::F32);
+ // Data layout
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_LAYOUT_NOT_IN(lhs, DataLayout::NHWC);
+
+ // All tensor infos are initialized
+ ARM_COMPUTE_RETURN_ERROR_ON(lhs->tensor_shape().total_size() == 0);
+ ARM_COMPUTE_RETURN_ERROR_ON(rhs->tensor_shape().total_size() == 0);
+ ARM_COMPUTE_RETURN_ERROR_ON(dst->tensor_shape().total_size() == 0);
+
+ // Device requirements are met
+ ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(lhs);
+
+ // Check if dst shape is correct
+ MatMulKernelInfo matmul_kernel_info =
+ MatMulKernelInfo(attributes.adj_lhs(), attributes.adj_rhs(), settings.m0(), settings.n0(), settings.k0());
+ const auto expected_dst_shape =
+ misc::shape_calculator::compute_matmul_shape(lhs->tensor_shape(), rhs->tensor_shape(), matmul_kernel_info);
+
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(dst->tensor_shape(), expected_dst_shape);
+
+ // Check if block sizes are supported
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_matmul_kernel_info(attributes, settings));
+
+ ARM_COMPUTE_RETURN_ON_ERROR(
+ opencl::kernels::validate_matmul_input_shapes(lhs->tensor_shape(), rhs->tensor_shape(), matmul_kernel_info));
+
+ return Status{};
+}
+
+ClComponentMatMul::ClComponentMatMul(ComponentId id,
+ const Properties &properties,
+ const ArgumentPack<ITensorInfo> &tensors,
+ const Attributes &attributes,
+ const Settings &settings)
+ : IGpuKernelComponent{id, properties, tensors},
+ _component_writer{std::make_unique<GpuCkwMatMul>(id, tensors, attributes, settings)}
+{
+}
+
+ClComponentMatMul::~ClComponentMatMul()
+{
+}
+
+const IGpuCkwComponentDriver *ClComponentMatMul::ckw_component_driver() const
+{
+ return _component_writer.get();
+}
+
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+
+#endif // ACL_INTERNAL_TEST_CKW_IN_DF
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentMatMul.h b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentMatMul.h
new file mode 100644
index 0000000000..41833e4adb
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentMatMul.h
@@ -0,0 +1,123 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTMATMUL_H
+#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTMATMUL_H
+
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/KernelDescriptors.h"
+#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuMatMul.h"
+
+#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h"
+
+#include <memory>
+
+namespace arm_compute
+{
+/** Forward declaration */
+class ITensorInfo;
+namespace experimental
+{
+namespace dynamic_fusion
+{
+/** Forward declaration */
+template <typename T>
+class ArgumentPack;
+class MatMulAttributes;
+class GpuCkwMatMul;
+
+class ClComponentMatMul final : public IGpuKernelComponent
+{
+public:
+ /** Attributes are a set of backend-agnostic parameters that define what a component does */
+ using Attributes = MatMulAttributes;
+ /** Settings are a set of backend-specific parameters that influence the implementation of a component */
+ using Settings = GpuMatMulSettings;
+
+ /** Validate the component
+ *
+ * @param[in] properties Component properties
+ * @param[in,out] tensors Tensor arguments to the component
+ * @param[in] attributes Component attributes
+ * @param[in] settings Component settings
+ *
+ * @return Status Validation results
+ *
+ * Tensor argument names:
+ * - ACL_SRC_0: LHS
+ * - ACL_SRC_1: RHS
+ * - ACL_DST_0: Output
+ *
+ * Tensor argument constness:
+ * - ACL_SRC_0: Const
+ * - ACL_SRC_1: Const
+ * - ACL_DST_0: Const
+ *
+ * Valid data layouts:
+ * - NHWC
+ *
+ * Valid data type configurations:
+ * |ACL_SRC_0 |ACL_SRC_1 |ACL_DST_0 |
+ * |:--------------|:--------------|:--------------|
+ * |F16 |F16 |F16 |
+ * |F32 |F32 |F32 |
+ */
+ static Status validate(const Properties &properties,
+ const ArgumentPack<ITensorInfo> &tensors,
+ const Attributes &attributes,
+ const Settings &settings);
+
+ /** Constructor
+ *
+ * Similar to @ref ClComponentMatMul::validate()
+ */
+ ClComponentMatMul(ComponentId id,
+ const Properties &properties,
+ const ArgumentPack<ITensorInfo> &tensors,
+ const Attributes &attributes,
+ const Settings &settings);
+ /** Destructor */
+ ~ClComponentMatMul() override;
+ /** Prevent instances of this class from being copy constructed */
+ ClComponentMatMul(const ClComponentMatMul &component) = delete;
+ /** Prevent instances of this class from being copied */
+ ClComponentMatMul &operator=(const ClComponentMatMul &component) = delete;
+ /** Allow instances of this class to be move constructed */
+ ClComponentMatMul(ClComponentMatMul &&component) = default;
+ /** Allow instances of this class to be moved */
+ ClComponentMatMul &operator=(ClComponentMatMul &&component) = default;
+ /** Get writer for the component */
+ const IGpuCkwComponentDriver *ckw_component_driver() const override;
+ /** Get component type */
+ GpuComponentType type() const override
+ {
+ return GpuComponentType::Complex;
+ }
+
+private:
+ std::unique_ptr<GpuCkwMatMul> _component_writer;
+};
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+#endif // ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTMATMUL_H
diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuMatMul.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuMatMul.cpp
new file mode 100644
index 0000000000..ee27b5ea47
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/operators/GpuMatMul.cpp
@@ -0,0 +1,249 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifdef ACL_INTERNAL_TEST_CKW_IN_DF
+
+#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuMatMul.h"
+
+#include "arm_compute/core/KernelDescriptors.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/runtime/CL/CLScheduler.h"
+
+#include "src/common/utils/Log.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/dynamic_fusion/sketch/ArgumentPack.h"
+#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentMatMul.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+namespace
+{
+void calculate_and_init_dst_if_empty(ITensorInfo *dst,
+ const ITensorInfo *lhs,
+ const ITensorInfo *rhs,
+ const MatMulAttributes &attributes,
+ const GpuMatMulSettings &settings)
+{
+ ARM_COMPUTE_UNUSED(attributes);
+
+ if (dst->total_size() == 0U)
+ {
+ const auto dst_shape = misc::shape_calculator::compute_matmul_shape(
+ lhs->tensor_shape(), rhs->tensor_shape(),
+ MatMulKernelInfo(attributes.adj_lhs(), attributes.adj_rhs(), settings.m0(), settings.n0(), settings.k0()));
+
+ auto_init_if_empty(*dst, lhs->clone()->set_tensor_shape(dst_shape));
+ }
+}
+
+/* A helper method to reduce the duplication in dst tensor initialization
+* when calling validate()
+*/
+Status is_supported_op_helper(const GpuWorkloadContext &context,
+ const ITensorInfo *lhs,
+ const ITensorInfo *rhs,
+ const ITensorInfo *dst,
+ const MatMulAttributes &attributes,
+ const GpuMatMulSettings &settings)
+{
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(lhs, rhs);
+
+ TensorInfo dst_info_to_validate;
+ const ITensorInfo *dst_info_to_validate_ptr = &dst_info_to_validate;
+
+ if (dst != nullptr)
+ {
+ dst_info_to_validate_ptr = dst;
+ }
+
+ calculate_and_init_dst_if_empty(&dst_info_to_validate, lhs, rhs, attributes, settings);
+
+ // Check support level
+ // Data type
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lhs, 1, DataType::F16, DataType::F32);
+ // Data layout
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_LAYOUT_NOT_IN(lhs, DataLayout::NHWC);
+
+ // Check components
+ if (context.gpu_language() == GpuLanguage::OpenCL)
+ {
+ const auto cl_compile_ctx = context.cl_compile_context();
+ ARM_COMPUTE_RETURN_ERROR_ON(cl_compile_ctx == nullptr);
+ // Validate MatMul Component
+ {
+ const auto properties =
+ IGpuKernelComponent::Properties().stage(UnitWorkloadStage{UnitWorkloadStage::Stage::Run});
+
+ ArgumentPack<ITensorInfo> arguments;
+ arguments.add_const_tensor(ACL_SRC_0, lhs);
+ arguments.add_const_tensor(ACL_SRC_1, rhs);
+ arguments.add_const_tensor(ACL_DST_0, dst_info_to_validate_ptr);
+
+ ARM_COMPUTE_RETURN_ON_ERROR(ClComponentMatMul::validate(properties, arguments, attributes, settings));
+ }
+ }
+ else
+ {
+ ARM_COMPUTE_RETURN_ERROR_MSG("Unimplemented Gpu language");
+ }
+ return Status{};
+}
+
+constexpr GpuOperatorType operator_type = GpuOperatorType::Complex;
+} // namespace
+
+int GpuMatMulSettings::n0() const
+{
+ return _n0;
+}
+
+GpuMatMulSettings &GpuMatMulSettings::n0(int n0)
+{
+ _n0 = n0;
+ return *this;
+}
+
+int GpuMatMulSettings::m0() const
+{
+ return _m0;
+}
+
+GpuMatMulSettings &GpuMatMulSettings::m0(int m0)
+{
+ _m0 = m0;
+ return *this;
+}
+
+int GpuMatMulSettings::k0() const
+{
+ return _k0;
+}
+
+GpuMatMulSettings &GpuMatMulSettings::k0(int k0)
+{
+ _k0 = k0;
+ return *this;
+}
+
+Status GpuMatMul::is_supported_op(const GpuWorkloadContext &context,
+ const ITensorInfo *lhs,
+ const ITensorInfo *rhs,
+ const MatMulAttributes &attributes,
+ const GpuMatMulSettings &settings)
+{
+ return is_supported_op_helper(context, lhs, rhs, nullptr, attributes, settings);
+}
+
+Status GpuMatMul::validate_op(const GpuWorkloadSketch &sketch,
+ const ITensorInfo *lhs,
+ const ITensorInfo *rhs,
+ const MatMulAttributes &attributes,
+ const GpuMatMulSettings &settings)
+{
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(lhs, rhs);
+
+ // Check if tensors have valid id. I.e. they are created from a sketch
+ ARM_COMPUTE_RETURN_ERROR_ON(!lhs->has_valid_id() || !rhs->has_valid_id());
+
+ // Refer to GpuMatmul::validate_op() for id-validness of this TensorInfo object
+ TensorInfo dst_info_to_validate;
+
+ // Auto initialize dst tensor info
+ calculate_and_init_dst_if_empty(&dst_info_to_validate, lhs, rhs, attributes, settings);
+
+ // Perform fusion test
+ // Check if operator meets fusion constraints
+ ArgumentPack<ITensorInfo> tensors;
+ tensors.add_const_tensor(ACL_SRC_0, lhs);
+ tensors.add_const_tensor(ACL_SRC_1, rhs);
+ tensors.add_const_tensor(ACL_DST_0, &dst_info_to_validate);
+ const auto op = sketch.implementation().operator_group().new_operator(operator_type, tensors);
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(!sketch.implementation().operator_group().try_add_operator(op),
+ "Operator fusion test failed. This operator cannot be fused into the workload");
+
+ // Check if configuration is supported
+ return is_supported_op_helper(*sketch.gpu_context(), lhs, rhs, &dst_info_to_validate, attributes, settings);
+}
+
+ITensorInfo *GpuMatMul::create_op(GpuWorkloadSketch &sketch,
+ ITensorInfo *lhs,
+ ITensorInfo *rhs,
+ const Attributes &attributes,
+ const Settings &settings)
+{
+ ARM_COMPUTE_LOG_PARAMS(lhs, rhs, attributes, settings);
+
+ ITensorInfo *dst = sketch.implementation().create_virtual_tensor();
+
+ // Assert validation
+ ARM_COMPUTE_ERROR_THROW_ON(GpuMatMul::validate_op(sketch, lhs, rhs, attributes, settings));
+ ARM_COMPUTE_ERROR_ON_NULLPTR(lhs, rhs, dst);
+
+ // Auto initialize dst tensor
+ calculate_and_init_dst_if_empty(dst, lhs, rhs, attributes, settings);
+
+ // Translate into components and add to component graph
+ auto &comp_graph = sketch.implementation().component_graph();
+ const auto sketch_ctx = sketch.implementation().context();
+
+ if (sketch_ctx->gpu_language() == GpuLanguage::OpenCL)
+ {
+ auto properties = IGpuKernelComponent::Properties();
+ properties.stage(UnitWorkloadStage{UnitWorkloadStage::Stage::Run});
+
+ ArgumentPack<ITensorInfo> arguments;
+ arguments.add_const_tensor(ACL_SRC_0, lhs);
+ arguments.add_const_tensor(ACL_SRC_1, rhs);
+ arguments.add_const_tensor(ACL_DST_0, dst);
+ comp_graph.add_new_component<ClComponentMatMul>(properties, arguments, attributes, settings);
+ }
+ else
+ {
+ ARM_COMPUTE_ERROR("Unimplemented Gpu language");
+ }
+
+ // Set up fusion test by adding to the Operator Group
+ // Note this has to be performed after all the components have been successfully added to the component graph
+
+ // Pack tensor infos
+ ArgumentPack<ITensorInfo> tensors;
+ tensors.add_const_tensor(ACL_SRC_0, lhs);
+ tensors.add_const_tensor(ACL_SRC_1, rhs);
+ tensors.add_const_tensor(ACL_DST_0, dst);
+
+ const auto op = sketch.implementation().operator_group().new_operator(operator_type, tensors);
+ sketch.implementation().operator_group().add_operator(op);
+
+ return dst;
+}
+
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+#endif // ACL_INTERNAL_TEST_CKW_IN_DF