33 files changed, 5231 insertions, 0 deletions
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwComponentArgument.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwComponentArgument.cpp
new file mode 100644
index 0000000000..a42b39700c
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwComponentArgument.cpp
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2023-2024 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwComponentArgument.h"
+
+#include "compute_kernel_writer/include/ckw/Error.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+
+GpuCkwComponentArgument::GpuCkwComponentArgument(ckw::TensorOperand tensor) : _tensor(tensor)
+{
+}
+
+GpuCkwComponentArgument &GpuCkwComponentArgument::init_virtual_tensor(ckw::TileOperand         &tile,
+                                                                      const ckw::TensorSampler &sampler)
+{
+    CKW_ASSERT(_tile == nullptr);
+
+    _tile    = tile;
+    _sampler = sampler;
+
+    return *this;
+}
+
+bool GpuCkwComponentArgument::has_tensor() const
+{
+    return _tensor.is_valid();
+}
+
+ckw::TensorOperand &GpuCkwComponentArgument::tensor()
+{
+    CKW_ASSERT(_tensor.is_valid());
+
+    return _tensor;
+}
+
+const ckw::TensorOperand &GpuCkwComponentArgument::tensor() const
+{
+    CKW_ASSERT(_tensor.is_valid());
+
+    return _tensor;
+}
+
+bool GpuCkwComponentArgument::has_tile() const
+{
+    return _tile.is_valid();
+}
+
+ckw::TileOperand &GpuCkwComponentArgument::tile()
+{
+    CKW_ASSERT(_tile.is_valid());
+
+    return _tile;
+}
+
+const ckw::TileOperand &GpuCkwComponentArgument::tile() const
+{
+    CKW_ASSERT(_tile.is_valid());
+
+    return _tile;
+}
+
+ckw::TensorSampler &GpuCkwComponentArgument::tensor_sampler()
+{
+    CKW_ASSERT(_tile.is_valid());
+
+    return _sampler;
+}
+
+const ckw::TensorSampler &GpuCkwComponentArgument::tensor_sampler() const
+{
+    CKW_ASSERT(_tile.is_valid());
+
+    return _sampler;
+}
+
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwComponentArgument.h b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwComponentArgument.h
new file mode 100644
index 0000000000..7a57c81e5f
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwComponentArgument.h
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 2023-2024 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_GPUCKWCOMPONENTARGUMENT_H
+#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_GPUCKWCOMPONENTARGUMENT_H
+
+#include "compute_kernel_writer/include/ckw/TensorOperand.h"
+#include "compute_kernel_writer/include/ckw/TensorSampler.h"
+#include "compute_kernel_writer/include/ckw/TileOperand.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+
+/** The argument of a dynamic fusion component which can be either user tensor or virtual tensor. */
+class GpuCkwComponentArgument
+{
+public:
+    /** Default constructor */
+    GpuCkwComponentArgument() = default;
+
+    /** Initialize a new instance of @ref GpuCkwComponentArgument class for user tensor.
+     *
+     * @param[in] tensor The user tensor.
+     */
+    explicit GpuCkwComponentArgument(ckw::TensorOperand tensor);
+
+    /** Bind the tile and sampler to the tensor argument.
+     *
+     * This method can be used to share a tile and sampler associated to a tensor
+     * among different kernel components. For example, when we create the destination
+     * tile and destination sampler for the first time (root component), this method can be
+     * used to bind these two information to the destination tensor so that the following
+     * simple components know the tile size and how to access the elements from memory.
+     *
+     * @param[in] tile    The tile that has been loaded.
+     * @param[in] sampler The tensor sampling information that has been used to load the tile.
+     */
+    GpuCkwComponentArgument &init_virtual_tensor(ckw::TileOperand &tile, const ckw::TensorSampler &sampler);
+
+    /** Get whether the argument is a user tensor. */
+    bool has_tensor() const;
+
+    /** Get the tensor operand.
+     *
+     * If the tensor is not available, throw an error.
+     */
+    ckw::TensorOperand &tensor();
+
+    /** Get the tensor operand.
+     *
+     * If the tensor is not available, throw an error.
+     */
+    const ckw::TensorOperand &tensor() const;
+
+    /** Get whether the argument contains a tile.
+     *
+     * The argument can be either a user tensor that has been loaded,
+     * or a virtual tensor (i.e. a tile with tensor sampling information).
+     */
+    bool has_tile() const;
+
+    /** Get the tile operand.
+     *
+     * If the tile is not available, throw an error.
+     */
+    ckw::TileOperand &tile();
+
+    /** Get the tile operand.
+     *
+     * If the tile is not available, throw an error.
+     */
+    const ckw::TileOperand &tile() const;
+
+    /** Get the tensor sampling information for the tile.
+     *
+     * If the tile is not available, throw an error.
+     */
+    ckw::TensorSampler &tensor_sampler();
+
+    /** Get the tensor sampling information for the tile.
+     *
+     * If the tile is not available, throw an error.
+     */
+    const ckw::TensorSampler &tensor_sampler() const;
+
+private:
+    ckw::TensorOperand _tensor{};
+    ckw::TileOperand   _tile{};
+    ckw::TensorSampler _sampler{};
+};
+
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+
+#endif // ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_GPUCKWCOMPONENTARGUMENT_H
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.cpp
new file mode 100644
index 0000000000..a0e5e16aa0
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.cpp
@@ -0,0 +1,139 @@
+/*
+ * Copyright (c) 2023-2024 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.h"
+
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/Window.h"
+
+#include "src/common/utils/Log.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/Common.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/IGpuCkwComponentDriver.h"
+#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h"
+
+#include "compute_kernel_writer/include/ckw/KernelWriter.h"
+#include "compute_kernel_writer/include/ckw/types/TargetArchitecture.h"
+#include "compute_kernel_writer/include/ckw/types/TargetLanguage.h"
+
+using namespace ckw;
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+GpuCkwDriver::GpuCkwDriver(const GpuKernelComponentGroup &components)
+{
+    _components = components;
+
+    // Generate kernel name
+    std::string kernel_name;
+    for (auto &comp : _components)
+    {
+        auto ckw_driver = comp->ckw_component_driver();
+        ARM_COMPUTE_ERROR_ON(ckw_driver == nullptr);
+        kernel_name += ckw_driver->get_name(_components) + "__";
+    }
+
+    // Generate kernel code
+    auto root_writer =
+        KernelWriter::create_instance(ckw::TargetArchitecture::GpuArmMaliValhall, ckw::TargetLanguage::OpenCL);
+    GpuCkwScopedKernelWriter writer(root_writer.get());
+    GpuCkwVariableTable      vtable{};
+
+    for (auto &comp : _components)
+    {
+        auto ckw_driver = comp->ckw_component_driver();
+        ARM_COMPUTE_ERROR_ON(ckw_driver == nullptr);
+        ckw_driver->write_component_code(_components, vtable, writer);
+    }
+    auto kernel = root_writer->emit_kernel(kernel_name);
+
+    // Set the kernel name, kernel arguments and source code
+    _kernel_name = kernel_name;
+    _kernel_args = kernel->arguments();
+    _kernel_code = kernel->source_code();
+}
+
+std::string GpuCkwDriver::get_name()
+{
+    return _kernel_name;
+}
+
+std::string GpuCkwDriver::get_code()
+{
+    return _kernel_code;
+}
+
+std::string GpuCkwDriver::get_config_id()
+{
+    std::string id;
+    for (auto &comp : _components)
+    {
+        auto ckw_driver = comp->ckw_component_driver();
+        ARM_COMPUTE_ERROR_ON(ckw_driver == nullptr);
+        id = ckw_driver->get_tuner_id(_components) + "__";
+    }
+    return id;
+}
+
+Window GpuCkwDriver::get_window() const
+{
+    const auto root_comp = _components.get_root_component();
+    ARM_COMPUTE_ERROR_ON_MSG(root_comp == nullptr, "No root component found");
+    return root_comp->ckw_component_driver()->get_window();
+}
+
+GpuKernelArgumentList GpuCkwDriver::get_kernel_arguments()
+{
+    GpuKernelArgumentList args{};
+    for (const auto &arg : _kernel_args)
+    {
+        switch (arg.type())
+        {
+            case KernelArgument::Type::TensorStorage:
+            {
+                args.emplace_back(static_cast<ITensorInfo::Id>(arg.id()), from_ckw(arg.tensor_storage_type()));
+                break;
+            }
+            case KernelArgument::Type::TensorComponent:
+            {
+                args.emplace_back(static_cast<ITensorInfo::Id>(arg.id()), from_ckw(arg.tensor_component_type()));
+                break;
+            }
+            default:
+            {
+                ARM_COMPUTE_ERROR("Unsupported KernelArgument Type");
+                break;
+            }
+        }
+    }
+    return args;
+}
+
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.h b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.h
new file mode 100644
index 0000000000..f8770920b7
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2023-2024 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_GPUCKWDRIVER_H
+#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_GPUCKWDRIVER_H
+
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
+#include "src/dynamic_fusion/sketch/gpu/IGpuKernelWriter.h"
+
+#include "compute_kernel_writer/include/ckw/Kernel.h"
+#include "compute_kernel_writer/include/ckw/KernelArgument.h"
+#include <string>
+
+namespace arm_compute
+{
+/** Forward declarations */
+class Window;
+
+namespace experimental
+{
+namespace dynamic_fusion
+{
+/** Use Kernel Writer to write kernel code
+ *  Used by dynamic_fusion module
+ */
+class GpuCkwDriver : public IGpuKernelWriter
+{
+public:
+    /** Default constructor */
+    GpuCkwDriver() = delete;
+    /** Constructor
+     *
+     * @param[in] components Kernel component group from which the kernel will be generated
+     */
+    GpuCkwDriver(const GpuKernelComponentGroup &components);
+    /** Destructor */
+    ~GpuCkwDriver() override = default;
+    /** Generate kernel name */
+    std::string get_name() override;
+    /** Generate kernel code */
+    std::string get_code() override;
+    /** Generate config id string of the entire kernel. This is used for tuning */
+    std::string get_config_id() override;
+    /** Generate execution window */
+    Window get_window() const override;
+    /** Get the flat list of arguments of the kernel*/
+    GpuKernelArgumentList get_kernel_arguments() override;
+
+private:
+    GpuKernelComponentGroup          _components{};
+    std::string                      _kernel_name{};
+    std::vector<ckw::KernelArgument> _kernel_args{};
+    std::string                      _kernel_code{};
+};
+
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+
+#endif // ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_GPUCKWDRIVER_H
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.cpp
new file mode 100644
index 0000000000..ae12d13e5a
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.cpp
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2023-2024 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+
+GpuCkwScopedKernelWriter::GpuCkwScopedKernelWriter(ckw::KernelWriter *writer)
+    : _writer(writer), _parent_id_space(writer->id_space())
+{
+    _writer->new_id_space();
+}
+
+GpuCkwScopedKernelWriter::GpuCkwScopedKernelWriter(const GpuCkwScopedKernelWriter &other)
+    : _writer(other._writer), _parent_id_space(other._writer->id_space())
+{
+    _writer->new_id_space();
+}
+
+ckw::KernelWriter *GpuCkwScopedKernelWriter::operator->()
+{
+    return _writer;
+}
+
+const ckw::KernelWriter *GpuCkwScopedKernelWriter::operator->() const
+{
+    return _writer;
+}
+
+ckw::KernelWriter *GpuCkwScopedKernelWriter::writer()
+{
+    return _writer;
+}
+
+const ckw::KernelWriter *GpuCkwScopedKernelWriter::writer() const
+{
+    return _writer;
+}
+
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h
new file mode 100644
index 0000000000..84dd706cd0
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2023-2024 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_GPUCKWSCOPEDKERNELWRITER_H
+#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_GPUCKWSCOPEDKERNELWRITER_H
+
+#include "compute_kernel_writer/include/ckw/KernelWriter.h"
+#include <cstdint>
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+
+/** Helper to automatically manage kernel writer ID space. */
+class GpuCkwScopedKernelWriter
+{
+public:
+    /** Initialize a new instance of @ref GpuCkwScopedKernelWriter class. */
+    explicit GpuCkwScopedKernelWriter(ckw::KernelWriter *writer);
+
+    /** Create a new scope from the specified scoped kernel writer. */
+    GpuCkwScopedKernelWriter(const GpuCkwScopedKernelWriter &other);
+
+    /** Assignment is disallowed. */
+    GpuCkwScopedKernelWriter &operator=(const GpuCkwScopedKernelWriter &) = delete;
+
+    /** Access the underlying kernel writer. */
+    ckw::KernelWriter *operator->();
+
+    /** Access the underlying kernel writer. */
+    const ckw::KernelWriter *operator->() const;
+
+    /** Get the kernel writer. */
+    ckw::KernelWriter *writer();
+
+    /** Get the kernel writer. */
+    const ckw::KernelWriter *writer() const;
+
+private:
+    ckw::KernelWriter *_writer;
+    int32_t            _parent_id_space;
+};
+
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+
+#endif // ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_GPUCKWSCOPEDKERNELWRITER_H
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.cpp
new file mode 100644
index 0000000000..66ccc1ac34
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.cpp
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2023-2024 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h"
+
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/Common.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
+
+#include <sstream>
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+GpuCkwComponentArgument *GpuCkwVariableTable::declare_variable(const GpuKernelComponentGroup &comp_group,
+                                                               GpuCkwScopedKernelWriter      &writer,
+                                                               const ITensorInfo             *tensor,
+                                                               const std::string             &alias)
+{
+    ARM_COMPUTE_ERROR_ON_MSG(!tensor->has_valid_id(), "Tensor info with valid id expected");
+
+    // Do not re-declare if the variable associated with the tensor has already been declared
+    auto it = _vars.find(tensor->id());
+
+    if (it != _vars.end())
+    {
+        return &it->second;
+    }
+    if (comp_group.is_intermediate_tensor(tensor))
+    {
+        // Create a virtual tensor variable
+        GpuCkwComponentArgument var;
+        auto                  &&inserted = _vars.emplace(tensor->id(), var);
+        return &(inserted.first->second);
+    }
+    else
+    {
+        // Create a user tensor variable
+        std::stringstream ss;
+        ss << alias << "_t" << abs(tensor->id());
+        const auto              uniq_name = ss.str();
+        GpuCkwComponentArgument var{writer->declare_tensor_argument(uniq_name, to_ckw(*tensor))};
+        auto                  &&inserted = _vars.emplace(tensor->id(), var);
+        return &(inserted.first->second);
+    }
+}
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h
new file mode 100644
index 0000000000..fc8764c3e2
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2023-2024 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_GPUCKWVARIABLETABLE_H
+#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_GPUCKWVARIABLETABLE_H
+
+#include "arm_compute/core/ITensorInfo.h"
+
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwComponentArgument.h"
+
+#include <map>
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+class GpuKernelComponentGroup;
+class GpuCkwScopedKernelWriter;
+
+/** A table of all the variables used in the kernel.
+ *
+ * It determines whether we create an virtual tensor var or a user tensor var
+ * It avoids duplicating variables for the same tensors (Tensors with the same id)
+ * Each kernel has exactly one variable table.
+ */
+class GpuCkwVariableTable
+{
+public:
+    /** Declare a kernel component variable(argument) for the corresponding tensor info.
+     *
+     * @param[in] comp_group Component group the tensor belongs to
+     * @param[in] writer     Compute Kernel Writer
+     * @param[in] tensor     Tensor info with which the new variable is associated
+     * @param[in] alias      Alias for the variable. Will be used as part of the variable name
+     *
+     * @return GpuCkwComponentArgument*
+     */
+    GpuCkwComponentArgument *declare_variable(const GpuKernelComponentGroup &comp_group,
+                                              GpuCkwScopedKernelWriter      &writer,
+                                              const ITensorInfo             *tensor,
+                                              const std::string             &alias = "unnamed");
+
+private:
+    std::map<ITensorInfo::Id, GpuCkwComponentArgument> _vars{};
+};
+
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+#endif // ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_GPUCKWVARIABLETABLE_H
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/IGpuCkwComponentDriver.h b/src/dynamic_fusion/sketch/gpu/ckw_driver/IGpuCkwComponentDriver.h
new file mode 100644
index 0000000000..52e56e2e35
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/IGpuCkwComponentDriver.h
@@ -0,0 +1,140 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_IGPUCKWCOMPONENTDRIVER
+#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_IGPUCKWCOMPONENTDRIVER
+
+#include "arm_compute/core/Window.h"
+
+#include "src/dynamic_fusion/sketch/ArgumentPack.h"
+#include "src/dynamic_fusion/sketch/gpu/components/Types.h"
+
+namespace arm_compute
+{
+class ITensorInfo;
+namespace experimental
+{
+namespace dynamic_fusion
+{
+/** Forward declaration */
+class GpuKernelComponentGroup;
+class GpuCkwVariableTable;
+class GpuCkwScopedKernelWriter;
+
+/** An interface used by @ref GpuCkwDriver to write source code for a kernel component
+ *
+ * There are 3 main architecture layers for using Compute Kernel Writer (Ckw) inside ACL's dynamic fusion module
+ * From top level to bottom level:
+ * | Layer          | Library
+ * ===========================
+ * | dynamic_fusion |   acl
+ * | ckw_driver     |   acl
+ * | ckw            |   ckw
+ *
+ * ckw_driver is a glue layer that directs how fused code is produced using the ckw library
+ *
+ * There are two main groups within ckw_driver:
+ * - @ref GpuCkwDriver is a global driver that coordinates how the final fused code along with all the info necessary
+ *   for run time execution is produced using ckw
+ * - Various classes implementing @ref IGpuCkwComponentDriver is a component driver that directs ckw to generate kernel component code (e.g. activation, store etc.)
+ *
+ * The overall flow goes like this:
+ * In dynamic_fusion module, @ref GpuLogicalKernel instantiates a @ref GpuCkwDriver from a @ref GpuKernelComponentGroup
+ * The logical kernel then uses the global driver's various interfaces to generate the code info.
+ * In particular, the @ref GpuCkwDriver::get_code() interface will call into each @ref IGpuCkwComponentDriver::write_component_code()
+ */
+class IGpuCkwComponentDriver
+{
+public:
+    using ComponentGroup = GpuKernelComponentGroup;
+
+public:
+    /** Constructor
+     *
+     * @param[in] id      Component id
+     * @param[in] tensors Tensor arguments to the components
+     */
+    IGpuCkwComponentDriver(ComponentId id, const ArgumentPack<ITensorInfo> &tensors) : _id{id}, _tensors{tensors}
+    {
+    }
+    /** Destructor */
+    virtual ~IGpuCkwComponentDriver()
+    {
+    }
+    /** Generate kernel component code
+     *
+     * @param[in]      comp_group Component group of which the component is a part of
+     * @param[in, out] vtable     Table of variables declared by each component
+     * @param[in, out] writer     CKW writer that writes code scoped to this kernel component.
+     *
+     *                            @note @p writer can only be passed via value since the new scope is created in the copy constructor
+     */
+    virtual void write_component_code(const ComponentGroup    &comp_group,
+                                      GpuCkwVariableTable     &vtable,
+                                      GpuCkwScopedKernelWriter writer) const = 0;
+    /** Get tensor arguments */
+    ArgumentPack<ITensorInfo> tensors() const
+    {
+        return _tensors;
+    }
+    /** Generate the execution window for the component */
+    virtual Window get_window() const
+    {
+        return Window{};
+    }
+    /** Generate the name of the component
+     *
+     * This will be concatenated with other components' names to form the name of the kernel
+     */
+    virtual std::string get_name(const ComponentGroup &comp_group) const
+    {
+        ARM_COMPUTE_UNUSED(comp_group);
+        return "unnamed";
+    }
+    /** Generate the tuner id of the component
+     *  This id should capture all the parameters that distinguish one kernel's lws tuning from another.
+     *  e.g. two components that are identical in every other way, but have output tensor dimensions should
+     *  have different tuner ids, because the lws of one may not be optimal on the other.
+     *
+     * This will be concatenated with other components' tuner id to form the tuner id of the kernel
+     */
+    virtual std::string get_tuner_id(const ComponentGroup &comp_group) const
+    {
+        ARM_COMPUTE_UNUSED(comp_group);
+        return "";
+    }
+    /** Get component id */
+    ComponentId id() const
+    {
+        return _id;
+    }
+
+private:
+    ComponentId               _id{-1};
+    ArgumentPack<ITensorInfo> _tensors{};
+};
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+
+#endif /* ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_IGPUCKWCOMPONENTDRIVER */
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwActivation.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwActivation.cpp
new file mode 100644
index 0000000000..18fda5bd6b
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwActivation.cpp
@@ -0,0 +1,295 @@
+/*
+ * Copyright (c) 2023-2024 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "GpuCkwActivation.h"
+
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
+#include "arm_compute/core/Validate.h"
+
+#include "src/core/helpers/WindowHelpers.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/CkwHelper.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/Common.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
+
+#include "compute_kernel_writer/include/ckw/KernelWriter.h"
+#include <cstdint>
+#include <string>
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+
+GpuCkwActivation::GpuCkwActivation(ComponentId                      id,
+                                   const ArgumentPack<ITensorInfo> &tensors,
+                                   const Attributes                &attributes) // NOLINT
+    : IGpuCkwComponentDriver{id, tensors}, _src{}, _dst{}, _attributes{attributes}
+{
+    _src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0);
+    _dst = this->tensors().get_const_tensor(TensorType::ACL_DST_0);
+    ARM_COMPUTE_ERROR_ON_NULLPTR(_src, _dst);
+}
+
+void GpuCkwActivation::write_component_code(const ComponentGroup    &comp_group,
+                                            GpuCkwVariableTable     &vtable,
+                                            GpuCkwScopedKernelWriter writer) const
+{
+    /********************************************************************************
+     * 1 - Define tensors
+     ********************************************************************************/
+    GpuCkwComponentArgument *src = vtable.declare_variable(comp_group, writer, _src, "src");
+    GpuCkwComponentArgument *dst = vtable.declare_variable(comp_group, writer, _dst, "dst");
+
+    /********************************************************************************
+     * 2 - Define CKW constants
+     ********************************************************************************/
+    const auto dst_h  = static_cast<int32_t>(_dst->dimension(1));
+    const auto dst_dt = to_ckw(_dst->data_type());
+
+    // CKW constants
+    auto const_dst_h_i32 = writer->declare_constant_tile(ckw::ConstantData({{dst_h}}, ckw::DataType::Int32));
+    auto const_pos_1_i32 = writer->declare_constant_tile(ckw::ConstantData({{1}}, ckw::DataType::Int32));
+    auto const_0_i32     = writer->declare_constant_tile(ckw::ConstantData({{0}}, ckw::DataType::Int32));
+    auto const_neg_1_fp  = writer->declare_constant_tile(ckw::ConstantData({{-1.0f}}, dst_dt));
+    auto const_pos_1_fp  = writer->declare_constant_tile(ckw::ConstantData({{1.0f}}, dst_dt));
+    auto const_0_fp      = writer->declare_constant_tile(ckw::ConstantData({{0.0f}}, dst_dt));
+    auto const_A_fp      = writer->declare_constant_tile(ckw::ConstantData({{_attributes.a()}}, dst_dt));
+    auto const_B_fp      = writer->declare_constant_tile(ckw::ConstantData({{_attributes.b()}}, dst_dt));
+
+    /********************************************************************************
+     * 3 - Define the compute block parameters and destination tile (if not root component)
+     *     Bind the tile to the tensor to share it among different components and
+     *     initialize the compute block parameters
+     ********************************************************************************/
+    // The compute block parameters depend on the employed tensor format
+
+    // Destination compute block size
+    int32_t dst_n0 = -1;
+    int32_t dst_m0 = -1;
+
+    // Destination compute block size left-over
+    int32_t dst_n0_partial = -1;
+    int32_t dst_m0_partial = -1;
+
+    // Shift-back for the overlapping-min strategy
+    int32_t dst_shift_back = -1;
+
+    if (!dst->has_tile())
+    {
+        // If ROOT component, we use ckw::TensorSamplerFormat::Dim0_Dim1xDim2_1
+        // as tensor format
+        const auto root_window = comp_group.get_root_component()->ckw_component_driver()->get_window();
+
+        dst_n0         = root_window.x().step();
+        dst_m0         = root_window.y().step();
+        dst_n0_partial = _dst->dimension(0) % dst_n0;
+        dst_m0_partial = (_dst->dimension(1) * _dst->dimension(2)) % dst_m0;
+        dst_shift_back = (dst_n0 - dst_n0_partial) % dst_n0;
+
+        ckw::TensorSampler sampler_dst;
+        sampler_dst.format(ckw::TensorSamplerFormat::Dim0_Dim1xDim2_1);
+
+        if (dst_n0_partial == 0)
+        {
+            sampler_dst.address_mode_x(ckw::TensorSamplerAddressModeX::None);
+        }
+        else
+        {
+            sampler_dst.address_mode_x(ckw::TensorSamplerAddressModeX::OverlappingMin);
+        }
+
+        if (dst_m0_partial == 0)
+        {
+            sampler_dst.address_mode_y(ckw::TensorSamplerAddressModeY::None);
+        }
+        else
+        {
+            sampler_dst.address_mode_y(ckw::TensorSamplerAddressModeY::ClampToBorderMaxOnly);
+        }
+
+        sampler_dst.address_mode_z(ckw::TensorSamplerAddressModeZ::None);
+        sampler_dst.storage(ckw::TensorStorageType::BufferUint8Ptr);
+
+        // Declare destination tile
+        auto tile_dst = writer->declare_tile("dst", ckw::TileInfo(dst_dt, dst_m0, dst_n0));
+
+        // Bind tile to the tensor
+        dst->init_virtual_tensor(tile_dst, sampler_dst);
+    }
+    else
+    {
+        // dst_m0_partial depends on the TensorSamplerFormat
+        dst_n0         = dst->tile().tile_info().width();
+        dst_m0         = dst->tile().tile_info().height();
+        dst_n0_partial = _dst->dimension(0) % dst_n0;
+
+        ckw::TensorSampler sampler_dst = dst->tensor_sampler();
+
+        if (sampler_dst.format() == ckw::TensorSamplerFormat::Dim0_Dim1xDim2_1)
+        {
+            dst_m0_partial = (_dst->dimension(1) * _dst->dimension(2)) % dst_m0;
+        }
+        else if (sampler_dst.format() == ckw::TensorSamplerFormat::Dim0_Dim1_Dim2)
+        {
+            dst_m0_partial = _dst->dimension(1) % dst_m0;
+        }
+
+        // Shift-back for the overlapping-min strategy
+        dst_shift_back = (dst_n0 - dst_n0_partial) % dst_n0;
+    }
+
+    const auto &tile_dst = dst->tile();
+
+    /********************************************************************************
+     * 4 - Define the compute block parameters CKW constants
+     ********************************************************************************/
+    // Only now we can declare the N0 and M0 as constant
+    auto const_dst_n0 = writer->declare_constant_tile(ckw::ConstantData({{dst_n0}}, ckw::DataType::Int32));
+    auto const_dst_m0 = writer->declare_constant_tile(ckw::ConstantData({{dst_m0}}, ckw::DataType::Int32));
+    auto const_dst_shift_back_n0 =
+        writer->declare_constant_tile(ckw::ConstantData({{dst_shift_back}}, ckw::DataType::Int32));
+
+    /********************************************************************************
+     * 5 - Define the sampler for the input tensor
+     ********************************************************************************/
+    if (!src->has_tile())
+    {
+        // Sampler
+        ckw::TensorSampler sampler_src = dst->tensor_sampler();
+
+        auto tile_gid_0 = writer->declare_tile("gid_0_src", ckw::TileInfo(ckw::DataType::Int32));
+        auto tile_gid_1 = writer->declare_tile("gid_1_src", ckw::TileInfo(ckw::DataType::Int32));
+        auto tile_gid_2 = writer->declare_tile("gid_2_src", ckw::TileInfo(ckw::DataType::Int32));
+
+        writer->op_get_global_id(tile_gid_0, 0);
+        writer->op_get_global_id(tile_gid_1, 1);
+        writer->op_get_global_id(tile_gid_2, 2);
+
+        auto tile_nout0 = writer->declare_tile("nout0_src", ckw::TileInfo(ckw::DataType::Int32)); // OFM
+        auto tile_mout0 =
+            writer->declare_tile("mout0_src", ckw::TileInfo(ckw::DataType::Int32)); // WIDTH or WIDTH x HEIGHT
+        auto tile_mout1 = writer->declare_tile("mout1_src", ckw::TileInfo(ckw::DataType::Int32)); // HEIGHT or 0
+        auto tile_bout0 = writer->declare_tile("bout0_src", ckw::TileInfo(ckw::DataType::Int32)); // BATCH SIZE IDX
+
+        get_coordinate_from_gws_overlapping_min(writer, tile_nout0, tile_gid_0, const_dst_n0, const_dst_shift_back_n0,
+                                                const_0_i32);
+        get_coordinate_from_gws(writer, tile_mout0, tile_gid_1, const_dst_m0);
+
+        // Get the boundary aware coordinates at each global dimension index
+        if (sampler_src.format() == ckw::TensorSamplerFormat::Dim0_Dim1xDim2_1)
+        {
+            writer->op_assign(tile_mout1, const_0_i32);
+            get_coordinate_from_gws(writer, tile_bout0, tile_gid_2, const_pos_1_i32);
+        }
+        else if (sampler_src.format() == ckw::TensorSamplerFormat::Dim0_Dim1_Dim2)
+        {
+            writer->op_binary(tile_mout1, ckw::BinaryOp::Mod, tile_gid_2, const_dst_h_i32);
+            writer->op_binary(tile_bout0, ckw::BinaryOp::Div, tile_gid_2, const_dst_h_i32);
+        }
+
+        auto tile_src = writer->declare_tile("src", ckw::TileInfo(dst_dt, dst_m0, dst_n0));
+
+        writer->op_load(tile_src, src->tensor(), sampler_src, tile_nout0, tile_mout0, tile_mout1, tile_bout0);
+
+        // Here, init_virtual_tensor() it is used to bring the tile_src outside the compound statement
+        src->init_virtual_tensor(tile_src, sampler_src);
+    }
+
+    const auto &tile_src = src->tile();
+
+    /********************************************************************************
+     * 7 - Write the rest of the code
+     ********************************************************************************/
+    switch (_attributes.activation())
+    {
+        case ActivationLayerInfo::ActivationFunction::LOGISTIC:
+        {
+            // dst = src * -1
+            writer->op_binary(tile_dst, ckw::BinaryOp::Mul, tile_src, const_neg_1_fp);
+            // dst = exp(src * -1)
+            writer->op_unary(tile_dst, ckw::UnaryOp::Exp, tile_dst);
+            // dst = 1 + (exp(src * -1))
+            writer->op_binary(tile_dst, ckw::BinaryOp::Add, tile_dst, const_pos_1_fp);
+            // dst = 1 /  1 + (exp(src * -1))
+            writer->op_binary(tile_dst, ckw::BinaryOp::Div, const_pos_1_fp, tile_dst);
+            break;
+        }
+        case ActivationLayerInfo::ActivationFunction::TANH:
+        {
+            writer->op_unary(tile_dst, ckw::UnaryOp::Tanh, tile_src);
+            break;
+        }
+        case ActivationLayerInfo::ActivationFunction::RELU:
+        {
+            // dst = max(src, 0)
+            writer->op_binary(tile_dst, ckw::BinaryOp::Max, tile_src, const_0_fp);
+            break;
+        }
+        case ActivationLayerInfo::ActivationFunction::BOUNDED_RELU:
+        {
+            //dst = max(src, 0)
+            writer->op_binary(tile_dst, ckw::BinaryOp::Max, tile_src, const_0_fp);
+            //dst = min(max(src, 0), A_VAL)
+            writer->op_binary(tile_dst, ckw::BinaryOp::Min, tile_dst, const_A_fp);
+            break;
+        }
+        case ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU:
+        {
+            //dst = max(src, B_VAL)
+            writer->op_binary(tile_dst, ckw::BinaryOp::Max, tile_src, const_B_fp);
+            //dst = min(max(src, B_VAL), A_VAL)
+            writer->op_binary(tile_dst, ckw::BinaryOp::Min, tile_dst, const_A_fp);
+            break;
+        }
+        default:
+            CKW_ASSERT(false);
+            break;
+    }
+    ARM_COMPUTE_ERROR_ON_MSG(dst->has_tile() == false, "You must bind a tile before appending another component");
+}
+
+Window GpuCkwActivation::get_window() const
+{
+    ARM_COMPUTE_ERROR_ON_MSG(_dst->tensor_shape().total_size() == 0U, "Destination tensor is not initialized");
+
+    TensorShape output_shape = _dst->tensor_shape();
+    // Collapse Dim 1 (W) and Dim 2 (H) together, leave Dim 0 (C) unchanged
+    // This is in line with the collapsing convention used by operators like Conv2d
+    output_shape.collapse(2U, 1U);
+    constexpr uint32_t vector_size_byte_opencl = 16;
+    const uint32_t     num_elems_processed_per_iteration =
+        adjust_vec_size(vector_size_byte_opencl / _dst->element_size(), _dst->dimension(0));
+    Window win = calculate_max_window(output_shape, Steps(num_elems_processed_per_iteration));
+
+    return win;
+}
+
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwActivation.h b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwActivation.h
new file mode 100644
index 0000000000..386e933a72
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwActivation.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_COMPONENTS_GPUCKWACTIVATION
+#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_COMPONENTS_GPUCKWACTIVATION
+
+#include "src/core/common/Macros.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/IGpuCkwComponentDriver.h"
+#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+class GpuCkwActivation : public IGpuCkwComponentDriver
+{
+public:
+    using Attributes = ClComponentActivation::Attributes;
+    /** Constructor
+     *
+     * For supported configurations please refer to @ref GpuCkwActivation::validate()
+     *
+     * @param[in] id         Component id
+     * @param[in] tensors    Tensor arguments to the component
+     * @param[in] attributes Component attributes
+     */
+    GpuCkwActivation(ComponentId id, const ArgumentPack<ITensorInfo> &tensors, const Attributes &attributes);
+    ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(GpuCkwActivation);
+    /** Destructor */
+    ~GpuCkwActivation() override = default;
+    // Inherited methods overriden:
+    virtual void write_component_code(const ComponentGroup    &comp_group,
+                                      GpuCkwVariableTable     &vtable,
+                                      GpuCkwScopedKernelWriter writer) const override;
+    Window       get_window() const override;
+
+private:
+    const ITensorInfo *_src;
+    const ITensorInfo *_dst;
+    Attributes         _attributes;
+};
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+
+#endif /* ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_COMPONENTS_GPUCKWACTIVATION */
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.cpp
new file mode 100644
index 0000000000..d3e0dbafd4
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.cpp
@@ -0,0 +1,256 @@
+/*
+ * Copyright (c) 2023-2024 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "GpuCkwCast.h"
+
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
+#include "arm_compute/core/Validate.h"
+
+#include "src/core/helpers/WindowHelpers.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/CkwHelper.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/Common.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
+
+#include "compute_kernel_writer/include/ckw/KernelWriter.h"
+#include <cstdint>
+#include <string>
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+
+GpuCkwCast::GpuCkwCast(ComponentId id, const ArgumentPack<ITensorInfo> &tensors, const Attributes &attributes)
+    : IGpuCkwComponentDriver{id, tensors}, _src{}, _dst{}, _attributes{attributes}
+{
+    _src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0);
+    _dst = this->tensors().get_const_tensor(TensorType::ACL_DST_0);
+    ARM_COMPUTE_ERROR_ON_NULLPTR(_src, _dst);
+    ARM_COMPUTE_ERROR_ON_MSG(is_data_type_float(_src->data_type()) == false,
+                             "The source data type must be a floating-point data type");
+}
+
+void GpuCkwCast::write_component_code(const ComponentGroup    &comp_group,
+                                      GpuCkwVariableTable     &vtable,
+                                      GpuCkwScopedKernelWriter writer) const
+{
+    /********************************************************************************
+     * 1 - Define tensors
+     ********************************************************************************/
+    GpuCkwComponentArgument *src = vtable.declare_variable(comp_group, writer, _src, "src");
+    GpuCkwComponentArgument *dst = vtable.declare_variable(comp_group, writer, _dst, "dst");
+
+    /********************************************************************************
+     * 2 - Define CKW constants
+     ********************************************************************************/
+    const auto dst_h = static_cast<int32_t>(_dst->dimension(1));
+
+    // CKW constants
+    auto const_dst_h_i32 = writer->declare_constant_tile(ckw::ConstantData({{dst_h}}, ckw::DataType::Int32));
+    auto const_pos_1_i32 = writer->declare_constant_tile(ckw::ConstantData({{1}}, ckw::DataType::Int32));
+    auto const_0_i32     = writer->declare_constant_tile(ckw::ConstantData({{0}}, ckw::DataType::Int32));
+
+    /********************************************************************************
+     * 3 - Define the compute block parameters and destination tile (if not root component)
+     *     Bind the tile to the tensor to share it among different components and
+     *     initialize the compute block parameters
+     ********************************************************************************/
+    // The compute block parameters depend on the employed tensor format
+
+    // Destination compute block size
+    int32_t dst_n0 = -1;
+    int32_t dst_m0 = -1;
+
+    // Destination compute block size left-over
+    int32_t dst_n0_partial = -1;
+    int32_t dst_m0_partial = -1;
+
+    // Shift-back for the overlapping-min strategy
+    int32_t dst_shift_back = -1;
+
+    if (!dst->has_tile())
+    {
+        // If ROOT component, we use ckw::TensorSamplerFormat::Dim0_Dim1xDim2_1
+        // as tensor format
+        const auto root_window = comp_group.get_root_component()->ckw_component_driver()->get_window();
+
+        dst_n0         = root_window.x().step();
+        dst_m0         = root_window.y().step();
+        dst_n0_partial = _dst->dimension(0) % dst_n0;
+        dst_m0_partial = (_dst->dimension(1) * _dst->dimension(2)) % dst_m0;
+        dst_shift_back = (dst_n0 - dst_n0_partial) % dst_n0;
+
+        ckw::TensorSampler sampler_dst;
+        sampler_dst.format(ckw::TensorSamplerFormat::Dim0_Dim1xDim2_1);
+        if (dst_n0_partial == 0)
+        {
+            sampler_dst.address_mode_x(ckw::TensorSamplerAddressModeX::None);
+        }
+        else
+        {
+            sampler_dst.address_mode_x(ckw::TensorSamplerAddressModeX::OverlappingMin);
+        }
+
+        if (dst_m0_partial == 0)
+        {
+            sampler_dst.address_mode_y(ckw::TensorSamplerAddressModeY::None);
+        }
+        else
+        {
+            sampler_dst.address_mode_y(ckw::TensorSamplerAddressModeY::ClampToBorderMaxOnly);
+        }
+
+        sampler_dst.address_mode_z(ckw::TensorSamplerAddressModeZ::None);
+        sampler_dst.storage(ckw::TensorStorageType::BufferUint8Ptr);
+
+        // Declare destination tile
+        ckw::DataType dst_dt   = to_ckw(_dst->data_type());
+        auto          tile_dst = writer->declare_tile("dst", ckw::TileInfo(dst_dt, dst_m0, dst_n0));
+
+        // Bind tile to the tensor
+        dst->init_virtual_tensor(tile_dst, sampler_dst);
+    }
+    else
+    {
+        // Change dst_n0 and dst_m0 if NOT root component!
+        // ATTENTION:
+        // dst_m0_partial depends on the TensorSamplerFormat
+        dst_n0         = dst->tile().tile_info().width();
+        dst_m0         = dst->tile().tile_info().height();
+        dst_n0_partial = _dst->dimension(0) % dst_n0;
+
+        ckw::TensorSampler sampler_dst = dst->tensor_sampler();
+
+        if (sampler_dst.format() == ckw::TensorSamplerFormat::Dim0_Dim1xDim2_1)
+        {
+            dst_m0_partial = (_dst->dimension(1) * _dst->dimension(2)) % dst_m0;
+        }
+        else if (sampler_dst.format() == ckw::TensorSamplerFormat::Dim0_Dim1_Dim2)
+        {
+            dst_m0_partial = _dst->dimension(1) % dst_m0;
+        }
+
+        // Shift-back for the overlapping-min strategy
+        dst_shift_back = (dst_n0 - dst_n0_partial) % dst_n0;
+    }
+
+    const auto &tile_dst = dst->tile();
+
+    /********************************************************************************
+     * 4 - Define the compute block parameters CKW constants
+     ********************************************************************************/
+    // Only now we can declare the N0 and M0 as constant
+    auto const_dst_n0_i32 = writer->declare_constant_tile(ckw::ConstantData({{dst_n0}}, ckw::DataType::Int32));
+    auto const_dst_m0_i32 = writer->declare_constant_tile(ckw::ConstantData({{dst_m0}}, ckw::DataType::Int32));
+    auto const_dst_shift_back_n0_i32 =
+        writer->declare_constant_tile(ckw::ConstantData({{dst_shift_back}}, ckw::DataType::Int32));
+
+    /********************************************************************************
+     * 5 - Define the sampler for the input tensor
+     ********************************************************************************/
+    if (!src->has_tile())
+    {
+        // Sampler
+        ckw::TensorSampler sampler_src = dst->tensor_sampler();
+
+        auto tile_gid_0 = writer->declare_tile("gid_0", ckw::TileInfo(ckw::DataType::Int32));
+        auto tile_gid_1 = writer->declare_tile("gid_1", ckw::TileInfo(ckw::DataType::Int32));
+        auto tile_gid_2 = writer->declare_tile("gid_2", ckw::TileInfo(ckw::DataType::Int32));
+
+        writer->op_get_global_id(tile_gid_0, 0);
+        writer->op_get_global_id(tile_gid_1, 1);
+        writer->op_get_global_id(tile_gid_2, 2);
+
+        auto tile_cout0 = writer->declare_tile("cout0", ckw::TileInfo(ckw::DataType::Int32)); // OFM
+        auto tile_mout0 = writer->declare_tile("mout0", ckw::TileInfo(ckw::DataType::Int32)); // WIDTH or WIDTH x HEIGHT
+        auto tile_mout1 = writer->declare_tile("mout1", ckw::TileInfo(ckw::DataType::Int32)); // HEIGHT or 0
+        auto tile_bout0 = writer->declare_tile("bout0", ckw::TileInfo(ckw::DataType::Int32)); // BATCH SIZE IDX
+
+        // Calculate coordinates
+        get_coordinate_from_gws_overlapping_min(writer, tile_cout0, tile_gid_0, const_dst_n0_i32,
+                                                const_dst_shift_back_n0_i32, const_0_i32);
+        get_coordinate_from_gws(writer, tile_mout0, tile_gid_1, const_dst_m0_i32);
+
+        // Get the boundary aware coordinates at each global dimension index
+        if (sampler_src.format() == ckw::TensorSamplerFormat::Dim0_Dim1xDim2_1)
+        {
+            writer->op_assign(tile_mout1, const_0_i32);
+            get_coordinate_from_gws(writer, tile_bout0, tile_gid_2, const_pos_1_i32);
+        }
+        else if (sampler_src.format() == ckw::TensorSamplerFormat::Dim0_Dim1_Dim2)
+        {
+            writer->op_binary(tile_mout1, ckw::BinaryOp::Mod, tile_gid_2, const_dst_h_i32);
+            writer->op_binary(tile_bout0, ckw::BinaryOp::Div, tile_gid_2, const_dst_h_i32);
+        }
+        ckw::DataType src_dt   = to_ckw(_src->data_type());
+        auto          tile_src = writer->declare_tile("src", ckw::TileInfo(src_dt, dst_m0, dst_n0));
+
+        writer->op_load(tile_src, src->tensor(), sampler_src, tile_cout0, tile_mout0, tile_mout1, tile_bout0);
+
+        // Here, init_virtual_tensor() it is used to bring the tile_src outside the compound statement
+        src->init_virtual_tensor(tile_src, sampler_src);
+    }
+
+    auto tile_src = src->tile();
+
+    /********************************************************************************
+     * 6 - Extra operations required before writing the main code (optional)
+     ********************************************************************************/
+
+    // Not required
+
+    /********************************************************************************
+     * 7 - Write the rest of the code
+     ********************************************************************************/
+    // Only None ConvertPolicy is supported for floating-point data types
+    ckw::ConvertPolicy convert_policy = ckw::ConvertPolicy::None;
+
+    writer->op_cast(tile_dst, tile_src, convert_policy);
+    ARM_COMPUTE_ERROR_ON_MSG(dst->has_tile() == false, "You must bind a tile before appending another component");
+}
+
+Window GpuCkwCast::get_window() const
+{
+    ARM_COMPUTE_ERROR_ON_MSG(_dst->tensor_shape().total_size() == 0U, "Destination tensor is not initialized");
+
+    TensorShape output_shape = _dst->tensor_shape();
+    // Collapse Dim 1 (W) and Dim 2 (H) together, leave Dim 0 (C) unchanged
+    // This is in line with the collapsing convention used by operators like Conv2d
+    output_shape.collapse(2U, 1U);
+    constexpr uint32_t vector_size_byte_opencl = 16;
+    const uint32_t     num_elems_processed_per_iteration =
+        adjust_vec_size(vector_size_byte_opencl / _dst->element_size(), _dst->dimension(0));
+    Window win = calculate_max_window(output_shape, Steps(num_elems_processed_per_iteration));
+
+    return win;
+}
+
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.h b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.h
new file mode 100644
index 0000000000..2389301196
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_COMPONENTS_GPUCKWCAST
+#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_COMPONENTS_GPUCKWCAST
+
+#include "src/core/common/Macros.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/IGpuCkwComponentDriver.h"
+#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+class GpuCkwCast : public IGpuCkwComponentDriver
+{
+public:
+    using Attributes = ClComponentCast::Attributes;
+    /** Constructor
+     *
+     * For supported configurations please refer to @ref ClComponentCast::validate()
+     *
+     * @param[in] id         Component id
+     * @param[in] tensors    Tensor arguments to the component
+     * @param[in] attributes Component attributes
+     */
+    GpuCkwCast(ComponentId id, const ArgumentPack<ITensorInfo> &tensors, const Attributes &attributes);
+    ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(GpuCkwCast);
+    /** Destructor */
+    ~GpuCkwCast() override = default;
+    // Inherited methods overriden:
+    virtual void write_component_code(const ComponentGroup    &comp_group,
+                                      GpuCkwVariableTable     &vtable,
+                                      GpuCkwScopedKernelWriter writer) const override;
+    Window       get_window() const override;
+
+private:
+    const ITensorInfo *_src;
+    const ITensorInfo *_dst;
+    Attributes         _attributes;
+};
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+
+#endif /* ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_COMPONENTS_GPUCKWCAST */
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwDepthwiseConv2d.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwDepthwiseConv2d.cpp
new file mode 100644
index 0000000000..cfccab186b
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwDepthwiseConv2d.cpp
@@ -0,0 +1,361 @@
+/*
+ * Copyright (c) 2023-2024 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwDepthwiseConv2d.h"
+
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
+#include "arm_compute/core/Validate.h"
+
+#include "src/core/helpers/WindowHelpers.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/CkwHelper.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/Common.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
+
+#include "compute_kernel_writer/include/ckw/KernelWriter.h"
+#include <cstdint>
+#include <string>
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+GpuCkwDepthwiseConv2d::GpuCkwDepthwiseConv2d(ComponentId                      id,
+                                             const ArgumentPack<ITensorInfo> &tensors,
+                                             const Attributes                &attributes,
+                                             const Settings                  &settings)
+    : IGpuCkwComponentDriver{id, tensors}, _src{}, _wei{}, _bia{}, _dst{}, _attributes{attributes}, _settings{settings}
+{
+    _src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0);
+    _wei = this->tensors().get_const_tensor(TensorType::ACL_SRC_1);
+    if (this->tensors().get_const_tensor(TensorType::ACL_SRC_2))
+    {
+        _bia = this->tensors().get_const_tensor(TensorType::ACL_SRC_2);
+    }
+    _dst = this->tensors().get_const_tensor(TensorType::ACL_DST_0);
+    ARM_COMPUTE_ERROR_ON_NULLPTR(_src, _wei, _bia, _dst);
+}
+
+void GpuCkwDepthwiseConv2d::write_component_code(const ComponentGroup    &comp_group,
+                                                 GpuCkwVariableTable     &vtable,
+                                                 GpuCkwScopedKernelWriter writer) const
+{
+    // Data Layout is NHWC
+    const uint32_t width_idx  = get_data_layout_dimension_index(_wei->data_layout(), DataLayoutDimension::WIDTH);
+    const uint32_t height_idx = get_data_layout_dimension_index(_wei->data_layout(), DataLayoutDimension::HEIGHT);
+
+    /********************************************************************************
+     * 1 - Define tensors
+     ********************************************************************************/
+    GpuCkwComponentArgument *src = vtable.declare_variable(comp_group, writer, _src, "src");
+    GpuCkwComponentArgument *wei = vtable.declare_variable(comp_group, writer, _wei, "wei");
+    GpuCkwComponentArgument *dst = vtable.declare_variable(comp_group, writer, _dst, "dst");
+    GpuCkwComponentArgument *bia = nullptr;
+
+    const bool using_bias = _bia != nullptr;
+
+    if (using_bias)
+    {
+        bia = vtable.declare_variable(comp_group, writer, _bia, "bia");
+    }
+
+    /********************************************************************************
+     * 2 - Define CKW constants
+     ********************************************************************************/
+    const auto dst_dt           = to_ckw(_dst->data_type());
+    const auto kernel_height    = static_cast<int32_t>(_wei->dimension(height_idx));
+    const auto kernel_width     = static_cast<int32_t>(_wei->dimension(width_idx));
+    const auto src_w            = static_cast<int32_t>(_src->dimension(width_idx));
+    const auto src_h            = static_cast<int32_t>(_src->dimension(height_idx));
+    const auto dst_h            = static_cast<int32_t>(_dst->dimension(height_idx));
+    const auto stride_x         = static_cast<int32_t>(_attributes.stride().x());
+    const auto stride_y         = static_cast<int32_t>(_attributes.stride().y());
+    const auto pad_x            = static_cast<int32_t>(_attributes.pad().left);
+    const auto pad_y            = static_cast<int32_t>(_attributes.pad().top);
+    const auto depth_multiplier = static_cast<int32_t>(_attributes.depth_multiplier());
+    const auto dilation_x       = static_cast<int32_t>(_attributes.dilation().x());
+    const auto dilation_y       = static_cast<int32_t>(_attributes.dilation().y());
+    const auto kernel_size      = kernel_width * kernel_height;
+
+    // CKW constants
+    auto const_kernel_w_i32 = writer->declare_constant_tile(ckw::ConstantData({{kernel_width}}, ckw::DataType::Int32));
+    auto const_kernel_size_i32 =
+        writer->declare_constant_tile(ckw::ConstantData({{kernel_size}}, ckw::DataType::Int32));
+    auto const_dst_h_i32    = writer->declare_constant_tile(ckw::ConstantData({{dst_h}}, ckw::DataType::Int32));
+    auto const_src_w_i32    = writer->declare_constant_tile(ckw::ConstantData({{src_w}}, ckw::DataType::Int32));
+    auto const_src_h_i32    = writer->declare_constant_tile(ckw::ConstantData({{src_h}}, ckw::DataType::Int32));
+    auto const_stride_x_i32 = writer->declare_constant_tile(ckw::ConstantData({{stride_x}}, ckw::DataType::Int32));
+    auto const_stride_y_i32 = writer->declare_constant_tile(ckw::ConstantData({{stride_y}}, ckw::DataType::Int32));
+    auto const_pad_x_i32    = writer->declare_constant_tile(ckw::ConstantData({{pad_x}}, ckw::DataType::Int32));
+    auto const_pad_y_i32    = writer->declare_constant_tile(ckw::ConstantData({{pad_y}}, ckw::DataType::Int32));
+    auto const_0_i32        = writer->declare_constant_tile(ckw::ConstantData({{0}}, ckw::DataType::Int32));
+    auto const_neg_1_i32    = writer->declare_constant_tile(ckw::ConstantData({{-1}}, ckw::DataType::Int32));
+    auto const_depth_multiplier_i32 =
+        writer->declare_constant_tile(ckw::ConstantData({{depth_multiplier}}, ckw::DataType::Int32));
+    auto const_dilation_x_i32 = writer->declare_constant_tile(ckw::ConstantData({{dilation_x}}, ckw::DataType::Int32));
+    auto const_dilation_y_i32 = writer->declare_constant_tile(ckw::ConstantData({{dilation_y}}, ckw::DataType::Int32));
+    auto const_0_fp           = writer->declare_constant_tile(ckw::ConstantData({{0.0f}}, dst_dt));
+
+    /********************************************************************************
+     * 3 - Define the compute block parameters and destination tile (if not root component)
+     *     Bind the tile to the tensor to share it among different components and
+     *     initialize the compute block parameters
+     ********************************************************************************/
+    // The compute block parameters depend on the employed tensor format
+    const auto root_window = comp_group.get_root_component()->ckw_component_driver()->get_window();
+
+    // Destination compute block size
+    const int32_t dst_n0 = root_window.x().step();
+    const int32_t dst_m0 = root_window.y().step();
+
+    // Destination compute block size left-over
+    const int32_t dst_n0_partial = _dst->dimension(0) % dst_n0;
+    const int32_t dst_m0_partial = _dst->dimension(1) % dst_m0;
+
+    // Shift-back for the overlapping-min strategy
+    const int32_t dst_shift_back = (dst_n0 - dst_n0_partial) % dst_n0;
+
+    const int32_t src_m0 = kernel_width + (dst_m0 - 1);
+    const int32_t src_n0 = depth_multiplier > 1 ? 1 : dst_n0;
+    const int32_t wei_m0 = kernel_width;
+    const int32_t wei_n0 = dst_n0;
+
+    ckw::TensorSampler sampler_dst;
+    sampler_dst.format(ckw::TensorSamplerFormat::Dim0_Dim1_Dim2);
+    if (dst_n0_partial == 0)
+    {
+        sampler_dst.address_mode_x(ckw::TensorSamplerAddressModeX::None);
+    }
+    else
+    {
+        sampler_dst.address_mode_x(ckw::TensorSamplerAddressModeX::OverlappingMin);
+    }
+
+    if (dst_m0_partial == 0)
+    {
+        sampler_dst.address_mode_y(ckw::TensorSamplerAddressModeY::None);
+    }
+    else
+    {
+        sampler_dst.address_mode_y(ckw::TensorSamplerAddressModeY::ClampToBorderMaxOnly);
+    }
+
+    sampler_dst.address_mode_z(ckw::TensorSamplerAddressModeZ::None);
+    sampler_dst.storage(ckw::TensorStorageType::BufferUint8Ptr);
+
+    // Declare destination tile
+    auto tile_dst = writer->declare_tile("dst", ckw::TileInfo(dst_dt, dst_m0, dst_n0));
+
+    // Initialize the destination tile
+    writer->op_assign(tile_dst, const_0_fp);
+
+    // Bind tile to the tensor
+    dst->init_virtual_tensor(tile_dst, sampler_dst);
+
+    /********************************************************************************
+     * 4 - Define the compute block parameters CKW constants
+     ********************************************************************************/
+    // Only now we can declare the N0 and M0 as constant
+    auto const_dst_n0_i32 = writer->declare_constant_tile(ckw::ConstantData({{dst_n0}}, ckw::DataType::Int32));
+    auto const_dst_m0_i32 = writer->declare_constant_tile(ckw::ConstantData({{dst_m0}}, ckw::DataType::Int32));
+    auto const_shift_back_dst_n0_i32 =
+        writer->declare_constant_tile(ckw::ConstantData({{dst_shift_back}}, ckw::DataType::Int32));
+
+    /********************************************************************************
+     * 5 - Define the sampler for the input tensors
+     ********************************************************************************/
+    // SOURCE SAMPLER
+    ckw::TensorSampler sampler_src;
+    sampler_src.format(ckw::TensorSamplerFormat::Dim0_Dim1_Dim2);
+    sampler_src.address_mode_x(ckw::TensorSamplerAddressModeX::None);
+    sampler_src.address_mode_y(ckw::TensorSamplerAddressModeY::SkipLessThanZero);
+    sampler_src.address_mode_z(ckw::TensorSamplerAddressModeZ::None);
+    sampler_src.storage(ckw::TensorStorageType::BufferUint8Ptr);
+
+    // WEIGHTS SAMPLER
+    // We cannot have out-of-bounds accesses for the weights
+    ckw::TensorSampler sampler_wei;
+    sampler_wei.format(ckw::TensorSamplerFormat::Dim0_Dim1_Dim2);
+    sampler_wei.address_mode_x(ckw::TensorSamplerAddressModeX::None);
+    sampler_wei.address_mode_y(ckw::TensorSamplerAddressModeY::None);
+    sampler_wei.address_mode_z(ckw::TensorSamplerAddressModeZ::None);
+    if (_settings.export_weights_to_cl_image())
+    {
+        sampler_wei.storage(ckw::TensorStorageType::Texture2dReadOnly);
+    }
+    else
+    {
+        sampler_wei.storage(ckw::TensorStorageType::BufferUint8Ptr);
+    }
+
+    // BIAS SAMPLER
+    ckw::TensorSampler sampler_bia;
+    sampler_bia.format(ckw::TensorSamplerFormat::Dim0_Dim1_Dim2);
+    sampler_bia.address_mode_x(sampler_dst.address_mode_x());
+    sampler_bia.address_mode_y(ckw::TensorSamplerAddressModeY::None);
+    sampler_bia.address_mode_z(ckw::TensorSamplerAddressModeZ::None);
+    sampler_bia.storage(ckw::TensorStorageType::BufferUint8Ptr);
+
+    /********************************************************************************
+     * 6 - Extra operations required before writing the main code (Optional)
+     ********************************************************************************/
+    // Not required
+
+    /********************************************************************************
+     * 7 - Get the coordinates of the destination tile
+     ********************************************************************************/
+    auto tile_gid_0 = writer->declare_tile("gid_0", ckw::TileInfo(ckw::DataType::Int32));
+    auto tile_gid_1 = writer->declare_tile("gid_1", ckw::TileInfo(ckw::DataType::Int32));
+    auto tile_gid_2 = writer->declare_tile("gid_2", ckw::TileInfo(ckw::DataType::Int32));
+
+    writer->op_get_global_id(tile_gid_0, 0);
+    writer->op_get_global_id(tile_gid_1, 1);
+    writer->op_get_global_id(tile_gid_2, 2);
+
+    auto tile_cout0 = writer->declare_tile("cout0", ckw::TileInfo(ckw::DataType::Int32)); // OFM
+    auto tile_mout0 = writer->declare_tile("mout0", ckw::TileInfo(ckw::DataType::Int32)); // WIDTH
+    auto tile_mout1 = writer->declare_tile("mout1", ckw::TileInfo(ckw::DataType::Int32)); // HEIGHT
+    auto tile_bout0 = writer->declare_tile("bout0", ckw::TileInfo(ckw::DataType::Int32)); // BATCH SIZE IDX
+
+    // Calculate coordinates
+    get_coordinate_from_gws_overlapping_min(writer, tile_cout0, tile_gid_0, const_dst_n0_i32,
+                                            const_shift_back_dst_n0_i32, const_0_i32);
+    get_coordinate_from_gws(writer, tile_mout0, tile_gid_1, const_dst_m0_i32);
+    writer->op_binary(tile_mout1, ckw::BinaryOp::Mod, tile_gid_2, const_dst_h_i32);
+    writer->op_binary(tile_bout0, ckw::BinaryOp::Div, tile_gid_2, const_dst_h_i32);
+
+    auto tile_src_ci = writer->declare_tile("src_ci", ckw::DataType::Int32);
+    writer->op_binary(tile_src_ci, ckw::BinaryOp::Div, tile_cout0, const_depth_multiplier_i32);
+
+    auto tile_src_xi = writer->declare_tile("src_xi", ckw::DataType::Int32);
+    writer->op_binary(tile_src_xi, ckw::BinaryOp::Mul, tile_mout0, const_stride_x_i32);
+    writer->op_binary(tile_src_xi, ckw::BinaryOp::Sub, tile_src_xi, const_pad_x_i32);
+
+    auto tile_src_yi = writer->declare_tile("src_yi", ckw::DataType::Int32);
+    writer->op_binary(tile_src_yi, ckw::BinaryOp::Mul, tile_mout1, const_stride_y_i32);
+    writer->op_binary(tile_src_yi, ckw::BinaryOp::Sub, tile_src_yi, const_pad_y_i32);
+
+    // Loop variables
+    auto tile_yk = writer->declare_tile("yk", ckw::DataType::Int32);
+
+    writer->op_assign(tile_yk, const_0_i32);
+
+    // clang-format off
+    writer->op_for_loop(tile_yk, ckw::BinaryOp::Less, const_kernel_size_i32, tile_yk, ckw::AssignmentOp::Increment, const_kernel_w_i32,
+    [&]()
+    {
+        auto tile_src = writer->declare_tile("a", ckw::TileInfo(to_ckw(_src->data_type()), src_m0, src_n0));
+        auto tile_wei = writer->declare_tile("b", ckw::TileInfo(to_ckw(_wei->data_type()), wei_m0, wei_n0));
+
+        writer->op_assign(tile_src, const_0_fp);
+
+        auto tile_x_gte_0 = writer->declare_tile("x_gte_0", ckw::TileInfo(ckw::DataType::Int32));
+        auto tile_y_gte_0 = writer->declare_tile("y_gte_0", ckw::TileInfo(ckw::DataType::Int32));
+        auto tile_x_lt_w  = writer->declare_tile("x_lt_w", ckw::TileInfo(ckw::DataType::Int32));
+        auto tile_y_lt_h  = writer->declare_tile("y_lt_h", ckw::TileInfo(ckw::DataType::Int32));
+
+        // Check if yi + yk * DILATION_Y is out-of-bound
+        writer->op_binary(tile_y_gte_0, ckw::BinaryOp::GreaterEqual, tile_src_yi, const_0_i32);
+        writer->op_binary(tile_y_lt_h, ckw::BinaryOp::Less, tile_src_yi, const_src_h_i32);
+
+        auto tile_src_mi = writer->declare_tile("src_mi", ckw::TileInfo(ckw::DataType::Int32));
+
+        // Load src
+        for(int32_t xk = 0; xk < src_m0; ++xk)
+        {
+            auto const_xk_i32 = writer->declare_constant_tile(ckw::ConstantData({{xk}}, ckw::DataType::Int32));
+
+            // xi + xk * DILATION_X
+            writer->op_binary(tile_src_mi, ckw::BinaryOp::Mul, const_xk_i32, const_dilation_x_i32);
+            writer->op_binary(tile_src_mi, ckw::BinaryOp::Add, tile_src_mi, tile_src_xi);
+
+            // Check if xi + xk * DILATION_X is out-of-bound
+            writer->op_binary(tile_x_gte_0, ckw::BinaryOp::GreaterEqual, tile_src_mi, const_0_i32);
+            writer->op_binary(tile_x_lt_w, ckw::BinaryOp::Less, tile_src_mi, const_src_w_i32);
+
+            // Set mi to -1 if we have out-of-bound memory accesses
+            writer->op_ternary(tile_src_mi, ckw::TernaryOp::Select, const_neg_1_i32, tile_src_mi, tile_x_gte_0);
+            writer->op_ternary(tile_src_mi, ckw::TernaryOp::Select, const_neg_1_i32, tile_src_mi, tile_x_lt_w);
+            writer->op_ternary(tile_src_mi, ckw::TernaryOp::Select, const_neg_1_i32, tile_src_mi, tile_y_gte_0);
+            writer->op_ternary(tile_src_mi, ckw::TernaryOp::Select, const_neg_1_i32, tile_src_mi, tile_y_lt_h);
+
+            writer->op_load(tile_src.row(xk), src->tensor(), sampler_src, tile_src_ci, tile_src_mi, tile_src_yi, tile_bout0);
+        }
+
+        // Load wei
+        writer->op_load(tile_wei, wei->tensor(), sampler_wei, tile_cout0, tile_yk, const_0_i32, const_0_i32);
+
+        // Attention: MAC (Multiply-and-Accumulate) ternary operator is currently unsupported in CKW
+        // Therefore, this part should be replaced with the MAC ternary operator when availabe
+        auto tile_tmp = writer->declare_tile("tmp", ckw::TileInfo(to_ckw(_src->data_type()), 1, dst_n0));
+        for(int32_t m0 = 0; m0 < dst_m0; ++m0)
+        {
+            for(int32_t xk = 0; xk < kernel_width; ++xk)
+            {
+                auto tile_a = tile_src.row(m0 + xk);
+                auto tile_b = tile_wei.row(xk);
+                auto tile_c = tile_dst.row(m0);
+
+                writer->op_binary(tile_tmp, ckw::BinaryOp::Mul, tile_a, tile_b);
+                writer->op_binary(tile_c, ckw::BinaryOp::Add, tile_c, tile_tmp);
+            }
+        }
+        writer->op_binary(tile_src_yi, ckw::BinaryOp::Add, tile_src_yi, const_dilation_y_i32);
+    });
+    // clang-format on
+
+    // Bias addition
+    // NOTE: This operation will be removed from this kernel as the interface is standardized. The intended way of
+    // performing bias addition is to fuse this convolution kernel with a following elementwise addition kernel.
+    if (using_bias)
+    {
+        if (!bia->has_tile())
+        {
+            auto tile_bia = writer->declare_tile("bia", ckw::TileInfo(to_ckw(_src->data_type()), 1, dst_n0));
+            writer->op_load(tile_bia, bia->tensor(), sampler_bia, tile_cout0, const_0_i32, const_0_i32, const_0_i32);
+            bia->init_virtual_tensor(tile_bia, sampler_bia);
+        }
+        auto &tile_bia = bia->tile();
+
+        writer->op_binary(tile_dst, ckw::BinaryOp::Add, tile_dst, tile_bia);
+    }
+
+    ARM_COMPUTE_ERROR_ON_MSG(dst->has_tile() == false, "You must bind a tile before appending another component");
+}
+
+Window GpuCkwDepthwiseConv2d::get_window() const
+{
+    ARM_COMPUTE_ERROR_ON_MSG(_dst->tensor_shape().total_size() == 0U, "Destination tensor is not initialized");
+    TensorShape output_shape = _dst->tensor_shape();
+
+    Window win = calculate_max_window(output_shape, Steps(_settings.n0(), _settings.m0()));
+    return win.collapse(win, Window::DimZ);
+}
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwDepthwiseConv2d.h b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwDepthwiseConv2d.h
new file mode 100644
index 0000000000..a15d3ee710
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwDepthwiseConv2d.h
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2023-2024 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_COMPONENTS_GPUCKWDEPTHWISECONV2D_H
+#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_COMPONENTS_GPUCKWDEPTHWISECONV2D_H
+
+#include "arm_compute/dynamic_fusion/sketch/attributes/DepthwiseConv2dAttributes.h"
+
+#include "src/core/common/Macros.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/IGpuCkwComponentDriver.h"
+#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDepthwiseConv2d.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+
+class GpuCkwDepthwiseConv2d : public IGpuCkwComponentDriver
+{
+public:
+    using Attributes = ClComponentDepthwiseConv2d::Attributes;
+    using Settings   = ClComponentDepthwiseConv2d::Settings;
+
+    /** Constructor
+     *
+     * For supported configurations please refer to @ref ClComponentDepthwiseConv2d::validate()
+     *
+     * @param[in] id         Component id
+     * @param[in] tensors    Tensor arguments to the component
+     * @param[in] attributes Component attributes
+     * @param[in] settings   Component settings
+     */
+    GpuCkwDepthwiseConv2d(ComponentId                      id,
+                          const ArgumentPack<ITensorInfo> &tensors,
+                          const Attributes                &attributes,
+                          const Settings                  &settings);
+    ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(GpuCkwDepthwiseConv2d);
+    /** Destructor */
+    ~GpuCkwDepthwiseConv2d() override = default;
+    // Inherited methods overriden:
+    virtual void write_component_code(const ComponentGroup    &comp_group,
+                                      GpuCkwVariableTable     &vtable,
+                                      GpuCkwScopedKernelWriter writer) const override;
+    Window       get_window() const override;
+
+private:
+    const ITensorInfo *_src;
+    const ITensorInfo *_wei;
+    const ITensorInfo *_bia;
+    const ITensorInfo *_dst;
+    Attributes         _attributes;
+    Settings           _settings;
+};
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+#endif // ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_COMPONENTS_GPUCKWDEPTHWISECONV2D_H
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwDirectConv2d.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwDirectConv2d.cpp
new file mode 100644
index 0000000000..eb4f644eb6
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwDirectConv2d.cpp
@@ -0,0 +1,427 @@
+/*
+ * Copyright (c) 2023-2024 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwDirectConv2d.h"
+
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
+#include "arm_compute/core/Validate.h"
+
+#include "src/core/helpers/WindowHelpers.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/CkwHelper.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/Common.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwComponentArgument.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h"
+
+#include "compute_kernel_writer/include/ckw/KernelWriter.h"
+#include <cstdint>
+#include <string>
+#include <vector>
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+
+using TileContainer = std::vector<std::vector<int32_t>>;
+
+GpuCkwDirectConv2d::GpuCkwDirectConv2d(ComponentId                      id,
+                                       const ArgumentPack<ITensorInfo> &tensors,
+                                       const Attributes                &attributes,
+                                       const Settings                  &settings)
+    : IGpuCkwComponentDriver{id, tensors}, _src{}, _wei{}, _bia{}, _dst{}, _attributes{attributes}, _settings{settings}
+{
+    _src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0);
+    _wei = this->tensors().get_const_tensor(TensorType::ACL_SRC_1);
+    _bia = this->tensors().get_const_tensor(TensorType::ACL_SRC_2);
+    _dst = this->tensors().get_const_tensor(TensorType::ACL_DST_0);
+    ARM_COMPUTE_ERROR_ON_NULLPTR(_src, _wei, _dst); // Bias can be null
+}
+
+void GpuCkwDirectConv2d::write_component_code(const ComponentGroup    &comp_group,
+                                              GpuCkwVariableTable     &vtable,
+                                              GpuCkwScopedKernelWriter writer) const
+{
+    const auto desc = _settings.direct_conv_descriptor();
+    ARM_COMPUTE_ERROR_ON_MSG(desc.export_input_to_cl_image || desc.export_output_to_cl_image,
+                             "Only the weights tensor can be exported to cl_image");
+
+    const uint32_t channel_idx = get_data_layout_dimension_index(_src->data_layout(), DataLayoutDimension::CHANNEL);
+    const uint32_t width_idx   = get_data_layout_dimension_index(_wei->data_layout(), DataLayoutDimension::WIDTH);
+    const uint32_t height_idx  = get_data_layout_dimension_index(_wei->data_layout(), DataLayoutDimension::HEIGHT);
+
+    /********************************************************************************
+     * 1 - Define tensors
+     ********************************************************************************/
+    GpuCkwComponentArgument *src = vtable.declare_variable(comp_group, writer, _src, "src");
+    GpuCkwComponentArgument *wei = vtable.declare_variable(comp_group, writer, _wei, "wei");
+    GpuCkwComponentArgument *dst = vtable.declare_variable(comp_group, writer, _dst, "dst");
+    GpuCkwComponentArgument *bia = nullptr;
+
+    const bool using_bias = _bia != nullptr;
+
+    if (using_bias)
+    {
+        bia = vtable.declare_variable(comp_group, writer, _bia, "bia");
+    }
+
+    /********************************************************************************
+     * 2 - Define CKW constants
+     ********************************************************************************/
+    const auto dst_dt        = to_ckw(_dst->data_type());
+    const auto kernel_height = static_cast<int32_t>(_wei->dimension(height_idx));
+    const auto kernel_width  = static_cast<int32_t>(_wei->dimension(width_idx));
+    const auto src_c         = static_cast<int32_t>(_src->dimension(channel_idx));
+    const auto src_w         = static_cast<int32_t>(_src->dimension(width_idx));
+    const auto src_h         = static_cast<int32_t>(_src->dimension(height_idx));
+    const auto dst_w         = static_cast<int32_t>(_dst->dimension(width_idx));
+    const auto stride_x      = static_cast<int32_t>(_attributes.stride().x());
+    const auto stride_y      = static_cast<int32_t>(_attributes.stride().y());
+    const auto pad_x         = static_cast<int32_t>(_attributes.pad().left);
+    const auto pad_y         = static_cast<int32_t>(_attributes.pad().top);
+    const auto kernel_size   = kernel_width * kernel_height;
+    const auto k0 =
+        static_cast<int32_t>(adjust_vec_size(_settings.direct_conv_descriptor().k0, _src->dimension(channel_idx)));
+
+    // CKW constants
+    auto const_kernel_w_i32 = writer->declare_constant_tile(ckw::ConstantData({{kernel_width}}, ckw::DataType::Int32));
+    auto const_kernel_size_i32 =
+        writer->declare_constant_tile(ckw::ConstantData({{kernel_size}}, ckw::DataType::Int32));
+    auto const_src_c_i32    = writer->declare_constant_tile(ckw::ConstantData({{src_c}}, ckw::DataType::Int32));
+    auto const_src_w_i32    = writer->declare_constant_tile(ckw::ConstantData({{src_w}}, ckw::DataType::Int32));
+    auto const_src_h_i32    = writer->declare_constant_tile(ckw::ConstantData({{src_h}}, ckw::DataType::Int32));
+    auto const_dst_w_i32    = writer->declare_constant_tile(ckw::ConstantData({{dst_w}}, ckw::DataType::Int32));
+    auto const_stride_x_i32 = writer->declare_constant_tile(ckw::ConstantData({{stride_x}}, ckw::DataType::Int32));
+    auto const_stride_y_i32 = writer->declare_constant_tile(ckw::ConstantData({{stride_y}}, ckw::DataType::Int32));
+    auto const_pad_x_i32    = writer->declare_constant_tile(ckw::ConstantData({{pad_x}}, ckw::DataType::Int32));
+    auto const_pad_y_i32    = writer->declare_constant_tile(ckw::ConstantData({{pad_y}}, ckw::DataType::Int32));
+    auto const_k0_i32       = writer->declare_constant_tile(ckw::ConstantData({{k0}}, ckw::DataType::Int32));
+    auto const_0_i32        = writer->declare_constant_tile(ckw::ConstantData({{0}}, ckw::DataType::Int32));
+    auto const_pos_1_i32    = writer->declare_constant_tile(ckw::ConstantData({{1}}, ckw::DataType::Int32));
+    auto const_neg_1_i32    = writer->declare_constant_tile(ckw::ConstantData({{-1}}, ckw::DataType::Int32));
+    auto const_0_fp         = writer->declare_constant_tile(ckw::ConstantData({{0.0f}}, dst_dt));
+    auto const_src_c_i32_minus_k0_i32 =
+        writer->declare_constant_tile(ckw::ConstantData({{src_c - k0}}, ckw::DataType::Int32));
+
+    /********************************************************************************
+     * 3 - Define the compute block parameters and destination tile (if not root component)
+     *     Bind the tile to the tensor to share it among different components and
+     *     initialize the compute block parameters
+     ********************************************************************************/
+    // The compute block parameters depend on the employed tensor format
+    const auto root_window = comp_group.get_root_component()->ckw_component_driver()->get_window();
+
+    // Destination compute block size
+    const int32_t dst_n0 = root_window.x().step();
+    const int32_t dst_m0 = root_window.y().step();
+
+    // Destination compute block size left-over
+    const int32_t dst_n0_partial = _dst->dimension(0) % dst_n0;
+    const int32_t dst_m0_partial = (_dst->dimension(1) * _dst->dimension(2)) % dst_m0;
+
+    // Shift-back for the overlapping-min strategy
+    const int32_t dst_shift_back = (dst_n0 - dst_n0_partial) % dst_n0;
+
+    ckw::TensorSampler sampler_dst;
+    sampler_dst.format(ckw::TensorSamplerFormat::Dim0_Dim1xDim2_1);
+    if (dst_n0_partial == 0)
+    {
+        sampler_dst.address_mode_x(ckw::TensorSamplerAddressModeX::None);
+    }
+    else
+    {
+        sampler_dst.address_mode_x(ckw::TensorSamplerAddressModeX::OverlappingMin);
+    }
+
+    if (dst_m0_partial == 0)
+    {
+        sampler_dst.address_mode_y(ckw::TensorSamplerAddressModeY::None);
+    }
+    else
+    {
+        sampler_dst.address_mode_y(ckw::TensorSamplerAddressModeY::ClampToBorderMaxOnly);
+    }
+
+    sampler_dst.address_mode_z(ckw::TensorSamplerAddressModeZ::None);
+    sampler_dst.storage(ckw::TensorStorageType::BufferUint8Ptr);
+
+    // Declare destination tile
+    auto tile_dst = writer->declare_tile("dst", ckw::TileInfo(dst_dt, dst_m0, dst_n0));
+
+    // Initialize destination tile
+    writer->op_assign(tile_dst, const_0_fp);
+
+    // Bind tile to the tensor
+    dst->init_virtual_tensor(tile_dst, sampler_dst);
+
+    /********************************************************************************
+     * 4 - Define the compute block parameters CKW constants
+     ********************************************************************************/
+    // Only now we can declare the N0 and M0 as constant
+    auto const_dst_n0_i32 = writer->declare_constant_tile(ckw::ConstantData({{dst_n0}}, ckw::DataType::Int32));
+    auto const_dst_m0_i32 = writer->declare_constant_tile(ckw::ConstantData({{dst_m0}}, ckw::DataType::Int32));
+    auto const_shift_back_dst_n0_i32 =
+        writer->declare_constant_tile(ckw::ConstantData({{dst_shift_back}}, ckw::DataType::Int32));
+
+    /********************************************************************************
+     * 5 - Define the samplers for the input tensors
+     ********************************************************************************/
+    // Exporting the weights tensor to an OpenCL image object is currently only supported when:
+    //   a) k0 is equal to 4
+    // The current implementation expects to read a vector of 4 float values into the OpenCL image object.
+    //   b) K is a multiple of 4
+    // This is a limitation in the current interface due to the variable table being responsible for maintaining
+    // information about the TensorStorageType rather than the TensorTileSampler. As a result, TensorStorageType cannot
+    // be reassigned, and we cannot use a texture object for the weights tensor in cases where we expect to have an
+    // extra loop to compute the left-over elements.
+    const bool use_cl_image_for_weights = desc.export_weights_to_cl_image && (k0 == 4) && (src_c % 4 == 0);
+
+    // SOURCE SAMPLER
+    // - We cannot have out-of-bounds reads in the X dimension (mapped to the IFMs) as we have an extra loop to
+    //   compute left-over elements
+    // - We cannot have out-of-bounds reads when the kernel height is equal to 1. In all other cases, we need to ensure the
+    //   indirection buffer mi does not contain negative values representing out-of-bounds reads.
+    auto address_mode_y_src =
+        kernel_height == 1 ? ckw::TensorSamplerAddressModeY::None : ckw::TensorSamplerAddressModeY::SkipLessThanZero;
+    ckw::TensorSampler sampler_src;
+    sampler_src.format(ckw::TensorSamplerFormat::Dim0_Dim1xDim2_1); // 3rd dimension collapsed with 2nd dimension
+    sampler_src.address_mode_x(ckw::TensorSamplerAddressModeX::None);
+    sampler_src.address_mode_y(address_mode_y_src);
+    sampler_src.address_mode_z(ckw::TensorSamplerAddressModeZ::None);
+    sampler_src.storage(ckw::TensorStorageType::BufferUint8Ptr);
+
+    // WEIGHTS SAMPLER
+    // We cannot have out-of-bounds accesses for the weights
+    ckw::TensorSampler sampler_wei;
+    sampler_wei.format(ckw::TensorSamplerFormat::Dim0_Dim1xDim2_1); // 3rd dimension collapsed with 2nd dimension
+    sampler_wei.address_mode_x(ckw::TensorSamplerAddressModeX::None);
+    sampler_wei.address_mode_y(ckw::TensorSamplerAddressModeY::None);
+    sampler_wei.address_mode_z(ckw::TensorSamplerAddressModeZ::None);
+    if (use_cl_image_for_weights)
+    {
+        sampler_wei.storage(ckw::TensorStorageType::Texture2dReadOnly);
+    }
+    else
+    {
+        sampler_wei.storage(ckw::TensorStorageType::BufferUint8Ptr);
+    }
+
+    // BIAS SAMPLER
+    ckw::TensorSampler sampler_bia;
+
+    if (using_bias)
+    {
+        sampler_bia.format(ckw::TensorSamplerFormat::Dim0_Dim1xDim2_1);
+        sampler_bia.address_mode_x(sampler_dst.address_mode_x());
+        sampler_bia.address_mode_y(ckw::TensorSamplerAddressModeY::None);
+        sampler_bia.address_mode_z(ckw::TensorSamplerAddressModeZ::None);
+        sampler_bia.storage(ckw::TensorStorageType::BufferUint8Ptr);
+    }
+
+    /********************************************************************************
+     * 6 - Extra operations required before writing the main code (optional)
+     ********************************************************************************/
+
+    // Not required
+
+    /********************************************************************************
+     * 7 - Get the coordinates of the destination tile
+     ********************************************************************************/
+    auto tile_gid_0 = writer->declare_tile("gid_0", ckw::TileInfo(ckw::DataType::Int32));
+    auto tile_gid_1 = writer->declare_tile("gid_1", ckw::TileInfo(ckw::DataType::Int32));
+    auto tile_gid_2 = writer->declare_tile("gid_2", ckw::TileInfo(ckw::DataType::Int32));
+
+    writer->op_get_global_id(tile_gid_0, 0);
+    writer->op_get_global_id(tile_gid_1, 1);
+    writer->op_get_global_id(tile_gid_2, 2);
+
+    auto tile_cout = writer->declare_tile("cout", ckw::TileInfo(ckw::DataType::Int32)); // OFM
+    auto tile_mout = writer->declare_tile("mout", ckw::TileInfo(ckw::DataType::Int32)); // WIDTH x HEIGHT
+    auto tile_bout = writer->declare_tile("bout", ckw::TileInfo(ckw::DataType::Int32)); // BATCH SIZE IDX
+
+    // Calculate coordinates
+    get_coordinate_from_gws_overlapping_min(writer, tile_cout, tile_gid_0, const_dst_n0_i32,
+                                            const_shift_back_dst_n0_i32, const_0_i32);
+    get_coordinate_from_gws(writer, tile_mout, tile_gid_1, const_dst_m0_i32);
+    get_coordinate_from_gws(writer, tile_bout, tile_gid_2, const_pos_1_i32);
+
+    /********************************************************************************
+     * 8 - Write the rest of the code
+     ********************************************************************************/
+    // We create a 2d container of size (dst_m0, 1) to store the indices for iteration
+    TileContainer it;
+    for (int32_t m = 0; m < dst_m0; ++m)
+    {
+        std::vector<int32_t> idx{m};
+        it.push_back({idx});
+    }
+
+    const auto &const_idxs = writer->declare_constant_tile(ckw::ConstantData(it, ckw::DataType::Int32));
+
+    auto tile_xi = writer->declare_tile("xi", ckw::TileInfo(ckw::DataType::Int32, dst_m0, 1));
+    auto tile_yi = writer->declare_tile("yi", ckw::TileInfo(ckw::DataType::Int32, dst_m0, 1));
+
+    // Convert the linear index to coordinate
+    // xi = ((mout + i) % dst_w) * stride_x - pad_x
+    // yi = ((mout + i) / dst_w) * stride_y - pad_y
+    writer->op_binary(tile_xi, ckw::BinaryOp::Add, tile_mout, const_idxs);
+    writer->op_binary(tile_yi, ckw::BinaryOp::Add, tile_mout, const_idxs);
+    writer->op_binary(tile_xi, ckw::BinaryOp::Mod, tile_xi, const_dst_w_i32);
+    writer->op_binary(tile_yi, ckw::BinaryOp::Div, tile_yi, const_dst_w_i32);
+    writer->op_binary(tile_xi, ckw::BinaryOp::Mul, tile_xi, const_stride_x_i32);
+    writer->op_binary(tile_yi, ckw::BinaryOp::Mul, tile_yi, const_stride_y_i32);
+    writer->op_binary(tile_xi, ckw::BinaryOp::Sub, tile_xi, const_pad_x_i32);
+    writer->op_binary(tile_yi, ckw::BinaryOp::Sub, tile_yi, const_pad_y_i32);
+
+    auto tile_y_b = writer->declare_tile("y_b", ckw::TileInfo(ckw::DataType::Int32));
+    writer->op_binary(tile_y_b, ckw::BinaryOp::Mul, tile_cout, const_kernel_size_i32);
+
+    auto tile_i = writer->declare_tile("i", ckw::TileInfo(ckw::DataType::Int32));
+    writer->op_assign(tile_i, const_0_i32);
+
+    // clang-format off
+    writer->op_for_loop(tile_i, ckw::BinaryOp::Less, const_kernel_size_i32, tile_i, ckw::AssignmentOp::Increment, const_pos_1_i32, [&]()
+    {
+        auto tile_x_k = writer->declare_tile("x_k", ckw::TileInfo(ckw::DataType::Int32));
+        auto tile_y_k = writer->declare_tile("y_k", ckw::TileInfo(ckw::DataType::Int32));
+
+        writer->op_binary(tile_x_k, ckw::BinaryOp::Mod, tile_i, const_kernel_w_i32);
+        writer->op_binary(tile_y_k, ckw::BinaryOp::Div, tile_i, const_kernel_w_i32);
+
+        auto tile_ck = writer->declare_tile("ck", ckw::TileInfo(ckw::DataType::Int32));
+        writer->op_assign(tile_ck, const_0_i32);
+
+        // Construct an indirection buffer containing the precalculated addresses of elements in the source tensor
+        // x_s = xi + x_k
+        // y_s = yi + y_k
+        // mi = x_s + y_s * width;
+        // mi = select(-1, mi, x_s >= 0);
+        // mi = select(-1, mi, x_s < width);
+        // mi = select(-1, mi, y_s >= 0);
+        // mi = select(-1, mi, y_s < height);
+        auto tile_xs = writer->declare_tile("xs", ckw::TileInfo(ckw::DataType::Int32, dst_m0, 1));
+        auto tile_ys = writer->declare_tile("ys", ckw::TileInfo(ckw::DataType::Int32, dst_m0, 1));
+        auto tile_mi = writer->declare_tile("mi", ckw::TileInfo(ckw::DataType::Int32, dst_m0, 1));
+
+        auto tile_xs_gte_0 = writer->declare_tile("xs_gte_0", ckw::TileInfo(ckw::DataType::Int32, dst_m0, 1));
+        auto tile_ys_gte_0 = writer->declare_tile("ys_gte_0", ckw::TileInfo(ckw::DataType::Int32, dst_m0, 1));
+        auto tile_xs_lt_w  = writer->declare_tile("xs_lt_w", ckw::TileInfo(ckw::DataType::Int32, dst_m0, 1));
+        auto tile_ys_lt_h  = writer->declare_tile("ys_lt_h", ckw::TileInfo(ckw::DataType::Int32, dst_m0, 1));
+
+        writer->op_binary(tile_xs, ckw::BinaryOp::Add, tile_xi, tile_x_k);
+        writer->op_binary(tile_ys, ckw::BinaryOp::Add, tile_yi, tile_y_k);
+        writer->op_binary(tile_mi, ckw::BinaryOp::Mul, tile_ys, const_src_w_i32);
+        writer->op_binary(tile_mi, ckw::BinaryOp::Add, tile_mi, tile_xs);
+        writer->op_binary(tile_xs_gte_0, ckw::BinaryOp::GreaterEqual, tile_xs, const_0_i32);
+        writer->op_binary(tile_ys_gte_0, ckw::BinaryOp::GreaterEqual, tile_ys, const_0_i32);
+        writer->op_binary(tile_xs_lt_w, ckw::BinaryOp::Less, tile_xs, const_src_w_i32);
+        writer->op_binary(tile_ys_lt_h, ckw::BinaryOp::Less, tile_ys, const_src_h_i32);
+        writer->op_ternary(tile_mi, ckw::TernaryOp::Select, const_neg_1_i32, tile_mi, tile_xs_gte_0);
+        writer->op_ternary(tile_mi, ckw::TernaryOp::Select, const_neg_1_i32, tile_mi, tile_ys_gte_0);
+        writer->op_ternary(tile_mi, ckw::TernaryOp::Select, const_neg_1_i32, tile_mi, tile_xs_lt_w);
+        writer->op_ternary(tile_mi, ckw::TernaryOp::Select, const_neg_1_i32, tile_mi, tile_ys_lt_h);
+
+        writer->op_for_loop(tile_ck, ckw::BinaryOp::LessEqual, const_src_c_i32_minus_k0_i32, tile_ck, ckw::AssignmentOp::Increment, const_k0_i32, [&]()
+        {
+            auto tile_lhs = writer->declare_tile("lhs", ckw::TileInfo(to_ckw(_src->data_type()), dst_m0, k0));
+            auto tile_rhs = writer->declare_tile("rhs", ckw::TileInfo(to_ckw(_wei->data_type()), dst_n0, k0));
+            writer->op_assign(tile_lhs, const_0_fp);
+            writer->op_assign(tile_rhs, const_0_fp);
+
+            writer->op_load_indirect(tile_lhs, src->tensor(), sampler_src, tile_ck, tile_mi, const_0_i32, tile_bout);
+            writer->op_load_dilated(tile_rhs, wei->tensor(), sampler_wei, tile_ck, tile_y_b, const_0_i32, const_0_i32, const_pos_1_i32, const_kernel_size_i32);
+
+            writer->op_binary(tile_dst, ckw::BinaryOp::MatMul_Nt_T, tile_lhs, tile_rhs);
+        });
+
+        // Left-over accumulations for when K is not a multiple of k0
+        if(((src_c % k0) != 0))
+        {
+            writer->op_for_loop(tile_ck, ckw::BinaryOp::Less, const_src_c_i32, tile_ck, ckw::AssignmentOp::Increment, const_pos_1_i32, [&]()
+            {
+                auto tile_lhs = writer->declare_tile("lhs_leftover", ckw::TileInfo(to_ckw(_src->data_type()), dst_m0, 1));
+                auto tile_rhs = writer->declare_tile("rhs_leftover", ckw::TileInfo(to_ckw(_wei->data_type()), dst_n0, 1));
+                writer->op_assign(tile_lhs, const_0_fp);
+                writer->op_assign(tile_rhs, const_0_fp);
+
+                writer->op_load_indirect(tile_lhs, src->tensor(), sampler_src, tile_ck, tile_mi, const_0_i32, tile_bout);
+                writer->op_load_dilated(tile_rhs, wei->tensor(), sampler_wei, tile_ck, tile_y_b, const_0_i32, const_0_i32, const_pos_1_i32, const_kernel_size_i32);
+
+                writer->op_binary(tile_dst, ckw::BinaryOp::MatMul_Nt_T, tile_lhs, tile_rhs);
+            });
+        }
+
+        writer->op_binary(tile_y_b, ckw::BinaryOp::Add, tile_y_b, const_pos_1_i32);
+    });
+    // clang-format on
+
+    // NOTE: The bias addition will be removed from this kernel as the interface is standardized. The intended way of
+    // performing bias addition is to fuse this convolution kernel with a following elementwise addition kernel.
+    if (using_bias)
+    {
+        if (!bia->has_tile())
+        {
+            auto tile_bia = writer->declare_tile("bia", ckw::TileInfo(to_ckw(_src->data_type()), 1, dst_n0));
+            writer->op_load(tile_bia, bia->tensor(), sampler_bia, tile_cout, const_0_i32, const_0_i32, const_0_i32);
+            bia->init_virtual_tensor(tile_bia, sampler_bia);
+        }
+        auto &tile_bia = bia->tile();
+
+        writer->op_binary(tile_dst, ckw::BinaryOp::Add, tile_dst, tile_bia);
+    }
+
+    ARM_COMPUTE_ERROR_ON_MSG(dst->has_tile() == false, "You must bind a tile before appending another component");
+}
+
+Window GpuCkwDirectConv2d::get_window() const
+{
+    ARM_COMPUTE_ERROR_ON_MSG(_dst->tensor_shape().total_size() == 0U, "Destination tensor is not initialized");
+
+    const auto dst_shape = _dst->tensor_shape();
+    const auto desc      = _settings.direct_conv_descriptor();
+
+    const uint32_t dst_n0 = adjust_vec_size(desc.n0, dst_shape[0]);
+    const uint32_t dst_m0 = adjust_vec_size(desc.m0, dst_shape[1] * dst_shape[2]);
+
+    Window win = calculate_max_window(dst_shape, Steps(dst_n0, dst_m0));
+
+    const size_t dim_y_collapsed = ceil_to_multiple(dst_shape[1] * dst_shape[2], dst_m0);
+    win.set(Window::DimY, Window::Dimension(0, dim_y_collapsed, dst_m0));
+    win.set(Window::DimZ, Window::Dimension(0, dst_shape.total_size_upper(3), 1));
+
+    return win;
+}
+
+std::string GpuCkwDirectConv2d::get_name(const ComponentGroup &comp_group) const
+{
+    ARM_COMPUTE_UNUSED(comp_group);
+
+    return "direct_conv2d";
+}
+
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwDirectConv2d.h b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwDirectConv2d.h
new file mode 100644
index 0000000000..139cf620e2
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwDirectConv2d.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2023-2024 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_COMPONENTS_GPUCKWDIRECTCONV2D_H
+#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_COMPONENTS_GPUCKWDIRECTCONV2D_H
+
+#include "arm_compute/dynamic_fusion/sketch/attributes/Conv2dAttributes.h"
+
+#include "src/core/common/Macros.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/IGpuCkwComponentDriver.h"
+#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+class GpuCkwDirectConv2d : public IGpuCkwComponentDriver
+{
+public:
+    using Attributes = ClComponentDirectConv2d::Attributes;
+    using Settings   = ClComponentDirectConv2d::Settings;
+
+public:
+    /** Constructor
+     *
+     * For supported configurations please refer to @ref ClComponentDirectConv2d::validate()
+     *
+     * @param[in] id         Component id
+     * @param[in] tensors    Tensor arguments to the component
+     * @param[in] attributes Component attributes. Attributes are a set of parameters that define what a component does
+     * @param[in] settings   Component settings. Settings are a set of parameters that influence the implementation of a component
+     */
+    GpuCkwDirectConv2d(ComponentId                      id,
+                       const ArgumentPack<ITensorInfo> &tensors,
+                       const Attributes                &attributes,
+                       const Settings                  &settings);
+    ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(GpuCkwDirectConv2d);
+    /** Destructor */
+    ~GpuCkwDirectConv2d() override = default;
+
+    // Inherited methods overriden
+    virtual void write_component_code(const ComponentGroup    &comp_group,
+                                      GpuCkwVariableTable     &vtable,
+                                      GpuCkwScopedKernelWriter writer) const override;
+    Window       get_window() const override;
+    std::string  get_name(const ComponentGroup &comp_group) const override;
+
+private:
+    const ITensorInfo *_src;
+    const ITensorInfo *_wei;
+    const ITensorInfo *_bia;
+    const ITensorInfo *_dst;
+
+    Attributes _attributes;
+    Settings   _settings;
+};
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+
+#endif // ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_COMPONENTS_GPUCKWDIRECTCONV2D_H
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.cpp
new file mode 100644
index 0000000000..fb55acad53
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.cpp
@@ -0,0 +1,434 @@
+/*
+ * Copyright (c) 2023-2024 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "GpuCkwElementwiseBinary.h"
+
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
+#include "arm_compute/core/utils/StringUtils.h"
+#include "arm_compute/core/Validate.h"
+
+#include "src/core/helpers/WindowHelpers.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/CkwHelper.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/Common.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/ElementwiseBinary.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h"
+#include "src/dynamic_fusion/sketch/gpu/components/utils/type_printer/ElementwiseBinary.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
+#include "support/StringSupport.h"
+
+#include "compute_kernel_writer/include/ckw/KernelWriter.h"
+#include "compute_kernel_writer/include/ckw/types/ConstantData.h"
+#include "compute_kernel_writer/include/ckw/types/TensorSamplerTypes.h"
+#include <cstdint>
+#include <string>
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+GpuCkwElementwiseBinary::GpuCkwElementwiseBinary(ComponentId                      id,
+                                                 const ArgumentPack<ITensorInfo> &tensors,
+                                                 const Attributes                &attributes)
+    : IGpuCkwComponentDriver{id, tensors}, _lhs{}, _rhs{}, _dst{}, _attributes{attributes}
+{
+    _lhs = this->tensors().get_const_tensor(TensorType::ACL_SRC_0);
+    _rhs = this->tensors().get_const_tensor(TensorType::ACL_SRC_1);
+    _dst = this->tensors().get_const_tensor(TensorType::ACL_DST_0);
+    ARM_COMPUTE_ERROR_ON_NULLPTR(_lhs, _rhs, _dst);
+}
+
+void GpuCkwElementwiseBinary::write_component_code(const ComponentGroup    &comp_group,
+                                                   GpuCkwVariableTable     &vtable,
+                                                   GpuCkwScopedKernelWriter writer) const
+{
+    /********************************************************************************
+     * 1 - Define tensors
+     ********************************************************************************/
+    GpuCkwComponentArgument *lhs = vtable.declare_variable(comp_group, writer, _lhs, "lhs");
+    GpuCkwComponentArgument *rhs = vtable.declare_variable(comp_group, writer, _rhs, "rhs");
+    GpuCkwComponentArgument *dst = vtable.declare_variable(comp_group, writer, _dst, "dst");
+
+    /********************************************************************************
+     * 2 - Define CKW constants
+     ********************************************************************************/
+    const auto dst_h = static_cast<int32_t>(_dst->dimension(1));
+
+    // CKW constants
+    auto const_dst_h_i32 = writer->declare_constant_tile(ckw::ConstantData({{dst_h}}, ckw::DataType::Int32));
+    auto const_pos_1_i32 = writer->declare_constant_tile(ckw::ConstantData({{1}}, ckw::DataType::Int32));
+    auto const_0_i32     = writer->declare_constant_tile(ckw::ConstantData({{0}}, ckw::DataType::Int32));
+
+    /********************************************************************************
+     * 3 - Define the compute block parameters and destination tile (if not root component)
+     *     Bind the tile to the tensor to share it among different components and
+     *     initialize the compute block parameters
+     ********************************************************************************/
+    // The compute block parameters depend on the employed tensor format
+
+    // Destination compute block size
+    int32_t dst_n0 = -1;
+    int32_t dst_m0 = -1;
+
+    // Destination compute block size left-over
+    int32_t dst_n0_partial = -1;
+    int32_t dst_m0_partial = -1;
+
+    if (!dst->has_tile())
+    {
+        // If ROOT component, we use ckw::TensorSamplerFormat::Dim0_Dim1xDim2_1
+        // as tensor format
+        const auto root_window = comp_group.get_root_component()->ckw_component_driver()->get_window();
+
+        dst_n0         = root_window.x().step();
+        dst_m0         = root_window.y().step();
+        dst_n0_partial = _dst->dimension(0) % dst_n0;
+        dst_m0_partial = (_dst->dimension(1) * _dst->dimension(2)) % dst_m0;
+
+        ckw::TensorSampler sampler_dst;
+        sampler_dst.format(ckw::TensorSamplerFormat::Dim0_Dim1xDim2_1);
+        if (dst_n0_partial == 0)
+        {
+            sampler_dst.address_mode_x(ckw::TensorSamplerAddressModeX::None);
+        }
+        else
+        {
+            sampler_dst.address_mode_x(ckw::TensorSamplerAddressModeX::OverlappingMin);
+        }
+
+        if (dst_m0_partial == 0)
+        {
+            sampler_dst.address_mode_y(ckw::TensorSamplerAddressModeY::None);
+        }
+        else
+        {
+            sampler_dst.address_mode_y(ckw::TensorSamplerAddressModeY::ClampToBorderMaxOnly);
+        }
+        sampler_dst.address_mode_z(ckw::TensorSamplerAddressModeZ::None);
+        sampler_dst.storage(ckw::TensorStorageType::BufferUint8Ptr);
+
+        // Declare destination tile
+        ckw::DataType dst_dt   = to_ckw(_dst->data_type());
+        auto          tile_dst = writer->declare_tile("dst", ckw::TileInfo(dst_dt, dst_m0, dst_n0));
+
+        // Bind tile to the tensor
+        dst->init_virtual_tensor(tile_dst, sampler_dst);
+    }
+    else
+    {
+        // Change dst_n0 and dst_m0 if NOT root component!
+        dst_n0 = dst->tile().tile_info().width();
+        dst_m0 = dst->tile().tile_info().height();
+
+        // Here, it is not required the calculation of dst_n0_partial and dst_m0_partial
+        // because if we enter this condition it means that the element-wise op is not the
+        // root component and the address modes have been already set.
+    }
+
+    const auto &tile_dst = dst->tile();
+
+    /********************************************************************************
+     * 4 - Define the compute block parameters CKW constants
+     ********************************************************************************/
+    // ...
+
+    /********************************************************************************
+     * 5 - Define the samplers for the input tensors
+     ********************************************************************************/
+    // Check whether the lhs tensor is a tile or tensor
+    // If it is a tile, create a sampler and load the content in a tile
+    if (!lhs->has_tile())
+    {
+        // Sampler
+        ckw::TensorSampler sampler_lhs = dst->tensor_sampler();
+
+        bool broadcast_x = false;
+        bool broadcast_y = false;
+
+        int32_t lhs_n0 = dst_n0;
+        int32_t lhs_m0 = dst_m0;
+
+        // Check whether we have broadcasting
+        // In case of broadcast, lhs can only be a vector or scalar.
+        // Broadcasting in other dimensions is not supported
+        if (_dst->dimension(0) != _lhs->dimension(0))
+        {
+            broadcast_x = true;
+            lhs_n0      = 1;
+        }
+
+        if (sampler_lhs.format() == ckw::TensorSamplerFormat::Dim0_Dim1xDim2_1)
+        {
+            if (_dst->dimension(1) * _dst->dimension(2) != _lhs->dimension(1) * _lhs->dimension(2))
+            {
+                broadcast_y = true;
+                lhs_m0      = 1;
+            }
+        }
+        else if (sampler_lhs.format() == ckw::TensorSamplerFormat::Dim0_Dim1_Dim2)
+        {
+            if (_dst->dimension(1) != _lhs->dimension(1))
+            {
+                broadcast_y = true;
+                lhs_m0      = 1;
+            }
+        }
+
+        const int32_t lhs_partial_n0 = _lhs->dimension(0) % lhs_n0;
+        const int32_t lhs_shift_back = (lhs_n0 - lhs_partial_n0) % lhs_n0;
+
+        // Constants
+        auto const_lhs_n0_i32 = writer->declare_constant_tile(ckw::ConstantData({{lhs_n0}}, ckw::DataType::Int32));
+        auto const_lhs_m0_i32 = writer->declare_constant_tile(ckw::ConstantData({{lhs_m0}}, ckw::DataType::Int32));
+        auto const_lhs_shift_back_n0_i32 =
+            writer->declare_constant_tile(ckw::ConstantData({{lhs_shift_back}}, ckw::DataType::Int32));
+
+        auto tile_gid_0 = writer->declare_tile("gid_0_lhs", ckw::TileInfo(ckw::DataType::Int32));
+        auto tile_gid_1 = writer->declare_tile("gid_1_lhs", ckw::TileInfo(ckw::DataType::Int32));
+        auto tile_gid_2 = writer->declare_tile("gid_2_lhs", ckw::TileInfo(ckw::DataType::Int32));
+
+        writer->op_get_global_id(tile_gid_0, 0);
+        writer->op_get_global_id(tile_gid_1, 1);
+        writer->op_get_global_id(tile_gid_2, 2);
+
+        auto tile_cout0 = writer->declare_tile("cout0_lhs", ckw::TileInfo(ckw::DataType::Int32)); // OFM
+        auto tile_mout0 =
+            writer->declare_tile("mout0_lhs", ckw::TileInfo(ckw::DataType::Int32)); // WIDTH or WIDTH x HEIGHT
+        auto tile_mout1 = writer->declare_tile("mout1_lhs", ckw::TileInfo(ckw::DataType::Int32)); // HEIGHT or 0
+        auto tile_bout0 = writer->declare_tile("bout0_lhs", ckw::TileInfo(ckw::DataType::Int32)); // BATCH SIZE IDX
+
+        // Calculate coordinates
+        if (!broadcast_x)
+        {
+            get_coordinate_from_gws_overlapping_min(writer, tile_cout0, tile_gid_0, const_lhs_n0_i32,
+                                                    const_lhs_shift_back_n0_i32, const_0_i32);
+        }
+        else
+        {
+            writer->op_assign(tile_cout0, const_0_i32);
+        }
+
+        if (!broadcast_y)
+        {
+            get_coordinate_from_gws(writer, tile_mout0, tile_gid_1, const_lhs_m0_i32);
+        }
+        else
+        {
+            writer->op_assign(tile_mout0, const_0_i32);
+        }
+
+        // Get the boundary aware coordinates at each global dimension index
+        if (sampler_lhs.format() == ckw::TensorSamplerFormat::Dim0_Dim1xDim2_1)
+        {
+            writer->op_assign(tile_mout1, const_0_i32);
+            get_coordinate_from_gws(writer, tile_bout0, tile_gid_2, const_pos_1_i32);
+        }
+        else if (sampler_lhs.format() == ckw::TensorSamplerFormat::Dim0_Dim1_Dim2)
+        {
+            // For tile_mout1 and tile_bout0 the step can only be 1
+            if (!broadcast_y)
+            {
+                writer->op_binary(tile_mout1, ckw::BinaryOp::Mod, tile_gid_2, const_dst_h_i32);
+            }
+            else
+            {
+                // If broadcast_y == true, it means that we have either a scalar or vector
+                // because broadcasting in other dimensions is not supported
+                writer->op_assign(tile_mout1, const_0_i32);
+            }
+
+            writer->op_binary(tile_bout0, ckw::BinaryOp::Div, tile_gid_2, const_dst_h_i32);
+        }
+
+        ckw::DataType lhs_dt   = to_ckw(_lhs->data_type());
+        auto          tile_lhs = writer->declare_tile("lhs", ckw::TileInfo(lhs_dt, lhs_m0, lhs_n0));
+
+        writer->op_load(tile_lhs, lhs->tensor(), sampler_lhs, tile_cout0, tile_mout0, tile_mout1, tile_bout0);
+
+        // Here, init_virtual_tensor() is used to bring the tile_lhs outside the compound statement
+        lhs->init_virtual_tensor(tile_lhs, sampler_lhs);
+    }
+
+    // Check whether the rhs tensor is a tile or tensor
+    // If it is a tile, create a sampler and load the content in a tile
+    if (!rhs->has_tile())
+    {
+        // Sampler
+        ckw::TensorSampler sampler_rhs = dst->tensor_sampler();
+
+        bool broadcast_x = false;
+        bool broadcast_y = false;
+
+        int32_t rhs_n0 = dst_n0;
+        int32_t rhs_m0 = dst_m0;
+
+        // Check whether we have broadcasting
+        // In case of broadcast, rhs can only be a vector or scalar.
+        // Broadcasting in other dimensions is not supported
+        if (_dst->dimension(0) != _rhs->dimension(0))
+        {
+            broadcast_x = true;
+            rhs_n0      = 1;
+        }
+
+        if (sampler_rhs.format() == ckw::TensorSamplerFormat::Dim0_Dim1xDim2_1)
+        {
+            if (_dst->dimension(1) * _dst->dimension(2) != _rhs->dimension(1) * _rhs->dimension(2))
+            {
+                broadcast_y = true;
+                rhs_m0      = 1;
+            }
+        }
+        else if (sampler_rhs.format() == ckw::TensorSamplerFormat::Dim0_Dim1_Dim2)
+        {
+            if (_dst->dimension(1) != _rhs->dimension(1))
+            {
+                broadcast_y = true;
+                rhs_m0      = 1;
+            }
+        }
+
+        const int32_t rhs_partial_n0 = _rhs->dimension(0) % rhs_n0;
+        const int32_t rhs_shift_back = (rhs_n0 - rhs_partial_n0) % rhs_n0;
+
+        // Constants
+        auto const_rhs_n0_i32 = writer->declare_constant_tile(ckw::ConstantData({{rhs_n0}}, ckw::DataType::Int32));
+        auto const_rhs_m0_i32 = writer->declare_constant_tile(ckw::ConstantData({{rhs_m0}}, ckw::DataType::Int32));
+        auto const_rhs_shift_back_n0_i32 =
+            writer->declare_constant_tile(ckw::ConstantData({{rhs_shift_back}}, ckw::DataType::Int32));
+
+        auto tile_gid_0 = writer->declare_tile("gid_0_rhs", ckw::TileInfo(ckw::DataType::Int32));
+        auto tile_gid_1 = writer->declare_tile("gid_1_rhs", ckw::TileInfo(ckw::DataType::Int32));
+        auto tile_gid_2 = writer->declare_tile("gid_2_rhs", ckw::TileInfo(ckw::DataType::Int32));
+
+        writer->op_get_global_id(tile_gid_0, 0);
+        writer->op_get_global_id(tile_gid_1, 1);
+        writer->op_get_global_id(tile_gid_2, 2);
+
+        auto tile_cout0 = writer->declare_tile("cout0_rhs", ckw::TileInfo(ckw::DataType::Int32)); // OFM
+        auto tile_mout0 =
+            writer->declare_tile("mout0_rhs", ckw::TileInfo(ckw::DataType::Int32)); // WIDTH or WIDTH x HEIGHT
+        auto tile_mout1 = writer->declare_tile("mout1_rhs", ckw::TileInfo(ckw::DataType::Int32)); // HEIGHT or 0
+        auto tile_bout0 = writer->declare_tile("bout0_rhs", ckw::TileInfo(ckw::DataType::Int32)); // BATCH SIZE IDX
+
+        // Calculate coordinates
+        if (!broadcast_x)
+        {
+            get_coordinate_from_gws_overlapping_min(writer, tile_cout0, tile_gid_0, const_rhs_n0_i32,
+                                                    const_rhs_shift_back_n0_i32, const_0_i32);
+        }
+        else
+        {
+            writer->op_assign(tile_cout0, const_0_i32);
+        }
+
+        if (!broadcast_y)
+        {
+            get_coordinate_from_gws(writer, tile_mout0, tile_gid_1, const_rhs_m0_i32);
+        }
+        else
+        {
+            writer->op_assign(tile_mout0, const_0_i32);
+        }
+
+        // Get the boundary aware coordinates at each global dimension index
+        if (sampler_rhs.format() == ckw::TensorSamplerFormat::Dim0_Dim1xDim2_1)
+        {
+            writer->op_assign(tile_mout1, const_0_i32);
+            get_coordinate_from_gws(writer, tile_bout0, tile_gid_2, const_pos_1_i32);
+        }
+        else if (sampler_rhs.format() == ckw::TensorSamplerFormat::Dim0_Dim1_Dim2)
+        {
+            // For tile_mout1 and tile_bout0 the step can only be 1
+            const auto src_w       = static_cast<int32_t>(_rhs->dimension(1));
+            auto       const_src_w = writer->declare_constant_tile(ckw::ConstantData({{src_w}}, ckw::DataType::Int32));
+            if (!broadcast_y)
+            {
+                writer->op_binary(tile_mout1, ckw::BinaryOp::Mod, tile_mout1, const_src_w);
+            }
+            else
+            {
+                // If broadcast_y == true, it means that we have either a scalar or vector
+                // because broadcasting in other dimensions is not supported
+                writer->op_assign(tile_mout1, const_0_i32);
+            }
+
+            writer->op_binary(tile_bout0, ckw::BinaryOp::Div, tile_mout1, const_src_w);
+        }
+
+        ckw::DataType rhs_dt   = to_ckw(_rhs->data_type());
+        auto          tile_rhs = writer->declare_tile("rhs", ckw::TileInfo(rhs_dt, rhs_m0, rhs_n0));
+
+        writer->op_load(tile_rhs, rhs->tensor(), sampler_rhs, tile_cout0, tile_mout0, tile_mout1, tile_bout0);
+
+        // Here, init_virtual_tensor() is used to bring the tile_rhs outside the compound statement
+        rhs->init_virtual_tensor(tile_rhs, sampler_rhs);
+    }
+
+    const auto &tile_lhs = lhs->tile();
+    const auto &tile_rhs = rhs->tile();
+
+    /********************************************************************************
+     * 7 - Write the rest of the code
+     ********************************************************************************/
+    // Perform the element-wise operation
+    writer->op_binary(tile_dst, to_ckw(_attributes), tile_lhs, tile_rhs);
+
+    ARM_COMPUTE_ERROR_ON_MSG(dst->has_tile() == false, "You must bind a tile before appending another component");
+}
+
+Window GpuCkwElementwiseBinary::get_window() const
+{
+    ARM_COMPUTE_ERROR_ON_MSG(_dst->tensor_shape().total_size() == 0U, "Destination tensor is not initialized");
+
+    TensorShape output_shape = _dst->tensor_shape();
+    // Collapse Dim 1 (W) and Dim 2 (H) together, leave Dim 0 (C) unchanged
+    // This is in line with the collapsing convention used by operators like Conv2d
+    output_shape.collapse(2U, 1U);
+    constexpr uint32_t vector_size_byte_opencl = 16;
+    const uint32_t     num_elems_processed_per_iteration =
+        adjust_vec_size(vector_size_byte_opencl / _dst->element_size(), _dst->dimension(0));
+    Window win = calculate_max_window(output_shape, Steps(num_elems_processed_per_iteration));
+
+    return win;
+}
+
+std::string GpuCkwElementwiseBinary::get_name(const ComponentGroup &comp_group) const
+{
+    ARM_COMPUTE_UNUSED(comp_group);
+    const std::vector<std::string> build_params = {
+        "elementwise_binary",
+        "op",
+        to_string(_attributes.operation()),
+        "dt",
+        lower_string(string_from_data_type(_dst->data_type())),
+    };
+    return join(build_params, "_");
+}
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.h b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.h
new file mode 100644
index 0000000000..c6cbba28d3
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2023-2024 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_COMPONENTS_GPUCKWELEMENTWISEBINARY_H
+#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_COMPONENTS_GPUCKWELEMENTWISEBINARY_H
+
+#include "src/core/common/Macros.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/IGpuCkwComponentDriver.h"
+#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+class GpuCkwElementwiseBinary : public IGpuCkwComponentDriver
+{
+public:
+    using Attributes = ClComponentElementwiseBinary::Attributes;
+    /** Constructor
+     *
+     * For supported configurations please refer to @ref ClComponentElementwiseBinary::validate()
+     *
+     * @param[in] id         Component id
+     * @param[in] tensors    Tensor arguments to the component
+     * @param[in] attributes Component attributes
+     */
+    GpuCkwElementwiseBinary(ComponentId id, const ArgumentPack<ITensorInfo> &tensors, const Attributes &attributes);
+    ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(GpuCkwElementwiseBinary);
+    /** Destructor */
+    ~GpuCkwElementwiseBinary() override = default;
+    // Inherited methods overriden:
+    virtual void write_component_code(const ComponentGroup    &comp_group,
+                                      GpuCkwVariableTable     &vtable,
+                                      GpuCkwScopedKernelWriter writer) const override;
+    Window       get_window() const override;
+    std::string  get_name(const ComponentGroup &comp_group) const override;
+
+private:
+    const ITensorInfo *_lhs;
+    const ITensorInfo *_rhs;
+    const ITensorInfo *_dst;
+    Attributes         _attributes;
+};
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+
+#endif // ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_COMPONENTS_GPUCKWELEMENTWISEBINARY_H
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwMatMul.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwMatMul.cpp
new file mode 100644
index 0000000000..14ad3847fc
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwMatMul.cpp
@@ -0,0 +1,287 @@
+/*
+ * Copyright (c) 2023-2024 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwMatMul.h"
+
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
+#include "arm_compute/core/Validate.h"
+
+#include "src/core/helpers/WindowHelpers.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/CkwHelper.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/Common.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwComponentArgument.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h"
+#include "support/StringSupport.h"
+
+#include "compute_kernel_writer/include/ckw/KernelWriter.h"
+#include <cstdint>
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+
+GpuCkwMatMul::GpuCkwMatMul(ComponentId                      id,
+                           const ArgumentPack<ITensorInfo> &tensors,
+                           const Attributes                &attributes,
+                           const Settings                  &settings)
+    : IGpuCkwComponentDriver{id, tensors}, _lhs{}, _rhs{}, _dst{}, _attributes{attributes}, _settings{settings}
+{
+    _lhs = this->tensors().get_const_tensor(TensorType::ACL_SRC_0);
+    _rhs = this->tensors().get_const_tensor(TensorType::ACL_SRC_1);
+    _dst = this->tensors().get_const_tensor(TensorType::ACL_DST_0);
+    ARM_COMPUTE_ERROR_ON_NULLPTR(_lhs, _rhs, _dst);
+}
+
+void GpuCkwMatMul::write_component_code(const ComponentGroup    &comp_group,
+                                        GpuCkwVariableTable     &vtable,
+                                        GpuCkwScopedKernelWriter writer) const
+{
+    /********************************************************************************
+     * 1 - Define tensors
+     ********************************************************************************/
+    GpuCkwComponentArgument *lhs = vtable.declare_variable(comp_group, writer, _lhs, "lhs");
+    GpuCkwComponentArgument *rhs = vtable.declare_variable(comp_group, writer, _rhs, "rhs");
+    GpuCkwComponentArgument *dst = vtable.declare_variable(comp_group, writer, _dst, "dst");
+
+    /********************************************************************************
+     * 2 - Define CKW constants
+     ********************************************************************************/
+    const auto k =
+        _attributes.adj_lhs() ? static_cast<int32_t>(_lhs->dimension(1)) : static_cast<int32_t>(_lhs->dimension(0));
+    const auto k0     = static_cast<int32_t>(adjust_vec_size(_settings.k0(), k));
+    const auto dst_dt = to_ckw(_dst->data_type());
+
+    // CKW constants
+    auto const_k_i32          = writer->declare_constant_tile(ckw::ConstantData({{k}}, ckw::DataType::Int32));
+    auto const_k0_i32         = writer->declare_constant_tile(ckw::ConstantData({{k0}}, ckw::DataType::Int32));
+    auto const_0_i32          = writer->declare_constant_tile(ckw::ConstantData({{0}}, ckw::DataType::Int32));
+    auto const_pos_1_i32      = writer->declare_constant_tile(ckw::ConstantData({{1}}, ckw::DataType::Int32));
+    auto const_0_fp           = writer->declare_constant_tile(ckw::ConstantData({{0.0f}}, dst_dt));
+    auto const_k_minus_k0_i32 = writer->declare_constant_tile(ckw::ConstantData({{k - k0}}, ckw::DataType::Int32));
+
+    /********************************************************************************
+     * 3 - Define the compute block parameters and destination tile (if not root component)
+     *     Bind the tile to the tensor to share it among different components and
+     *     initialize the compute block parameters
+     ********************************************************************************/
+    // The n0 and m0 parameters from root_window only refers to the output
+    const auto root_window = comp_group.get_root_component()->ckw_component_driver()->get_window();
+
+    // Destination compute block size
+    const int32_t dst_n0 = root_window.x().step();
+    const int32_t dst_m0 = root_window.y().step();
+
+    // Destination compute block size left-over
+    const int32_t dst_n0_partial = _dst->dimension(0) % dst_n0;
+    const int32_t dst_m0_partial = _dst->dimension(1) % dst_m0;
+
+    // Shift-back for the overlapping-min strategy
+    const int32_t dst_shift_back = (dst_n0 - dst_n0_partial) % dst_n0;
+
+    ckw::TensorSampler sampler_dst;
+    sampler_dst.format(ckw::TensorSamplerFormat::Dim0_Dim1_Dim2);
+    if (dst_n0_partial == 0)
+    {
+        sampler_dst.address_mode_x(ckw::TensorSamplerAddressModeX::None);
+    }
+    else
+    {
+        sampler_dst.address_mode_x(ckw::TensorSamplerAddressModeX::OverlappingMin);
+    }
+
+    if (dst_m0_partial == 0)
+    {
+        sampler_dst.address_mode_y(ckw::TensorSamplerAddressModeY::None);
+    }
+    else
+    {
+        sampler_dst.address_mode_y(ckw::TensorSamplerAddressModeY::ClampToBorderMaxOnly);
+    }
+
+    sampler_dst.address_mode_z(ckw::TensorSamplerAddressModeZ::None);
+    sampler_dst.storage(ckw::TensorStorageType::BufferUint8Ptr);
+
+    // Declare destination tile
+    auto tile_dst = writer->declare_tile("dst", ckw::TileInfo(dst_dt, dst_m0, dst_n0));
+
+    // Initialize destination tile
+    writer->op_assign(tile_dst, const_0_fp);
+
+    // Bind tile to the tensor
+    dst->init_virtual_tensor(tile_dst, sampler_dst);
+
+    /********************************************************************************
+     * 4 - Define the compute block parameters CKW constants
+     ********************************************************************************/
+    // Only now we can declare the N0 and M0 as constant
+    auto const_dst_n0_i32 = writer->declare_constant_tile(ckw::ConstantData({{dst_n0}}, ckw::DataType::Int32));
+    auto const_dst_m0_i32 = writer->declare_constant_tile(ckw::ConstantData({{dst_m0}}, ckw::DataType::Int32));
+    auto const_shift_back_dst_n0_i32 =
+        writer->declare_constant_tile(ckw::ConstantData({{dst_shift_back}}, ckw::DataType::Int32));
+
+    /********************************************************************************
+     * 5 - Define the samplers for the input tensors
+     ********************************************************************************/
+    // LHS SAMPLER
+    // The assumption here is that M is multiple of M0. This limitation will be removed once
+    // we have the support for OverlappingMin as address mode for the Y direction
+    ckw::TensorSampler sampler_lhs;
+    sampler_lhs.format(ckw::TensorSamplerFormat::Dim0_Dim1_Dim2);
+    sampler_lhs.address_mode_x(ckw::TensorSamplerAddressModeX::None);
+    sampler_lhs.address_mode_y(ckw::TensorSamplerAddressModeY::None);
+    sampler_lhs.address_mode_z(ckw::TensorSamplerAddressModeZ::None);
+    sampler_lhs.storage(ckw::TensorStorageType::BufferUint8Ptr);
+
+    // RHS SAMPLER
+    ckw::TensorSampler sampler_rhs;
+    sampler_rhs.format(ckw::TensorSamplerFormat::Dim0_Dim1_Dim2);
+    sampler_rhs.address_mode_x(ckw::TensorSamplerAddressModeX::None);
+    sampler_rhs.address_mode_y(ckw::TensorSamplerAddressModeY::None);
+    sampler_rhs.address_mode_z(ckw::TensorSamplerAddressModeZ::None);
+    sampler_rhs.storage(ckw::TensorStorageType::BufferUint8Ptr);
+
+    /********************************************************************************
+     * 6 - Extra operations required before writing the main code (optional)
+     ********************************************************************************/
+
+    // Not required
+
+    /********************************************************************************
+     * 7 - Get the coordinates of the destination tile
+     ********************************************************************************/
+    auto tile_gid_0 = writer->declare_tile("gid_0", ckw::TileInfo(ckw::DataType::Int32));
+    auto tile_gid_1 = writer->declare_tile("gid_1", ckw::TileInfo(ckw::DataType::Int32));
+    auto tile_gid_2 = writer->declare_tile("gid_2", ckw::TileInfo(ckw::DataType::Int32));
+
+    writer->op_get_global_id(tile_gid_0, 0);
+    writer->op_get_global_id(tile_gid_1, 1);
+    writer->op_get_global_id(tile_gid_2, 2);
+
+    auto tile_idx_n = writer->declare_tile("idx_n", ckw::TileInfo(ckw::DataType::Int32)); // N index
+    auto tile_idx_m = writer->declare_tile("idx_m", ckw::TileInfo(ckw::DataType::Int32)); // M index
+    auto tile_idx_b = writer->declare_tile("idx_b", ckw::TileInfo(ckw::DataType::Int32)); // BATCH index
+
+    // Calculate coordinates
+    get_coordinate_from_gws_overlapping_min(writer, tile_idx_n, tile_gid_0, const_dst_n0_i32,
+                                            const_shift_back_dst_n0_i32, const_0_i32);
+    get_coordinate_from_gws(writer, tile_idx_m, tile_gid_1, const_dst_m0_i32);
+    get_coordinate_from_gws(writer, tile_idx_b, tile_gid_2, const_pos_1_i32);
+
+    /********************************************************************************
+     * 8 - Write the rest of the code
+     ********************************************************************************/
+    auto tile_idx_k = writer->declare_tile("idx_k", ckw::TileInfo(ckw::DataType::Int32)); // K index
+
+    writer->op_assign(tile_idx_k, const_0_i32);
+
+    // clang-format off
+    writer->op_for_loop(tile_idx_k, ckw::BinaryOp::LessEqual, const_k_minus_k0_i32, tile_idx_k, ckw::AssignmentOp::Increment, const_k0_i32,
+    [&]()
+    {
+        auto tile_lhs = writer->declare_tile("lhs", ckw::TileInfo(to_ckw(_lhs->data_type()), dst_m0, k0));
+        auto tile_rhs = writer->declare_tile("rhs", ckw::TileInfo(to_ckw(_rhs->data_type()), dst_n0, k0));
+        writer->op_assign(tile_lhs, const_0_fp);
+        writer->op_assign(tile_rhs, const_0_fp);
+
+        writer->op_load(tile_lhs, lhs->tensor(), sampler_lhs, tile_idx_k, tile_idx_m, tile_idx_b, const_0_i32);
+        writer->op_load(tile_rhs, rhs->tensor(), sampler_rhs, tile_idx_k, tile_idx_n, tile_idx_b, const_0_i32);
+
+        writer->op_binary(tile_dst, ckw::BinaryOp::MatMul_Nt_T, tile_lhs, tile_rhs);
+
+    });
+
+    // Left-over accumulations for when K is not a multiple of k0
+    if(((k % k0) != 0))
+    {
+        writer->op_for_loop(tile_idx_k, ckw::BinaryOp::Less, const_k_i32, tile_idx_k, ckw::AssignmentOp::Increment, const_pos_1_i32, [&]()
+        {
+            auto tile_lhs = writer->declare_tile("lhs", ckw::TileInfo(to_ckw(_lhs->data_type()), dst_m0, 1));
+            auto tile_rhs = writer->declare_tile("rhs", ckw::TileInfo(to_ckw(_rhs->data_type()), dst_n0, 1));
+            writer->op_assign(tile_lhs, const_0_fp);
+            writer->op_assign(tile_rhs, const_0_fp);
+
+            writer->op_load(tile_lhs, lhs->tensor(), sampler_lhs, tile_idx_k, tile_idx_m, tile_idx_b, const_0_i32);
+            writer->op_load(tile_rhs, rhs->tensor(), sampler_rhs, tile_idx_k, tile_idx_n, tile_idx_b, const_0_i32);
+
+            writer->op_binary(tile_dst, ckw::BinaryOp::MatMul_Nt_T, tile_lhs, tile_rhs);
+        });
+    }
+    // clang-format on
+}
+
+Window GpuCkwMatMul::get_window() const
+{
+    ARM_COMPUTE_ERROR_ON_MSG(_dst->tensor_shape().total_size() == 0U, "Destination tensor is not initialized");
+
+    const int32_t m       = _dst->dimension(1);
+    const int32_t n       = _dst->dimension(0);
+    const bool    adj_lhs = _attributes.adj_lhs();
+
+    const int32_t m0 = adj_lhs ? adjust_vec_size(_settings.m0(), m) : std::min(_settings.m0(), m);
+    const int32_t n0 = adjust_vec_size(_settings.n0(), n);
+
+    // Configure kernel window
+    Window win = calculate_max_window(_dst->tensor_shape(), Steps(n0, m0));
+    win        = win.collapse(win, Window::DimZ);
+
+    return win;
+}
+
+std::string GpuCkwMatMul::get_name(const ComponentGroup &comp_group) const
+{
+    ARM_COMPUTE_UNUSED(comp_group);
+
+    std::string kernel_name("mat_mul_native");
+
+    const int32_t m = _dst->dimension(1);
+    const int32_t n = _dst->dimension(0);
+    const int32_t k = _attributes.adj_lhs() ? _lhs->tensor_shape().y() : _lhs->tensor_shape().x();
+
+    kernel_name += _attributes.adj_lhs() ? "_t" : "_nt";
+    kernel_name += _attributes.adj_rhs() ? "_t" : "_nt";
+    kernel_name += "_";
+    kernel_name += support::cpp11::to_string(m);
+    kernel_name += "_";
+    kernel_name += support::cpp11::to_string(n);
+    kernel_name += "_";
+    kernel_name += support::cpp11::to_string(k);
+    kernel_name += "_";
+    kernel_name += support::cpp11::to_string(_dst->dimension(2));
+    kernel_name += "_";
+    kernel_name += support::cpp11::to_string(_settings.m0());
+    kernel_name += "_";
+    kernel_name += support::cpp11::to_string(_settings.n0());
+    kernel_name += "_";
+    kernel_name += support::cpp11::to_string(_settings.k0());
+
+    return kernel_name;
+}
+
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwMatMul.h b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwMatMul.h
new file mode 100644
index 0000000000..790418bf50
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwMatMul.h
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2023-2024 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_COMPONENTS_GPUCKWMATMUL_H
+#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_COMPONENTS_GPUCKWMATMUL_H
+
+#include "arm_compute/dynamic_fusion/sketch/attributes/MatMulAttributes.h"
+
+#include "src/core/common/Macros.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/IGpuCkwComponentDriver.h"
+#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentMatMul.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+class GpuCkwMatMul final : public IGpuCkwComponentDriver
+{
+public:
+    using Attributes = ClComponentMatMul::Attributes;
+    using Settings   = ClComponentMatMul::Settings;
+
+public:
+    /** Constructor
+     *
+     * For supported configurations please refer to @ref ClComponentMatMul::validate()
+     *
+     * @param[in] id         Component id
+     * @param[in] tensors    Tensor arguments to the component
+     * @param[in] attributes Component attributes. Attributes are a set of parameters that define what a component does
+     * @param[in] settings   Component settings. Settings are a set of parameters that influence the implementation of a component
+     */
+    GpuCkwMatMul(ComponentId                      id,
+                 const ArgumentPack<ITensorInfo> &tensors,
+                 const Attributes                &attributes,
+                 const Settings                  &settings);
+
+    ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(GpuCkwMatMul);
+
+    /** Destructor */
+    ~GpuCkwMatMul() override = default;
+
+    // Inherited methods overriden
+    virtual void write_component_code(const ComponentGroup    &comp_group,
+                                      GpuCkwVariableTable     &vtable,
+                                      GpuCkwScopedKernelWriter writer) const override;
+    Window       get_window() const override;
+    std::string  get_name(const ComponentGroup &comp_group) const override;
+
+private:
+    const ITensorInfo *_lhs;
+    const ITensorInfo *_rhs;
+    const ITensorInfo *_dst;
+
+    Attributes _attributes;
+    Settings   _settings;
+};
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+
+#endif // ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_COMPONENTS_GPUCKWMATMUL_H
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwPool2d.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwPool2d.cpp
new file mode 100644
index 0000000000..d027f348ef
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwPool2d.cpp
@@ -0,0 +1,405 @@
+/*
+ * Copyright (c) 2023-2024 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwPool2d.h"
+
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
+#include "arm_compute/core/Validate.h"
+
+#include "src/core/helpers/WindowHelpers.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/CkwHelper.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/Common.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
+
+#include "compute_kernel_writer/include/ckw/KernelWriter.h"
+#include <cstdint>
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+GpuCkwPool2d::GpuCkwPool2d(ComponentId                      id,
+                           const ArgumentPack<ITensorInfo> &tensors,
+                           const Attributes                &attributes,
+                           const Settings                  &settings)
+    : IGpuCkwComponentDriver{id, tensors}, _src{}, _dst{}, _attributes{attributes}, _settings{settings}
+
+{
+    _src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0);
+    _dst = this->tensors().get_const_tensor(TensorType::ACL_DST_0);
+    ARM_COMPUTE_ERROR_ON_NULLPTR(_src, _dst);
+}
+
+void GpuCkwPool2d::write_component_code(const ComponentGroup    &comp_group,
+                                        GpuCkwVariableTable     &vtable,
+                                        GpuCkwScopedKernelWriter writer) const
+{
+    const uint32_t width_idx  = get_data_layout_dimension_index(_src->data_layout(), DataLayoutDimension::WIDTH);
+    const uint32_t height_idx = get_data_layout_dimension_index(_src->data_layout(), DataLayoutDimension::HEIGHT);
+
+    /********************************************************************************
+     * 1 - Define tensors
+     ********************************************************************************/
+    GpuCkwComponentArgument *src = vtable.declare_variable(comp_group, writer, _src, "src");
+    GpuCkwComponentArgument *dst = vtable.declare_variable(comp_group, writer, _dst, "dst");
+
+    /********************************************************************************
+     * 2 - Define CKW constants
+     ********************************************************************************/
+    const auto dst_dt    = to_ckw(_dst->data_type());
+    const auto pool_sz_x = static_cast<int32_t>(_attributes.pool_size().x());
+    const auto pool_sz_y = static_cast<int32_t>(_attributes.pool_size().y());
+    const auto pad_x     = static_cast<int32_t>(_attributes.pad().left);
+    const auto pad_y     = static_cast<int32_t>(_attributes.pad().top);
+    const auto stride_x  = static_cast<int32_t>(_attributes.stride().x());
+    const auto stride_y  = static_cast<int32_t>(_attributes.stride().y());
+    const auto src_w     = static_cast<int32_t>(_src->dimension(width_idx));
+    const auto src_h     = static_cast<int32_t>(_src->dimension(height_idx));
+    const auto dst_h     = static_cast<int32_t>(_dst->dimension(height_idx));
+
+    // CKW constants
+    auto const_pool_sz_x_i32 = writer->declare_constant_tile(ckw::ConstantData({{pool_sz_x}}, ckw::DataType::Int32));
+    auto const_pool_sz_y_i32 = writer->declare_constant_tile(ckw::ConstantData({{pool_sz_y}}, ckw::DataType::Int32));
+    auto const_pad_x_i32     = writer->declare_constant_tile(ckw::ConstantData({{pad_x}}, ckw::DataType::Int32));
+    auto const_pad_y_i32     = writer->declare_constant_tile(ckw::ConstantData({{pad_y}}, ckw::DataType::Int32));
+    auto const_stride_x_i32  = writer->declare_constant_tile(ckw::ConstantData({{stride_x}}, ckw::DataType::Int32));
+    auto const_stride_y_i32  = writer->declare_constant_tile(ckw::ConstantData({{stride_y}}, ckw::DataType::Int32));
+    auto const_src_w_i32     = writer->declare_constant_tile(ckw::ConstantData({{src_w}}, ckw::DataType::Int32));
+    auto const_src_h_i32     = writer->declare_constant_tile(ckw::ConstantData({{src_h}}, ckw::DataType::Int32));
+    auto const_dst_h_i32     = writer->declare_constant_tile(ckw::ConstantData({{dst_h}}, ckw::DataType::Int32));
+    auto const_0_i32         = writer->declare_constant_tile(ckw::ConstantData({{0}}, ckw::DataType::Int32));
+    auto const_pos_1_i32     = writer->declare_constant_tile(ckw::ConstantData({{1}}, ckw::DataType::Int32));
+    auto const_0_fp          = writer->declare_constant_tile(ckw::ConstantData({{0.0f}}, dst_dt));
+    auto const_lowest_val_fp =
+        writer->declare_constant_tile(ckw::ConstantData({{std::numeric_limits<float>::lowest()}}, ckw::DataType::Fp32));
+    auto const_neg_inf_val_fp = writer->declare_constant_tile(ckw::ConstantData({{-1.0f / 0.0f}}, ckw::DataType::Fp32));
+
+    /********************************************************************************
+     * 3 - Define the compute block parameters and destination tile (if not root component)
+     *     Bind the tile to the tensor to share it among different components and
+     *     initialize the compute block parameters
+     ********************************************************************************/
+    // The n0 and m0 parameters from root_window only refers to the output
+    const auto root_window = comp_group.get_root_component()->ckw_component_driver()->get_window();
+
+    // Destination compute block size
+    const int32_t dst_n0 = root_window.x().step();
+    const int32_t dst_m0 = root_window.y().step();
+
+    // Destination compute block size left-over
+    const int32_t dst_n0_partial = _dst->dimension(0) % dst_n0;
+    const int32_t dst_m0_partial = _dst->dimension(1) % dst_m0;
+
+    // Shift-back for the overlapping-min strategy
+    const int32_t dst_shift_back = (dst_n0 - dst_n0_partial) % dst_n0;
+
+    ckw::TensorSampler sampler_dst;
+    sampler_dst.format(ckw::TensorSamplerFormat::Dim0_Dim1_Dim2);
+    if (dst_n0_partial == 0)
+    {
+        sampler_dst.address_mode_x(ckw::TensorSamplerAddressModeX::None);
+    }
+    else
+    {
+        sampler_dst.address_mode_x(ckw::TensorSamplerAddressModeX::OverlappingMin);
+    }
+
+    if (dst_m0_partial == 0)
+    {
+        sampler_dst.address_mode_y(ckw::TensorSamplerAddressModeY::None);
+    }
+    else
+    {
+        sampler_dst.address_mode_y(ckw::TensorSamplerAddressModeY::ClampToBorderMaxOnly);
+    }
+
+    sampler_dst.address_mode_z(ckw::TensorSamplerAddressModeZ::None);
+    sampler_dst.storage(ckw::TensorStorageType::BufferUint8Ptr);
+
+    // Declare destination tile
+    auto tile_dst = writer->declare_tile("dst", ckw::TileInfo(dst_dt, dst_m0, dst_n0));
+
+    // Initialize destination tile
+    writer->op_assign(tile_dst, const_0_fp);
+
+    // Bind tile to the tensor
+    dst->init_virtual_tensor(tile_dst, sampler_dst);
+
+    /********************************************************************************
+     * 4 - Define the compute block parameters CKW constants
+     ********************************************************************************/
+    // Only now we can declare the N0 and M0 as constant
+    auto const_dst_n0_i32 = writer->declare_constant_tile(ckw::ConstantData({{dst_n0}}, ckw::DataType::Int32));
+    auto const_dst_m0_i32 = writer->declare_constant_tile(ckw::ConstantData({{dst_m0}}, ckw::DataType::Int32));
+    auto const_shift_back_dst_n0_i32 =
+        writer->declare_constant_tile(ckw::ConstantData({{dst_shift_back}}, ckw::DataType::Int32));
+
+    /********************************************************************************
+     * 5 - Define the sampler for the input tensor
+     ********************************************************************************/
+    ckw::TensorSampler sampler_src;
+    sampler_src.format(ckw::TensorSamplerFormat::Dim0_Dim1_Dim2);
+    sampler_src.address_mode_x(ckw::TensorSamplerAddressModeX::None);
+    sampler_src.address_mode_y(ckw::TensorSamplerAddressModeY::None);
+    sampler_src.address_mode_z(ckw::TensorSamplerAddressModeZ::None);
+
+    /********************************************************************************
+     * 6 - Extra operations required before writing the main code
+     ********************************************************************************/
+    // Check if it is global pooling
+    const bool is_global_pooling = (pool_sz_x == src_w) && (pool_sz_y == src_h) && (pad_x == 0) && (pad_y == 0);
+
+    // Accumulate always in F32 if the pool type is not MAX
+    const bool acc_f32 = (dst_dt == ckw::DataType::Fp32) ||
+                         ((dst_dt == ckw::DataType::Fp16) && _attributes.pool_type() != PoolingType::MAX);
+
+    const auto acc_dt = acc_f32 ? ckw::DataType::Fp32 : ckw::DataType::Fp16;
+
+    const bool is_wider_acc = dst_dt != acc_dt;
+
+    /********************************************************************************
+     * 7 - Get the coordinates of the destination tile
+     ********************************************************************************/
+    auto tile_gid_0 = writer->declare_tile("gid_0", ckw::TileInfo(ckw::DataType::Int32));
+    auto tile_gid_1 = writer->declare_tile("gid_1", ckw::TileInfo(ckw::DataType::Int32));
+    auto tile_gid_2 = writer->declare_tile("gid_2", ckw::TileInfo(ckw::DataType::Int32));
+
+    writer->op_get_global_id(tile_gid_0, 0);
+    writer->op_get_global_id(tile_gid_1, 1);
+    writer->op_get_global_id(tile_gid_2, 2);
+
+    auto tile_cout0 = writer->declare_tile("cout0", ckw::TileInfo(ckw::DataType::Int32)); // OFM
+    auto tile_mout0 = writer->declare_tile("mout0", ckw::TileInfo(ckw::DataType::Int32)); // WIDTH
+    auto tile_mout1 = writer->declare_tile("mout1", ckw::TileInfo(ckw::DataType::Int32)); // HEIGHT
+    auto tile_bout0 = writer->declare_tile("bout0", ckw::TileInfo(ckw::DataType::Int32)); // BATCH SIZE IDX
+
+    // Calculate coordinates
+    get_coordinate_from_gws_overlapping_min(writer, tile_cout0, tile_gid_0, const_dst_n0_i32,
+                                            const_shift_back_dst_n0_i32, const_0_i32);
+    get_coordinate_from_gws(writer, tile_mout0, tile_gid_1, const_dst_m0_i32);
+    writer->op_binary(tile_mout1, ckw::BinaryOp::Mod, tile_gid_2, const_dst_h_i32);
+    writer->op_binary(tile_bout0, ckw::BinaryOp::Div, tile_gid_2, const_dst_h_i32);
+
+    /********************************************************************************
+     * 8 - Write the rest of the code
+     ********************************************************************************/
+    // A tile used to temporarily store results or as an accumulator in case of AVG and L2 pooling.
+    auto tile_res = writer->declare_tile("tile_res", ckw::TileInfo(acc_dt, dst_m0, dst_n0));
+
+    // Initialise result tile with appropriate value
+    if (_attributes.pool_type() == PoolingType::MAX)
+    {
+        if (_settings.use_inf_as_limit())
+        {
+            writer->op_cast(tile_res, const_neg_inf_val_fp, ckw::ConvertPolicy::None);
+        }
+        else
+        {
+            writer->op_cast(tile_res, const_lowest_val_fp, ckw::ConvertPolicy::None);
+        }
+    }
+    else
+    {
+        writer->op_cast(tile_res, const_0_fp, ckw::ConvertPolicy::None);
+    }
+
+    // tile_idx_in_w = tile_mout0 * STRIDE_X - PAD_X
+    auto tile_src_coord_x_start = writer->declare_tile("idx_in_w", ckw::DataType::Int32);
+    writer->op_binary(tile_src_coord_x_start, ckw::BinaryOp::Mul, tile_mout0, const_stride_x_i32);
+    writer->op_binary(tile_src_coord_x_start, ckw::BinaryOp::Sub, tile_src_coord_x_start, const_pad_x_i32);
+
+    // tile_idx_in_h = tile_mout1 * STRIDE_Y - PAD_Y
+    auto tile_src_coord_y_start = writer->declare_tile("idx_in_h", ckw::DataType::Int32);
+    writer->op_binary(tile_src_coord_y_start, ckw::BinaryOp::Mul, tile_mout1, const_stride_y_i32);
+    writer->op_binary(tile_src_coord_y_start, ckw::BinaryOp::Sub, tile_src_coord_y_start, const_pad_y_i32);
+
+    auto tile_neg_src_coord_x_start = writer->declare_tile("neg_src_coord_x_start", ckw::DataType::Int32);
+    auto tile_neg_src_coord_y_start = writer->declare_tile("neg_src_coord_y_start", ckw::DataType::Int32);
+
+    writer->op_binary(tile_neg_src_coord_x_start, ckw::BinaryOp::Sub, const_0_i32, tile_src_coord_x_start);
+    writer->op_binary(tile_neg_src_coord_y_start, ckw::BinaryOp::Sub, const_0_i32, tile_src_coord_y_start);
+
+    // int pool_x_s = max((int)0, -idx_in_w);
+    // int pool_x_e = min((int)POOL_SIZE_X, (int)SRC_WIDTH - idx_in_w);
+    // int pool_y_s = max((int)0, -idx_in_h);
+    // int pool_y_e = min((int)POOL_SIZE_Y, (int)SRC_HEIGHT - idx_in_h);
+    auto tile_pool_x_s = writer->declare_tile("pool_x_s", ckw::DataType::Int32);
+    auto tile_pool_y_s = writer->declare_tile("pool_y_s", ckw::DataType::Int32);
+    auto tile_pool_x_e = writer->declare_tile("pool_x_e", ckw::DataType::Int32);
+    auto tile_pool_y_e = writer->declare_tile("pool_y_e", ckw::DataType::Int32);
+
+    writer->op_binary(tile_pool_x_s, ckw::BinaryOp::Max, const_0_i32, tile_neg_src_coord_x_start);
+    writer->op_binary(tile_pool_x_e, ckw::BinaryOp::Add, const_src_w_i32, tile_neg_src_coord_x_start);
+    writer->op_binary(tile_pool_x_e, ckw::BinaryOp::Min, const_pool_sz_x_i32, tile_pool_x_e);
+    writer->op_binary(tile_pool_y_s, ckw::BinaryOp::Max, const_0_i32, tile_neg_src_coord_y_start);
+    writer->op_binary(tile_pool_y_e, ckw::BinaryOp::Add, const_src_h_i32, tile_neg_src_coord_y_start);
+    writer->op_binary(tile_pool_y_e, ckw::BinaryOp::Min, const_pool_sz_y_i32, tile_pool_y_e);
+
+    // #if defined(EXCLUDE_PADDING)
+    // int filter_size = (pool_y_e - pool_y_s) * (pool_x_e - pool_x_s);
+    // #else // defined(EXCLUDE_PADDING)
+    // int filter_size = POOL_SIZE_X * POOL_SIZE_Y;
+    // #endif // defined(EXCLUDE_PADDING)
+    auto tile_filter_size = writer->declare_tile("filter_size", ckw::DataType::Int32);
+    if (_attributes.exclude_padding())
+    {
+        auto tile_x_diff = writer->declare_tile("x_diff", ckw::DataType::Int32);
+        auto tile_y_diff = writer->declare_tile("y_diff", ckw::DataType::Int32);
+
+        writer->op_binary(tile_x_diff, ckw::BinaryOp::Sub, tile_pool_x_e, tile_pool_x_s);
+        writer->op_binary(tile_y_diff, ckw::BinaryOp::Sub, tile_pool_y_e, tile_pool_y_s);
+        writer->op_binary(tile_filter_size, ckw::BinaryOp::Mul, tile_x_diff, tile_y_diff);
+    }
+    else
+    {
+        writer->op_binary(tile_filter_size, ckw::BinaryOp::Mul, const_pool_sz_x_i32, const_pool_sz_y_i32);
+    }
+
+    auto tile_x = writer->declare_tile("x", ckw::DataType::Int32);
+    auto tile_y = writer->declare_tile("y", ckw::DataType::Int32);
+
+    if (is_global_pooling)
+    {
+        writer->op_assign(tile_y, const_0_i32);
+        writer->op_assign(tile_pool_y_e, const_pool_sz_y_i32);
+    }
+    else
+    {
+        writer->op_assign(tile_y, tile_pool_y_s);
+    }
+
+    // Y dim for-loop
+    writer->op_for_loop(
+        tile_y, ckw::BinaryOp::Less, tile_pool_y_e, tile_y, ckw::AssignmentOp::Increment, const_pos_1_i32,
+        [&]()
+        {
+            // Reset the iterator for the inner loop
+            if (is_global_pooling)
+            {
+                writer->op_assign(tile_x, const_0_i32);
+                writer->op_assign(tile_pool_x_e, const_pool_sz_x_i32);
+            }
+            else
+            {
+                writer->op_assign(tile_x, tile_pool_x_s);
+            }
+
+            auto tile_src_coord_y = writer->declare_tile("src_coord_y", ckw::DataType::Int32);
+            writer->op_binary(tile_src_coord_y, ckw::BinaryOp::Add, tile_src_coord_y_start, tile_y);
+
+            // X dim for-loop
+            writer->op_for_loop(
+                tile_x, ckw::BinaryOp::Less, tile_pool_x_e, tile_x, ckw::AssignmentOp::Increment, const_pos_1_i32,
+                [&]()
+                {
+                    auto tile_src_coord_x = writer->declare_tile("src_coord_x", ckw::DataType::Int32);
+                    writer->op_binary(tile_src_coord_x, ckw::BinaryOp::Add, tile_src_coord_x_start, tile_x);
+
+                    ckw::DataType src_dt   = to_ckw(_src->data_type());
+                    auto          tile_src = writer->declare_tile("tile_src", ckw::TileInfo(acc_dt, dst_m0, dst_n0));
+
+                    // Load src tile
+                    if (is_wider_acc)
+                    {
+                        auto tile_src0 = writer->declare_tile("src_tile0", ckw::TileInfo(src_dt, dst_m0, dst_n0));
+                        writer->op_load(tile_src0, src->tensor(), sampler_src, tile_cout0, tile_src_coord_x,
+                                        tile_src_coord_y, tile_bout0);
+                        writer->op_cast(tile_src, tile_src0, ckw::ConvertPolicy::None);
+                    }
+                    else
+                    {
+                        writer->op_load(tile_src, src->tensor(), sampler_src, tile_cout0, tile_src_coord_x,
+                                        tile_src_coord_y, tile_bout0);
+                    }
+
+                    // Take the square of the input, for L2 Pooling
+                    if (_attributes.pool_type() == PoolingType::L2)
+                    {
+                        writer->op_binary(tile_src, ckw::BinaryOp::Mul, tile_src, tile_src);
+                    }
+
+                    // Perfom Pooling op
+                    if (_attributes.pool_type() == PoolingType::MAX)
+                    {
+                        writer->op_binary(tile_res, ckw::BinaryOp::Max, tile_res, tile_src);
+                    }
+                    else
+                    {
+                        writer->op_binary(tile_res, ckw::BinaryOp::Add, tile_res, tile_src);
+                    }
+                });
+        });
+
+    if ((_attributes.pool_type() == PoolingType::AVG) || (_attributes.pool_type() == PoolingType::L2))
+    {
+        // Filter_size is automatically broadcasted in the operation
+        auto tile_filter_size_fp = writer->declare_tile("filter_size_fp", ckw::TileInfo(acc_dt));
+        writer->op_cast(tile_filter_size_fp, tile_filter_size, ckw::ConvertPolicy::None);
+        writer->op_binary(tile_res, ckw::BinaryOp::Div, tile_res, tile_filter_size_fp);
+    }
+
+    // Take square root of the result in L2 pooling
+    if (_attributes.pool_type() == PoolingType::L2)
+    {
+        writer->op_unary(tile_res, ckw::UnaryOp::Sqrt, tile_res);
+    }
+
+    // Store the results and do casting if mixed precision
+    if (is_wider_acc)
+    {
+        writer->op_cast(tile_dst, tile_res, ckw::ConvertPolicy::None);
+    }
+    else
+    {
+        writer->op_assign(tile_dst, tile_res);
+    }
+}
+
+Window GpuCkwPool2d::get_window() const
+{
+    ARM_COMPUTE_ERROR_ON_MSG(_dst->tensor_shape().total_size() == 0U, "Destination tensor is not initialized");
+
+    TensorShape    output_shape = _dst->tensor_shape();
+    const uint32_t vec_size     = adjust_vec_size(((_dst->data_type() == DataType::F32) ? 2 : 4), _dst->dimension(0));
+    // Create and configure kernel window
+    auto win = calculate_max_window(output_shape, Steps(vec_size));
+    win      = win.collapse_if_possible(win, Window::DimZ); // collapse window on batch size.
+    return win;
+}
+
+std::string GpuCkwPool2d::get_name(const ComponentGroup &comp_group) const
+{
+    ARM_COMPUTE_UNUSED(comp_group);
+
+    return "pool2dMxN";
+}
+
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwPool2d.h b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwPool2d.h
new file mode 100644
index 0000000000..822282a108
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwPool2d.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_COMPONENTS_GPUCKWPOOL2D_H
+#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_COMPONENTS_GPUCKWPOOL2D_H
+
+#include "src/core/common/Macros.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/IGpuCkwComponentDriver.h"
+#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.h"
+
+#include <string>
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+class GpuCkwPool2d : public IGpuCkwComponentDriver
+{
+public:
+    using Attributes = ClComponentPool2d::Attributes;
+    using Settings   = ClComponentPool2d::Settings;
+
+    /** Constructor
+     *
+     * For supported configurations please refer to @ref ClComponentCast::validate()
+     *
+     * @param[in] id         Component id
+     * @param[in] tensors    Tensor arguments to the component
+     * @param[in] attributes Component attributes
+     * @param[in] settings   Component settings
+     */
+    GpuCkwPool2d(ComponentId                      id,
+                 const ArgumentPack<ITensorInfo> &tensors,
+                 const Attributes                &attributes,
+                 const Settings                  &settings);
+    ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(GpuCkwPool2d);
+    /** Destructor */
+    ~GpuCkwPool2d() override = default;
+    // Inherited methods overriden:
+    virtual void write_component_code(const ComponentGroup    &comp_group,
+                                      GpuCkwVariableTable     &vtable,
+                                      GpuCkwScopedKernelWriter writer) const override;
+    Window       get_window() const override;
+    std::string  get_name(const ComponentGroup &comp_group) const override;
+
+private:
+    const ITensorInfo *_src;
+    const ITensorInfo *_dst;
+    Attributes         _attributes;
+    Settings           _settings;
+};
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+
+#endif // ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_COMPONENTS_GPUCKWPOOL2D_H
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwResize.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwResize.cpp
new file mode 100644
index 0000000000..edd7ea9a38
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwResize.cpp
@@ -0,0 +1,576 @@
+/*
+ * Copyright (c) 2023-2024 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwResize.h"
+
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
+#include "arm_compute/core/Validate.h"
+
+#include "src/core/helpers/WindowHelpers.h"
+#include "src/core/utils/ScaleUtils.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/CkwHelper.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/Common.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
+#include "support/StringSupport.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+namespace
+{
+constexpr uint32_t opencl_vector_size_in_bytes = 16;
+} // namespace
+
+GpuCkwResize::GpuCkwResize(ComponentId id, const ArgumentPack<ITensorInfo> &tensors, const Attributes &attributes)
+    : IGpuCkwComponentDriver{id, tensors}, _src{}, _dst{}, _attributes{attributes}
+{
+    _src = this->tensors().get_const_tensor(TensorType::ACL_SRC);
+    _dst = this->tensors().get_const_tensor(TensorType::ACL_DST);
+    ARM_COMPUTE_ERROR_ON_NULLPTR(_src, _dst);
+}
+
+void GpuCkwResize::do_nearest_neighbor_resize(const ComponentGroup    &comp_group,
+                                              GpuCkwVariableTable     &vtable,
+                                              GpuCkwScopedKernelWriter writer) const
+{
+    const uint32_t width_idx  = get_data_layout_dimension_index(_dst->data_layout(), DataLayoutDimension::WIDTH);
+    const uint32_t height_idx = get_data_layout_dimension_index(_dst->data_layout(), DataLayoutDimension::HEIGHT);
+
+    /********************************************************************************
+     * 1 - Define tensors
+     ********************************************************************************/
+    GpuCkwComponentArgument *src = vtable.declare_variable(comp_group, writer, _src, "src");
+    GpuCkwComponentArgument *dst = vtable.declare_variable(comp_group, writer, _dst, "dst");
+
+    /********************************************************************************
+     * 2 - Define CKW constants
+     ********************************************************************************/
+    const auto  dst_dt  = to_ckw(_dst->data_type());
+    const float scale_x = scale_utils::calculate_resize_ratio(_src->dimension(width_idx), _dst->dimension(width_idx),
+                                                              _attributes.align_corners());
+    const float scale_y = scale_utils::calculate_resize_ratio(_src->dimension(height_idx), _dst->dimension(height_idx),
+                                                              _attributes.align_corners());
+    const auto  src_w   = static_cast<int32_t>(_src->dimension(width_idx));
+    const auto  src_h   = static_cast<int32_t>(_src->dimension(height_idx));
+    const auto  dst_h   = static_cast<int32_t>(_dst->dimension(height_idx));
+
+    // CKW constants
+    auto const_src_w_i32  = writer->declare_constant_tile(ckw::ConstantData({{src_w}}, ckw::DataType::Int32));
+    auto const_src_h_i32  = writer->declare_constant_tile(ckw::ConstantData({{src_h}}, ckw::DataType::Int32));
+    auto const_dst_h_i32  = writer->declare_constant_tile(ckw::ConstantData({{dst_h}}, ckw::DataType::Int32));
+    auto const_pos_1_i32  = writer->declare_constant_tile(ckw::ConstantData({{1}}, ckw::DataType::Int32));
+    auto const_0_i32      = writer->declare_constant_tile(ckw::ConstantData({{0}}, ckw::DataType::Int32));
+    auto const_0_fp       = writer->declare_constant_tile(ckw::ConstantData({{0.0f}}, dst_dt));
+    auto const_pos_0_5_fp = writer->declare_constant_tile(ckw::ConstantData({{0.5f}}, ckw::DataType::Fp32));
+    auto const_scale_x_fp = writer->declare_constant_tile(ckw::ConstantData({{scale_x}}, ckw::DataType::Fp32));
+    auto const_scale_y_fp = writer->declare_constant_tile(ckw::ConstantData({{scale_y}}, ckw::DataType::Fp32));
+
+    /********************************************************************************
+     * 3 - Define the compute block parameters and destination tile (if not root component)
+     *     Bind the tile to the tensor to share it among different components and
+     *     initialize the compute block parameters
+     ********************************************************************************/
+    // The n0 and m0 parameters from root_window only refers to the output
+    const auto root_window = comp_group.get_root_component()->ckw_component_driver()->get_window();
+
+    // Destination compute block size
+    const int32_t dst_n0 = root_window.x().step();
+
+    // dst_m0 must be 1
+    ARM_COMPUTE_ERROR_ON(root_window.y().step() != 1);
+
+    // Destination compute block size left-over
+    const int32_t dst_n0_partial = _dst->dimension(0) % dst_n0;
+
+    // Shift-back for the overlapping-min strategy
+    const int32_t dst_shift_back = (dst_n0 - dst_n0_partial) % dst_n0;
+
+    ckw::TensorSampler sampler_dst;
+    sampler_dst.format(ckw::TensorSamplerFormat::Dim0_Dim1_Dim2);
+    if (dst_n0_partial == 0)
+    {
+        sampler_dst.address_mode_x(ckw::TensorSamplerAddressModeX::None);
+    }
+    else
+    {
+        sampler_dst.address_mode_x(ckw::TensorSamplerAddressModeX::OverlappingMin);
+    }
+    sampler_dst.address_mode_y(ckw::TensorSamplerAddressModeY::None);
+    sampler_dst.address_mode_z(ckw::TensorSamplerAddressModeZ::None);
+    sampler_dst.storage(ckw::TensorStorageType::BufferUint8Ptr);
+
+    // Declare destination tile
+    auto tile_dst = writer->declare_tile("dst", ckw::TileInfo(dst_dt, 1, dst_n0));
+
+    // Initialize destination tile
+    writer->op_assign(tile_dst, const_0_fp);
+
+    // Bind tile to the tensor
+    dst->init_virtual_tensor(tile_dst, sampler_dst);
+
+    /********************************************************************************
+     * 4 - Define the compute block parameters CKW constants
+     ********************************************************************************/
+    auto const_n0_i32 = writer->declare_constant_tile(ckw::ConstantData({{dst_n0}}, ckw::DataType::Int32));
+    auto const_shift_back_n0_i32 =
+        writer->declare_constant_tile(ckw::ConstantData({{dst_shift_back}}, ckw::DataType::Int32));
+
+    /********************************************************************************
+     * 5 - Define the samplers for the input tensor
+     ********************************************************************************/
+    ckw::TensorSampler sampler_src;
+    sampler_src.format(ckw::TensorSamplerFormat::Dim0_Dim1_Dim2);
+    sampler_src.address_mode_x(ckw::TensorSamplerAddressModeX::None);
+    sampler_src.address_mode_y(ckw::TensorSamplerAddressModeY::None);
+    sampler_src.address_mode_z(ckw::TensorSamplerAddressModeZ::None);
+
+    /********************************************************************************
+     * 6 - Extra operations required before writing the main code
+     ********************************************************************************/
+
+    // ....
+
+    /********************************************************************************
+     * 7 - Get the coordinates of the destination tile
+     ********************************************************************************/
+    auto tile_gid_0 = writer->declare_tile("gid_0", ckw::TileInfo(ckw::DataType::Int32));
+    auto tile_gid_1 = writer->declare_tile("gid_1", ckw::TileInfo(ckw::DataType::Int32));
+    auto tile_gid_2 = writer->declare_tile("gid_2", ckw::TileInfo(ckw::DataType::Int32));
+
+    writer->op_get_global_id(tile_gid_0, 0);
+    writer->op_get_global_id(tile_gid_1, 1);
+    writer->op_get_global_id(tile_gid_2, 2);
+
+    auto tile_co = writer->declare_tile("co", ckw::TileInfo(ckw::DataType::Int32)); // OFM
+    auto tile_xo = writer->declare_tile("xo", ckw::TileInfo(ckw::DataType::Int32)); // WIDTH
+    auto tile_yo = writer->declare_tile("yo", ckw::TileInfo(ckw::DataType::Int32)); // HEIGHT
+    auto tile_bo = writer->declare_tile("bo", ckw::TileInfo(ckw::DataType::Int32)); // BATCH SIZE IDX
+
+    // Calculate coordinates
+    get_coordinate_from_gws_overlapping_min(writer, tile_co, tile_gid_0, const_n0_i32, const_shift_back_n0_i32,
+                                            const_0_i32);
+    writer->op_assign(tile_xo, tile_gid_1);
+    writer->op_binary(tile_yo, ckw::BinaryOp::Mod, tile_gid_2, const_dst_h_i32);
+    writer->op_binary(tile_bo, ckw::BinaryOp::Div, tile_gid_2, const_dst_h_i32);
+
+    /********************************************************************************
+     * 8 - Write the rest of the code
+     ********************************************************************************/
+    auto tile_xi_f = writer->declare_tile("xi_f", ckw::DataType::Fp32);
+    auto tile_yi_f = writer->declare_tile("yi_f", ckw::DataType::Fp32);
+
+    switch (_attributes.sampling_policy())
+    {
+        case SamplingPolicy::TOP_LEFT:
+            // xi_f = (xo * scale_x)
+            // yi_f = (yo * scale_y)
+            writer->op_cast(tile_xi_f, tile_xo, ckw::ConvertPolicy::None);
+            writer->op_cast(tile_yi_f, tile_yo, ckw::ConvertPolicy::None);
+            writer->op_binary(tile_xi_f, ckw::BinaryOp::Mul, tile_xi_f, const_scale_x_fp);
+            writer->op_binary(tile_yi_f, ckw::BinaryOp::Mul, tile_yi_f, const_scale_y_fp);
+            break;
+        case SamplingPolicy::CENTER:
+        {
+            // xi_f = ((xo + 0.5f) * scale_x)
+            // yi_f = ((yo + 0.5f) * scale_y)
+            const auto &tile_xo_plus_half = writer->declare_tile("xo_plus_half", ckw::DataType::Fp32);
+            const auto &tile_yo_plus_half = writer->declare_tile("yo_plus_half", ckw::DataType::Fp32);
+
+            writer->op_cast(tile_xo_plus_half, tile_xo, ckw::ConvertPolicy::None);
+            writer->op_cast(tile_yo_plus_half, tile_yo, ckw::ConvertPolicy::None);
+            writer->op_binary(tile_xo_plus_half, ckw::BinaryOp::Add, tile_xo_plus_half, const_pos_0_5_fp);
+            writer->op_binary(tile_yo_plus_half, ckw::BinaryOp::Add, tile_yo_plus_half, const_pos_0_5_fp);
+            writer->op_binary(tile_xi_f, ckw::BinaryOp::Mul, tile_xo_plus_half, const_scale_x_fp);
+            writer->op_binary(tile_yi_f, ckw::BinaryOp::Mul, tile_yo_plus_half, const_scale_y_fp);
+        }
+        break;
+        default:
+            ARM_COMPUTE_ERROR("Unsupported sampling policy");
+    }
+
+    if (_attributes.align_corners())
+    {
+        writer->op_unary(tile_xi_f, ckw::UnaryOp::Round, tile_xi_f);
+        writer->op_unary(tile_yi_f, ckw::UnaryOp::Round, tile_yi_f);
+    }
+
+    // xi0 = clamp((int)xi_f, 0, (int)src_w - 1)
+    // yi0 = clamp((int)yi_f, 0, (int)src_h - 1)
+    auto tile_xi_f_int = writer->declare_tile("xi_f_int", ckw::DataType::Int32);
+    auto tile_yi_f_int = writer->declare_tile("yi_f_int", ckw::DataType::Int32);
+
+    writer->op_cast(tile_xi_f_int, tile_xi_f, ckw::ConvertPolicy::None);
+    writer->op_cast(tile_yi_f_int, tile_yi_f, ckw::ConvertPolicy::None);
+
+    auto tile_src_w_minus_1 = writer->declare_tile("src_w_minus_1", ckw::DataType::Int32);
+    auto tile_src_h_minus_1 = writer->declare_tile("src_h_minus_1", ckw::DataType::Int32);
+
+    writer->op_binary(tile_src_w_minus_1, ckw::BinaryOp::Sub, const_src_w_i32, const_pos_1_i32);
+    writer->op_binary(tile_src_h_minus_1, ckw::BinaryOp::Sub, const_src_h_i32, const_pos_1_i32);
+
+    auto tile_xi0 = writer->declare_tile("xi0", ckw::DataType::Int32);
+    auto tile_yi0 = writer->declare_tile("yi0", ckw::DataType::Int32);
+
+    writer->op_ternary(tile_xi0, ckw::TernaryOp::Clamp, tile_xi_f_int, const_0_i32, tile_src_w_minus_1);
+    writer->op_ternary(tile_yi0, ckw::TernaryOp::Clamp, tile_yi_f_int, const_0_i32, tile_src_h_minus_1);
+
+    auto tile_src = writer->declare_tile("src_tile", ckw::TileInfo(dst_dt, 1, dst_n0));
+    writer->op_load(tile_src, src->tensor(), sampler_src, tile_co, tile_xi0, tile_yi0, tile_bo);
+
+    writer->op_assign(tile_dst, tile_src);
+}
+
+void GpuCkwResize::do_bilinear_resize(const ComponentGroup    &comp_group,
+                                      GpuCkwVariableTable     &vtable,
+                                      GpuCkwScopedKernelWriter writer) const
+{
+    const size_t width_idx  = get_data_layout_dimension_index(_dst->data_layout(), DataLayoutDimension::WIDTH);
+    const size_t height_idx = get_data_layout_dimension_index(_dst->data_layout(), DataLayoutDimension::HEIGHT);
+
+    /********************************************************************************
+     * 1 - Define tensors
+     ********************************************************************************/
+    GpuCkwComponentArgument *src = vtable.declare_variable(comp_group, writer, _src, "src");
+    GpuCkwComponentArgument *dst = vtable.declare_variable(comp_group, writer, _dst, "dst");
+
+    /********************************************************************************
+     * 2 - Define CKW constants
+     ********************************************************************************/
+    const auto  dst_dt  = to_ckw(_dst->data_type());
+    const float scale_x = scale_utils::calculate_resize_ratio(_src->dimension(width_idx), _dst->dimension(width_idx),
+                                                              _attributes.align_corners());
+    const float scale_y = scale_utils::calculate_resize_ratio(_src->dimension(height_idx), _dst->dimension(height_idx),
+                                                              _attributes.align_corners());
+    const auto  src_w   = static_cast<int32_t>(_src->dimension(width_idx));
+    const auto  src_h   = static_cast<int32_t>(_src->dimension(height_idx));
+    const auto  dst_h   = static_cast<int32_t>(_dst->dimension(height_idx));
+
+    // CKW constants
+    auto const_src_w_i32  = writer->declare_constant_tile(ckw::ConstantData({{src_w}}, ckw::DataType::Int32));
+    auto const_src_h_i32  = writer->declare_constant_tile(ckw::ConstantData({{src_h}}, ckw::DataType::Int32));
+    auto const_dst_h_i32  = writer->declare_constant_tile(ckw::ConstantData({{dst_h}}, ckw::DataType::Int32));
+    auto const_pos_1_i32  = writer->declare_constant_tile(ckw::ConstantData({{1}}, ckw::DataType::Int32));
+    auto const_0_i32      = writer->declare_constant_tile(ckw::ConstantData({{0}}, ckw::DataType::Int32));
+    auto const_0_fp       = writer->declare_constant_tile(ckw::ConstantData({{0.0f}}, dst_dt));
+    auto const_pos_1_fp   = writer->declare_constant_tile(ckw::ConstantData({{1.0f}}, ckw::DataType::Fp32));
+    auto const_pos_0_5_fp = writer->declare_constant_tile(ckw::ConstantData({{0.5f}}, ckw::DataType::Fp32));
+    auto const_scale_x_fp = writer->declare_constant_tile(ckw::ConstantData({{scale_x}}, ckw::DataType::Fp32));
+    auto const_scale_y_fp = writer->declare_constant_tile(ckw::ConstantData({{scale_y}}, ckw::DataType::Fp32));
+
+    /********************************************************************************
+     * 3 - Define the compute block parameters and destination tile (if not root component)
+     *     Bind the tile to the tensor to share it among different components and
+     *     initialize the compute block parameters
+     ********************************************************************************/
+    // The n0 and m0 parameters from root_window only refers to the output
+    const auto root_window = comp_group.get_root_component()->ckw_component_driver()->get_window();
+
+    // Destination compute block size
+    const int32_t dst_n0 = root_window.x().step();
+
+    // dst_m0 must be 1
+    ARM_COMPUTE_ERROR_ON(root_window.y().step() != 1);
+
+    // Destination compute block size left-over
+    const int32_t dst_n0_partial = _dst->dimension(0) % dst_n0;
+
+    // Shift-back for the overlapping-min strategy
+    const int32_t dst_shift_back = (dst_n0 - dst_n0_partial) % dst_n0;
+
+    ckw::TensorSampler sampler_dst;
+    sampler_dst.format(ckw::TensorSamplerFormat::Dim0_Dim1_Dim2);
+    if (dst_n0_partial == 0)
+    {
+        sampler_dst.address_mode_x(ckw::TensorSamplerAddressModeX::None);
+    }
+    else
+    {
+        sampler_dst.address_mode_x(ckw::TensorSamplerAddressModeX::OverlappingMin);
+    }
+    sampler_dst.address_mode_y(ckw::TensorSamplerAddressModeY::None);
+    sampler_dst.address_mode_z(ckw::TensorSamplerAddressModeZ::None);
+    sampler_dst.storage(ckw::TensorStorageType::BufferUint8Ptr);
+
+    // Declare destination tile
+    auto tile_dst = writer->declare_tile("dst", ckw::TileInfo(dst_dt, 1, dst_n0));
+
+    // Initialize destination tile
+    writer->op_assign(tile_dst, const_0_fp);
+
+    // Bind tile to the tensor
+    dst->init_virtual_tensor(tile_dst, sampler_dst);
+
+    /********************************************************************************
+     * 4 - Define the compute block parameters CKW constants
+     ********************************************************************************/
+    auto const_n0_i32 = writer->declare_constant_tile(ckw::ConstantData({{dst_n0}}, ckw::DataType::Int32));
+    auto const_shift_back_n0_i32 =
+        writer->declare_constant_tile(ckw::ConstantData({{dst_shift_back}}, ckw::DataType::Int32));
+
+    /********************************************************************************
+     * 5 - Define the sampler for the input tensor
+     ********************************************************************************/
+    ckw::TensorSampler sampler_src;
+    sampler_src.format(ckw::TensorSamplerFormat::Dim0_Dim1_Dim2);
+    sampler_src.address_mode_x(ckw::TensorSamplerAddressModeX::None);
+    sampler_src.address_mode_y(ckw::TensorSamplerAddressModeY::None);
+    sampler_src.address_mode_z(ckw::TensorSamplerAddressModeZ::None);
+
+    /********************************************************************************
+     * 6 - Extra operations required before writing the main code
+     ********************************************************************************/
+
+    // ....
+
+    /********************************************************************************
+     * 7 - Get the coordinates of the destination tile
+     ********************************************************************************/
+    auto tile_gid_0 = writer->declare_tile("gid_0", ckw::TileInfo(ckw::DataType::Int32));
+    auto tile_gid_1 = writer->declare_tile("gid_1", ckw::TileInfo(ckw::DataType::Int32));
+    auto tile_gid_2 = writer->declare_tile("gid_2", ckw::TileInfo(ckw::DataType::Int32));
+
+    writer->op_get_global_id(tile_gid_0, 0);
+    writer->op_get_global_id(tile_gid_1, 1);
+    writer->op_get_global_id(tile_gid_2, 2);
+
+    auto tile_co = writer->declare_tile("co", ckw::TileInfo(ckw::DataType::Int32)); // OFM
+    auto tile_xo = writer->declare_tile("xo", ckw::TileInfo(ckw::DataType::Int32)); // WIDTH
+    auto tile_yo = writer->declare_tile("yo", ckw::TileInfo(ckw::DataType::Int32)); // HEIGHT
+    auto tile_bo = writer->declare_tile("bo", ckw::TileInfo(ckw::DataType::Int32)); // BATCH SIZE IDX
+
+    // Calculate coordinates
+    get_coordinate_from_gws_overlapping_min(writer, tile_co, tile_gid_0, const_n0_i32, const_shift_back_n0_i32,
+                                            const_0_i32);
+    writer->op_assign(tile_xo, tile_gid_1);
+    writer->op_binary(tile_yo, ckw::BinaryOp::Mod, tile_gid_2, const_dst_h_i32);
+    writer->op_binary(tile_bo, ckw::BinaryOp::Div, tile_gid_2, const_dst_h_i32);
+
+    /********************************************************************************
+     * 8 - Write the rest of the code
+     ********************************************************************************/
+    auto tile_xi_f = writer->declare_tile("xi_f", ckw::DataType::Fp32);
+    auto tile_yi_f = writer->declare_tile("yi_f", ckw::DataType::Fp32);
+
+    switch (_attributes.sampling_policy())
+    {
+        case SamplingPolicy::TOP_LEFT:
+            // xi_f = (xo * scale_x)
+            // yi_f = (yo * scale_y)
+            writer->op_cast(tile_xi_f, tile_xo, ckw::ConvertPolicy::None);
+            writer->op_cast(tile_yi_f, tile_yo, ckw::ConvertPolicy::None);
+            writer->op_binary(tile_xi_f, ckw::BinaryOp::Mul, tile_xi_f, const_scale_x_fp);
+            writer->op_binary(tile_yi_f, ckw::BinaryOp::Mul, tile_yi_f, const_scale_y_fp);
+            break;
+        case SamplingPolicy::CENTER:
+        {
+            // xi_f = ((xo + 0.5f) * scale_x - 0.5f)
+            // yi_f = ((yo + 0.5f) * scale_y - 0.5f)
+            const auto &tile_xo_plus_half = writer->declare_tile("xo_plus_half", ckw::DataType::Fp32);
+            const auto &tile_yo_plus_half = writer->declare_tile("yo_plus_half", ckw::DataType::Fp32);
+
+            writer->op_cast(tile_xo_plus_half, tile_xo, ckw::ConvertPolicy::None);
+            writer->op_cast(tile_yo_plus_half, tile_yo, ckw::ConvertPolicy::None);
+            writer->op_binary(tile_xo_plus_half, ckw::BinaryOp::Add, tile_xo_plus_half, const_pos_0_5_fp);
+            writer->op_binary(tile_yo_plus_half, ckw::BinaryOp::Add, tile_yo_plus_half, const_pos_0_5_fp);
+            writer->op_binary(tile_xi_f, ckw::BinaryOp::Mul, tile_xo_plus_half, const_scale_x_fp);
+            writer->op_binary(tile_yi_f, ckw::BinaryOp::Mul, tile_yo_plus_half, const_scale_y_fp);
+
+            writer->op_binary(tile_xi_f, ckw::BinaryOp::Sub, tile_xi_f, const_pos_0_5_fp);
+            writer->op_binary(tile_yi_f, ckw::BinaryOp::Sub, tile_yi_f, const_pos_0_5_fp);
+        }
+        break;
+        default:
+            ARM_COMPUTE_ERROR("Unsupported sampling policy");
+    }
+
+    // xi = (int)floor(xi_f);
+    // yi = (int)floor(yi_f);
+    auto tile_xi_f_floor = writer->declare_tile("xi_f_floor", ckw::DataType::Fp32);
+    auto tile_yi_f_floor = writer->declare_tile("yi_f_floor", ckw::DataType::Fp32);
+    writer->op_unary(tile_xi_f_floor, ckw::UnaryOp::Floor, tile_xi_f);
+    writer->op_unary(tile_yi_f_floor, ckw::UnaryOp::Floor, tile_yi_f);
+
+    auto tile_xi = writer->declare_tile("xi", ckw::DataType::Int32);
+    auto tile_yi = writer->declare_tile("yi", ckw::DataType::Int32);
+    writer->op_cast(tile_xi, tile_xi_f_floor, ckw::ConvertPolicy::None);
+    writer->op_cast(tile_yi, tile_yi_f_floor, ckw::ConvertPolicy::None);
+
+    // xi0  = clamp(xi, 0, (int)src_w - 1);
+    // yi0  = clamp(yi, 0, (int)src_h - 1);
+    // xi1  = clamp(xi + 1, 0, (int)src_w - 1);
+    // yi1  = clamp(yi + 1, 0, (int)src_h - 1);
+    auto tile_src_w_minus_1 = writer->declare_tile("src_w_minus_1", ckw::DataType::Int32);
+    auto tile_src_h_minus_1 = writer->declare_tile("src_h_minus_1", ckw::DataType::Int32);
+    writer->op_binary(tile_src_w_minus_1, ckw::BinaryOp::Sub, const_src_w_i32, const_pos_1_i32);
+    writer->op_binary(tile_src_h_minus_1, ckw::BinaryOp::Sub, const_src_h_i32, const_pos_1_i32);
+
+    auto tile_xi_plus_1 = writer->declare_tile("xi_plus_1", ckw::DataType::Int32);
+    auto tile_yi_plus_1 = writer->declare_tile("yi_plus_1", ckw::DataType::Int32);
+    writer->op_binary(tile_xi_plus_1, ckw::BinaryOp::Add, tile_xi, const_pos_1_i32);
+    writer->op_binary(tile_yi_plus_1, ckw::BinaryOp::Add, tile_yi, const_pos_1_i32);
+
+    auto tile_xi0 = writer->declare_tile("xi0", ckw::DataType::Int32);
+    auto tile_yi0 = writer->declare_tile("yi0", ckw::DataType::Int32);
+    auto tile_xi1 = writer->declare_tile("xi1", ckw::DataType::Int32);
+    auto tile_yi1 = writer->declare_tile("yi1", ckw::DataType::Int32);
+
+    writer->op_ternary(tile_xi0, ckw::TernaryOp::Clamp, tile_xi, const_0_i32, tile_src_w_minus_1);
+    writer->op_ternary(tile_yi0, ckw::TernaryOp::Clamp, tile_yi, const_0_i32, tile_src_h_minus_1);
+    writer->op_ternary(tile_xi1, ckw::TernaryOp::Clamp, tile_xi_plus_1, const_0_i32, tile_src_w_minus_1);
+    writer->op_ternary(tile_yi1, ckw::TernaryOp::Clamp, tile_yi_plus_1, const_0_i32, tile_src_h_minus_1);
+
+    auto tile_in00 = writer->declare_tile("in00", ckw::TileInfo(dst_dt, 1, dst_n0));
+    auto tile_in01 = writer->declare_tile("in01", ckw::TileInfo(dst_dt, 1, dst_n0));
+    auto tile_in10 = writer->declare_tile("in10", ckw::TileInfo(dst_dt, 1, dst_n0));
+    auto tile_in11 = writer->declare_tile("in11", ckw::TileInfo(dst_dt, 1, dst_n0));
+
+    writer->op_load(tile_in00, src->tensor(), sampler_src, tile_co, tile_xi0, tile_yi0, tile_bo);
+    writer->op_load(tile_in01, src->tensor(), sampler_src, tile_co, tile_xi1, tile_yi0, tile_bo);
+    writer->op_load(tile_in10, src->tensor(), sampler_src, tile_co, tile_xi0, tile_yi1, tile_bo);
+    writer->op_load(tile_in11, src->tensor(), sampler_src, tile_co, tile_xi1, tile_yi1, tile_bo);
+
+    // Weights of each nearest pixel
+    auto tile_a  = writer->declare_tile("a", ckw::DataType::Fp32);
+    auto tile_b  = writer->declare_tile("b", ckw::DataType::Fp32);
+    auto tile_a1 = writer->declare_tile("a1", ckw::DataType::Fp32);
+    auto tile_b1 = writer->declare_tile("b1", ckw::DataType::Fp32);
+
+    // a = (xi_f - (float)xi)
+    // b = (1.f - a)
+    // a1 = (yi_f - (float)yi)
+    // b1 = (1.f - a1)
+    auto tile_xi_float = writer->declare_tile("xi_float", ckw::DataType::Fp32);
+    auto tile_yi_float = writer->declare_tile("yi_float", ckw::DataType::Fp32);
+    writer->op_cast(tile_xi_float, tile_xi, ckw::ConvertPolicy::None);
+    writer->op_cast(tile_yi_float, tile_yi, ckw::ConvertPolicy::None);
+
+    writer->op_binary(tile_a, ckw::BinaryOp::Sub, tile_xi_f, tile_xi_float);
+    writer->op_binary(tile_b, ckw::BinaryOp::Sub, const_pos_1_fp, tile_a);
+    writer->op_binary(tile_a1, ckw::BinaryOp::Sub, tile_yi_f, tile_yi_float);
+    writer->op_binary(tile_b1, ckw::BinaryOp::Sub, const_pos_1_fp, tile_a1);
+
+    // Cast weights to source type
+    const auto &tile_a_src_type  = writer->declare_tile("a_src_t", to_ckw(_src->data_type()));
+    const auto &tile_b_src_type  = writer->declare_tile("b_src_t", to_ckw(_src->data_type()));
+    const auto &tile_a1_src_type = writer->declare_tile("a1_src_t", to_ckw(_src->data_type()));
+    const auto &tile_b1_src_type = writer->declare_tile("b1_src_t", to_ckw(_src->data_type()));
+
+    writer->op_cast(tile_a_src_type, tile_a, ckw::ConvertPolicy::None);
+    writer->op_cast(tile_b_src_type, tile_b, ckw::ConvertPolicy::None);
+    writer->op_cast(tile_a1_src_type, tile_a1, ckw::ConvertPolicy::None);
+    writer->op_cast(tile_b1_src_type, tile_b1, ckw::ConvertPolicy::None);
+
+    // in00 * b * b1
+    writer->op_binary(tile_in00, ckw::BinaryOp::Mul, tile_in00, tile_b_src_type);
+    writer->op_binary(tile_in00, ckw::BinaryOp::Mul, tile_in00, tile_b1_src_type);
+
+    // in01 * a * b1
+    writer->op_binary(tile_in01, ckw::BinaryOp::Mul, tile_in01, tile_a_src_type);
+    writer->op_binary(tile_in01, ckw::BinaryOp::Mul, tile_in01, tile_b1_src_type);
+
+    // in10 * b * a1
+    writer->op_binary(tile_in10, ckw::BinaryOp::Mul, tile_in10, tile_b_src_type);
+    writer->op_binary(tile_in10, ckw::BinaryOp::Mul, tile_in10, tile_a1_src_type);
+
+    // in11 * a * a1
+    writer->op_binary(tile_in11, ckw::BinaryOp::Mul, tile_in11, tile_a_src_type);
+    writer->op_binary(tile_in11, ckw::BinaryOp::Mul, tile_in11, tile_a1_src_type);
+
+    // Summation of above terms
+    writer->op_assign(tile_dst, tile_in00);
+    writer->op_binary(tile_dst, ckw::BinaryOp::Add, tile_dst, tile_in01);
+    writer->op_binary(tile_dst, ckw::BinaryOp::Add, tile_dst, tile_in10);
+    writer->op_binary(tile_dst, ckw::BinaryOp::Add, tile_dst, tile_in11);
+}
+
+void GpuCkwResize::write_component_code(const ComponentGroup    &comp_group,
+                                        GpuCkwVariableTable     &vtable,
+                                        GpuCkwScopedKernelWriter writer) const
+{
+    switch (_attributes.interpolation_policy())
+    {
+        case InterpolationPolicy::NEAREST_NEIGHBOR:
+            do_nearest_neighbor_resize(comp_group, vtable, writer);
+            break;
+        case InterpolationPolicy::BILINEAR:
+            do_bilinear_resize(comp_group, vtable, writer);
+            break;
+        default:
+            ARM_COMPUTE_ERROR("Unsupported interpolation policy");
+    }
+}
+
+Window GpuCkwResize::get_window() const
+{
+    ARM_COMPUTE_ERROR_ON_MSG(_dst->tensor_shape().total_size() == 0U, "Destination tensor is not initialized");
+
+    const uint32_t n0  = adjust_vec_size(opencl_vector_size_in_bytes / _src->element_size(), _src->dimension(0));
+    Window         win = calculate_max_window(*_dst, Steps(n0));
+    return win.collapse(win, Window::DimZ);
+}
+
+std::string GpuCkwResize::get_tuner_id(const ComponentGroup &comp_group) const
+{
+    ARM_COMPUTE_UNUSED(comp_group);
+
+    std::string tuner_id = "resize_";
+    tuner_id += _attributes.interpolation_policy() == InterpolationPolicy::NEAREST_NEIGHBOR ? "nearest_neighbor" : "";
+    tuner_id += _attributes.interpolation_policy() == InterpolationPolicy::BILINEAR ? "bilinear" : "";
+    tuner_id += "_";
+    tuner_id += _attributes.sampling_policy() == SamplingPolicy::CENTER ? "center" : "topleft";
+    tuner_id += "_";
+    tuner_id += support::cpp11::to_string(_dst->dimension(0));
+    tuner_id += "_";
+    tuner_id += support::cpp11::to_string(_dst->dimension(1));
+    tuner_id += "_";
+    tuner_id += support::cpp11::to_string(_dst->dimension(2));
+    tuner_id += "_";
+    tuner_id += support::cpp11::to_string(_dst->dimension(3));
+
+    return tuner_id;
+}
+
+std::string GpuCkwResize::get_name(const ComponentGroup &comp_group) const
+{
+    ARM_COMPUTE_UNUSED(comp_group);
+
+    std::string name = "resize_";
+    name += _attributes.interpolation_policy() == InterpolationPolicy::NEAREST_NEIGHBOR ? "nearest_neighbor" : "";
+    name += _attributes.interpolation_policy() == InterpolationPolicy::BILINEAR ? "bilinear" : "";
+
+    return name;
+}
+
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwResize.h b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwResize.h
new file mode 100644
index 0000000000..1266c05921
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwResize.h
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_COMPONENTS_GPUCKWRESIZE_H
+#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_COMPONENTS_GPUCKWRESIZE_H
+
+#include "src/core/common/Macros.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/IGpuCkwComponentDriver.h"
+#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentResize.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+class GpuCkwResize final : public IGpuCkwComponentDriver
+{
+public:
+    using Attributes = ClComponentResize::Attributes;
+
+public:
+    /** Constructor
+     *
+     * @param[in] id         Component id
+     * @param[in] tensors    Tensor arguments to the components
+     * @param[in] attributes Component attributes
+     */
+    GpuCkwResize(ComponentId id, const ArgumentPack<ITensorInfo> &tensors, const Attributes &attributes);
+
+    ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(GpuCkwResize);
+
+    /** Destructor */
+    ~GpuCkwResize() override = default;
+
+    // Inherited methods overriden
+    virtual void write_component_code(const ComponentGroup    &comp_group,
+                                      GpuCkwVariableTable     &vtable,
+                                      GpuCkwScopedKernelWriter writer) const override;
+    Window       get_window() const override;
+    std::string  get_name(const ComponentGroup &comp_group) const override;
+    std::string  get_tuner_id(const ComponentGroup &comp_group) const override;
+
+private:
+    /** Resize using nearest neighbor interpolation
+     *
+     * @param[in]      comp_group Component group to which this component belongs to
+     * @param[in, out] vtable     Table of variables declared by this component
+     * @param[in, out] writer     CKW writer that writes code scoped to this kernel component
+     */
+    void do_nearest_neighbor_resize(const ComponentGroup    &comp_group,
+                                    GpuCkwVariableTable     &vtable,
+                                    GpuCkwScopedKernelWriter writer) const;
+
+    /** Resize using bilinear interpolation
+     *
+     * @param[in]      comp_group Component group to which this component belongs to
+     * @param[in, out] vtable     Table of variables declared by this component
+     * @param[in, out] writer     CKW writer that writes code scoped to this kernel component
+     */
+    void do_bilinear_resize(const ComponentGroup    &comp_group,
+                            GpuCkwVariableTable     &vtable,
+                            GpuCkwScopedKernelWriter writer) const;
+
+    const ITensorInfo *_src;
+    const ITensorInfo *_dst;
+    Attributes         _attributes;
+};
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+
+#endif // ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_COMPONENTS_GPUCKWRESIZE_H
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.cpp
new file mode 100644
index 0000000000..d9d741fea5
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.cpp
@@ -0,0 +1,144 @@
+/*
+ * Copyright (c) 2023-2024 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "GpuCkwStore.h"
+
+#include "arm_compute/core/Error.h"
+
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/CkwHelper.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h"
+
+#include <cstdint>
+#include <string>
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+GpuCkwStore::GpuCkwStore(ComponentId id, const ArgumentPack<ITensorInfo> &tensors)
+    : IGpuCkwComponentDriver{id, tensors}, _src{}, _dst{}
+{
+    _src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0);
+    _dst = this->tensors().get_const_tensor(TensorType::ACL_DST_0);
+}
+void GpuCkwStore::write_component_code(const ComponentGroup    &comp_group,
+                                       GpuCkwVariableTable     &vtable,
+                                       GpuCkwScopedKernelWriter writer) const
+{
+    /********************************************************************************
+     * 1 - Define tensors
+     ********************************************************************************/
+    GpuCkwComponentArgument *src = vtable.declare_variable(comp_group, writer, _src, "src");
+    GpuCkwComponentArgument *dst = vtable.declare_variable(comp_group, writer, _dst, "dst");
+
+    /********************************************************************************
+     * 2 - Define CKW constants
+     ********************************************************************************/
+    const auto dst_h = static_cast<int32_t>(_dst->dimension(2));
+
+    auto const_0_i32     = writer->declare_constant_tile(ckw::ConstantData({{0}}, ckw::DataType::Int32));
+    auto const_pos_1_i32 = writer->declare_constant_tile(ckw::ConstantData({{1}}, ckw::DataType::Int32));
+    auto const_dst_h_i32 = writer->declare_constant_tile(ckw::ConstantData({{dst_h}}, ckw::DataType::Int32));
+
+    /********************************************************************************
+     * 3 - Define the compute block parameters and destination tile (if not root component)
+     *     Bind the tile to the tensor to share it among different components and
+     *     initialize the compute block parameters
+     ********************************************************************************/
+    const auto &tile_src    = src->tile();
+    auto       &sampler_src = src->tensor_sampler();
+
+    const auto    dst_n0         = static_cast<int32_t>(tile_src.tile_info().width());
+    const auto    dst_m0         = static_cast<int32_t>(tile_src.tile_info().height());
+    const int32_t dst_n0_partial = _dst->dimension(0) % dst_n0;
+    const int32_t dst_shift_back = (dst_n0 - dst_n0_partial) % dst_n0;
+
+    /********************************************************************************
+     * 4 - Define the compute block parameters CKW constants
+     ********************************************************************************/
+    auto const_n0_i32 = writer->declare_constant_tile(ckw::ConstantData({{dst_n0}}, ckw::DataType::Int32));
+    auto const_m0_i32 = writer->declare_constant_tile(ckw::ConstantData({{dst_m0}}, ckw::DataType::Int32));
+    auto const_shift_back_n0_i32 =
+        writer->declare_constant_tile(ckw::ConstantData({{dst_shift_back}}, ckw::DataType::Int32));
+
+    /********************************************************************************
+     * 5 - Define the samplers for the input tensor
+     ********************************************************************************/
+    // Not required
+
+    /********************************************************************************
+     * 6 - Extra operations required before writing the main code
+     ********************************************************************************/
+    // Not required
+
+    /********************************************************************************
+     * 7 - Get the coordinates of the destination tile
+     ********************************************************************************/
+    auto tile_gid_0 = writer->declare_tile("gid_0", ckw::TileInfo(ckw::DataType::Int32));
+    auto tile_gid_1 = writer->declare_tile("gid_1", ckw::TileInfo(ckw::DataType::Int32));
+    auto tile_gid_2 = writer->declare_tile("gid_2", ckw::TileInfo(ckw::DataType::Int32));
+
+    writer->op_get_global_id(tile_gid_0, 0);
+    writer->op_get_global_id(tile_gid_1, 1);
+    writer->op_get_global_id(tile_gid_2, 2);
+
+    auto tile_nout0 = writer->declare_tile("cout0", ckw::TileInfo(ckw::DataType::Int32)); // OFM
+    auto tile_mout0 = writer->declare_tile("mout0", ckw::TileInfo(ckw::DataType::Int32)); // WIDTH or WIDTH x HEIGHT
+    auto tile_mout1 = writer->declare_tile("mout1", ckw::TileInfo(ckw::DataType::Int32)); // HEIGHT or 0
+    auto tile_bout0 = writer->declare_tile("bout0", ckw::TileInfo(ckw::DataType::Int32)); // BATCH SIZE IDX
+
+    // Calculate coordinates
+    get_coordinate_from_gws_overlapping_min(writer, tile_nout0, tile_gid_0, const_n0_i32, const_shift_back_n0_i32,
+                                            const_0_i32);
+    get_coordinate_from_gws(writer, tile_mout0, tile_gid_1, const_m0_i32);
+
+    // Get the boundary aware coordinates at each global dimension index
+    if (sampler_src.format() == ckw::TensorSamplerFormat::Dim0_Dim1xDim2_1)
+    {
+        writer->op_assign(tile_mout1, const_0_i32);
+        get_coordinate_from_gws(writer, tile_bout0, tile_gid_2, const_pos_1_i32);
+    }
+    else if (sampler_src.format() == ckw::TensorSamplerFormat::Dim0_Dim1_Dim2)
+    {
+        // For tile_mout1 and tile_bout0 the step can only be 1
+        writer->op_binary(tile_mout1, ckw::BinaryOp::Mod, tile_gid_2, const_dst_h_i32);
+        writer->op_binary(tile_bout0, ckw::BinaryOp::Div, tile_gid_2, const_dst_h_i32);
+    }
+
+    /********************************************************************************
+     * 8 - Write the rest of the code
+     ********************************************************************************/
+    writer->op_store(dst->tensor(), tile_src, sampler_src, tile_nout0, tile_mout0, tile_mout1, tile_bout0);
+}
+
+std::string GpuCkwStore::get_name(const ComponentGroup &comp_group) const
+{
+    ARM_COMPUTE_UNUSED(comp_group);
+    return "store";
+}
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.h b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.h
new file mode 100644
index 0000000000..c9ce7eb269
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2023-2024 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_COMPONENTS_GPUCKWSTORE_H
+#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_COMPONENTS_GPUCKWSTORE_H
+
+#include "src/core/common/Macros.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/IGpuCkwComponentDriver.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+class GpuCkwStore : public IGpuCkwComponentDriver
+{
+public:
+    /** Constructor
+     *
+     * @param[in] id      Component id
+     * @param[in] tensors Tensor arguments to the component
+     */
+    GpuCkwStore(ComponentId id, const ArgumentPack<ITensorInfo> &tensors);
+    ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(GpuCkwStore);
+    /** Destructor */
+    ~GpuCkwStore() override = default;
+    // Inherited methods overriden:
+    virtual void write_component_code(const ComponentGroup    &comp_group,
+                                      GpuCkwVariableTable     &vtable,
+                                      GpuCkwScopedKernelWriter writer) const override;
+    std::string  get_name(const ComponentGroup &comp_group) const override;
+
+private:
+    const ITensorInfo *_src;
+    const ITensorInfo *_dst;
+};
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+
+#endif // ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_COMPONENTS_GPUCKWSTORE_H
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/CkwHelper.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/CkwHelper.cpp
new file mode 100644
index 0000000000..1e6f0841ad
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/CkwHelper.cpp
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2023-2024 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "CkwHelper.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+void get_coordinate_from_gws(GpuCkwScopedKernelWriter writer,
+                             ckw::TileOperand        &coord,
+                             const ckw::TileOperand  &gid,
+                             ckw::TileOperand        &step)
+{
+    writer->op_binary(coord, ckw::BinaryOp::Mul, gid, step);
+}
+
+void get_coordinate_from_gws_overlapping_min(GpuCkwScopedKernelWriter writer,
+                                             ckw::TileOperand        &coord,
+                                             const ckw::TileOperand  &gid,
+                                             ckw::TileOperand        &step,
+                                             ckw::TileOperand        &shift_back,
+                                             ckw::TileOperand        &const_0)
+{
+    // Applied formula: max((gid * step) - shift_back, 0)
+    // where the shift_back operand is: (step - leftover_step) % step
+
+    writer->op_binary(coord, ckw::BinaryOp::Mul, gid, step);
+    writer->op_binary(coord, ckw::BinaryOp::Sub, coord, shift_back);
+    writer->op_binary(coord, ckw::BinaryOp::Max, coord, const_0);
+}
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/CkwHelper.h b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/CkwHelper.h
new file mode 100644
index 0000000000..956e7c8ecb
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/CkwHelper.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2023-2024 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_COMPONENTS_UTILS_CKWHELPER_H
+#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_COMPONENTS_UTILS_CKWHELPER_H
+
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+/** Get coordinate along one axis.
+ *
+ * @param[in,out] writer Writer
+ * @param[out]    coord  Resultant coordinate
+ * @param[in]     gid    Global work item id
+ * @param[in]     step   Step size / vector size
+ */
+void get_coordinate_from_gws(GpuCkwScopedKernelWriter writer,
+                             ckw::TileOperand        &coord,
+                             const ckw::TileOperand  &gid,
+                             ckw::TileOperand        &step);
+
+/** Get boundary aware coordinate along one axis.
+ *
+ * @param[in,out] writer     Writer
+ * @param[out]    coord      Resultant coordinate
+ * @param[in]     gid        Global work item id
+ * @param[in]     step       Step size / vector size
+ * @param[in]     shift_back It is (step - leftover_step) % step
+ * @param[in]     const_0    Constant tile of value 0
+ */
+void get_coordinate_from_gws_overlapping_min(GpuCkwScopedKernelWriter writer,
+                                             ckw::TileOperand        &coord,
+                                             const ckw::TileOperand  &gid,
+                                             ckw::TileOperand        &step,
+                                             ckw::TileOperand        &shift_back,
+                                             ckw::TileOperand        &const_0);
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+#endif // ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_COMPONENTS_UTILS_CKWHELPER_H
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/Common.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/Common.cpp
new file mode 100644
index 0000000000..ad31b06362
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/Common.cpp
@@ -0,0 +1,162 @@
+/*
+ * Copyright (c) 2023-2024 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "Common.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+ckw::DataType to_ckw(DataType dt)
+{
+    switch (dt)
+    {
+        case DataType::F32:
+            return ckw::DataType::Fp32;
+        case DataType::F16:
+            return ckw::DataType::Fp16;
+        case DataType::S32:
+            return ckw::DataType::Int32;
+        case DataType::S16:
+            return ckw::DataType::Int16;
+        case DataType::S8:
+        case DataType::QASYMM8_SIGNED:
+            return ckw::DataType::Int8;
+        case DataType::U32:
+            return ckw::DataType::Uint32;
+        case DataType::U16:
+            return ckw::DataType::Uint16;
+        case DataType::U8:
+        case DataType::QASYMM8:
+            return ckw::DataType::Uint8;
+        default:
+            return ckw::DataType::Unknown;
+    }
+}
+
+ckw::TensorShape to_ckw(const TensorShape &shape)
+{
+    ARM_COMPUTE_ERROR_ON(shape.num_max_dimensions < std::tuple_size<ckw::TensorShape>{});
+    ARM_COMPUTE_ERROR_ON(std::tuple_size<ckw::TensorShape>{} != 5);
+    /// NOTE: Overflow danger. Use size_t?
+    return ckw::TensorShape{static_cast<int32_t>(shape[0]), static_cast<int32_t>(shape[1]),
+                            static_cast<int32_t>(shape[2]), static_cast<int32_t>(shape[3]),
+                            static_cast<int32_t>(shape[4])};
+}
+
+ckw::TensorDataLayout to_ckw(DataLayout dl)
+{
+    switch (dl)
+    {
+        case DataLayout::NHWC:
+            return ckw::TensorDataLayout::Nhwc;
+        case DataLayout::NDHWC:
+            return ckw::TensorDataLayout::Ndhwc;
+        default:
+            return ckw::TensorDataLayout::Unknown;
+    }
+}
+
+ckw::TensorInfo to_ckw(const ITensorInfo &tensor_info)
+{
+    return ckw::TensorInfo{to_ckw(tensor_info.data_type()), to_ckw(tensor_info.tensor_shape()),
+                           to_ckw(tensor_info.data_layout()), tensor_info.id()};
+}
+
+ckw::TensorStorageType to_ckw(const TensorStorageType &storage)
+{
+    switch (storage)
+    {
+        case TensorStorageType::ClBufferUint8Ptr:
+            return ckw::TensorStorageType::BufferUint8Ptr;
+        case TensorStorageType::ClImage2dReadOnly:
+            return ckw::TensorStorageType::Texture2dReadOnly;
+        case TensorStorageType::ClImage2dWriteOnly:
+            return ckw::TensorStorageType::Texture2dWriteOnly;
+        case TensorStorageType::Unknown:
+            return ckw::TensorStorageType::Unknown;
+        default:
+            ARM_COMPUTE_ERROR("Unknown tensor storage type");
+    }
+}
+
+TensorComponentType from_ckw(const ckw::TensorComponentType &component)
+{
+    switch (component)
+    {
+        case ckw::TensorComponentType::OffsetFirstElement:
+            return TensorComponentType::OffsetFirstElement;
+        case ckw::TensorComponentType::Stride0:
+            return TensorComponentType::Stride0;
+        case ckw::TensorComponentType::Stride1:
+            return TensorComponentType::Stride1;
+        case ckw::TensorComponentType::Stride2:
+            return TensorComponentType::Stride2;
+        case ckw::TensorComponentType::Stride3:
+            return TensorComponentType::Stride3;
+        case ckw::TensorComponentType::Stride4:
+            return TensorComponentType::Stride4;
+        case ckw::TensorComponentType::Dim0:
+            return TensorComponentType::Dim0;
+        case ckw::TensorComponentType::Dim1:
+            return TensorComponentType::Dim1;
+        case ckw::TensorComponentType::Dim2:
+            return TensorComponentType::Dim2;
+        case ckw::TensorComponentType::Dim3:
+            return TensorComponentType::Dim3;
+        case ckw::TensorComponentType::Dim4:
+            return TensorComponentType::Dim4;
+        case ckw::TensorComponentType::Dim1xDim2:
+            return TensorComponentType::Dim1xDim2;
+        case ckw::TensorComponentType::Dim2xDim3:
+            return TensorComponentType::Dim2xDim3;
+        case ckw::TensorComponentType::Dim1xDim2xDim3:
+            return TensorComponentType::Dim1xDim2xDim3;
+        case ckw::TensorComponentType::Unknown:
+            return TensorComponentType::Unknown;
+        default:
+            ARM_COMPUTE_ERROR("Unknown CKW tensor component");
+    }
+}
+
+TensorStorageType from_ckw(const ckw::TensorStorageType &storage)
+{
+    switch (storage)
+    {
+        case ckw::TensorStorageType::BufferUint8Ptr:
+            return TensorStorageType::ClBufferUint8Ptr;
+        case ckw::TensorStorageType::Texture2dReadOnly:
+            return TensorStorageType::ClImage2dReadOnly;
+        case ckw::TensorStorageType::Texture2dWriteOnly:
+            return TensorStorageType::ClImage2dWriteOnly;
+        case ckw::TensorStorageType::Unknown:
+            return TensorStorageType::Unknown;
+        default:
+            ARM_COMPUTE_ERROR("Unknown CKW tensor storage type");
+    }
+}
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/Common.h b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/Common.h
new file mode 100644
index 0000000000..26740cdd04
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/Common.h
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) 2023-2024 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_COMPONENTS_UTILS_TYPE_CONVERTER_COMMON_H
+#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_COMPONENTS_UTILS_TYPE_CONVERTER_COMMON_H
+
+#include "arm_compute/core/CoreTypes.h"
+#include "arm_compute/core/ITensorInfo.h"
+#include "arm_compute/core/TensorShape.h"
+
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
+
+#include "compute_kernel_writer/include/ckw/TensorInfo.h"
+#include "compute_kernel_writer/include/ckw/types/DataType.h"
+#include "compute_kernel_writer/include/ckw/types/TensorComponentType.h"
+#include "compute_kernel_writer/include/ckw/types/TensorStorageType.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+/** Convert the Compute Library data type to Compute Kernel Writer data type
+ *
+ * @param[in] dt The Compute Library data type
+ *
+ * @return the Compute Kernel Writer data type (ckw::DataType)
+ */
+ckw::DataType to_ckw(DataType dt);
+
+/** Convert the Compute Library tensor shape to Compute Kernel Writer tensor shape
+ *
+ * @param[in] shape The Compute Library tensor shape
+ *
+ * @return the Compute Kernel Writer tensor shape (ckw::TensorShape)
+ */
+ckw::TensorShape to_ckw(const TensorShape &shape);
+
+/** Convert the Compute Library data layout to Compute Kernel Writer data layout
+ *
+ * @param[in] dl The Compute Library data layout
+ *
+ * @return the Compute Kernel Writer data layout (ckw::TensorDataLayout)
+ */
+ckw::TensorDataLayout to_ckw(DataLayout dl);
+
+/** Convert the Compute Library tensor info to Compute Kernel Writer tensor info
+ *
+ * @param[in] tensor_info The Compute Library tensor info
+ *
+ * @return the Compute Kernel Writer tensor info (ckw::TensorInfo)
+ */
+ckw::TensorInfo to_ckw(const ITensorInfo &tensor_info);
+
+/** Convert the Compute Library tensor storage to Compute Kernel Writer tensor storage
+ *
+ * @param[in] storage The Compute Library tensor storage
+ *
+ * @return the Compute Kernel Writer tensor storate (ckw::TensorStorageType)
+ */
+ckw::TensorStorageType to_ckw(const TensorStorageType &storage);
+
+/** Convert the Compute Kernel Writer tensor component to Compute Library tensor component
+ *
+ * @param[in] component The Compute Kernel Writer tensor component
+ *
+ * @return the Compute Library tensor component
+ */
+TensorComponentType from_ckw(const ckw::TensorComponentType &component);
+
+/** Convert the Compute Kernel Writer tensor storage to Compute Library tensor storage
+ *
+ * @param[in] storage The Compute Kernel Writer tensor storage
+ *
+ * @return the Compute Library tensor storage
+ */
+TensorStorageType from_ckw(const ckw::TensorStorageType &storage);
+
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+#endif // ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_COMPONENTS_UTILS_TYPE_CONVERTER_COMMON_H
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/ElementwiseBinary.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/ElementwiseBinary.cpp
new file mode 100644
index 0000000000..5630e390d5
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/ElementwiseBinary.cpp
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2023-2024 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/ElementwiseBinary.h"
+
+#include "src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+ckw::BinaryOp to_ckw(const ElementwiseBinaryCommonAttributes &attributes)
+{
+    switch (attributes.operation())
+    {
+        case ElementwiseBinaryCommonAttributes::ElementwiseOp::Add:
+            return ckw::BinaryOp::Add;
+        case ElementwiseBinaryCommonAttributes::ElementwiseOp::Sub:
+            return ckw::BinaryOp::Sub;
+        case ElementwiseBinaryCommonAttributes::ElementwiseOp::Div:
+            return ckw::BinaryOp::Div;
+        case ElementwiseBinaryCommonAttributes::ElementwiseOp::Mul:
+            return ckw::BinaryOp::Mul;
+        case ElementwiseBinaryCommonAttributes::ElementwiseOp::Min:
+        case ElementwiseBinaryCommonAttributes::ElementwiseOp::Max:
+        case ElementwiseBinaryCommonAttributes::ElementwiseOp::Power:
+        case ElementwiseBinaryCommonAttributes::ElementwiseOp::Prelu:
+        case ElementwiseBinaryCommonAttributes::ElementwiseOp::SquaredDiff:
+        default:
+            ARM_COMPUTE_ERROR("Cannot convert ElementwiseBinaryCommonAttributes to corresponding ckw::BinaryOp");
+    }
+}
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/ElementwiseBinary.h b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/ElementwiseBinary.h
new file mode 100644
index 0000000000..644a407702
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/ElementwiseBinary.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2023-2024 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_COMPONENTS_UTILS_TYPE_CONVERTER_ELEMENTWISEBINARY_H
+#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_COMPONENTS_UTILS_TYPE_CONVERTER_ELEMENTWISEBINARY_H
+
+#include "src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.h"
+
+#include "compute_kernel_writer/include/ckw/types/Operators.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+ckw::BinaryOp to_ckw(const ElementwiseBinaryCommonAttributes &attributes);
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+
+#endif // ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_COMPONENTS_UTILS_TYPE_CONVERTER_ELEMENTWISEBINARY_H