path: root/src/dynamic_fusion/sketch
Diffstat (limited to 'src/dynamic_fusion/sketch')
-rw-r--r--  src/dynamic_fusion/sketch/ArgumentPack.h | 242
-rw-r--r--  src/dynamic_fusion/sketch/OperatorAttributes.cpp | 63
-rw-r--r--  src/dynamic_fusion/sketch/gpu/GpuComponentServices.h | 54
-rw-r--r--  src/dynamic_fusion/sketch/gpu/GpuKernelArgument.cpp | 37
-rw-r--r--  src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h | 128
-rw-r--r--  src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.cpp | 125
-rw-r--r--  src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.h | 104
-rw-r--r--  src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.cpp | 291
-rw-r--r--  src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h | 143
-rw-r--r--  src/dynamic_fusion/sketch/gpu/GpuKernelComponentStream.cpp | 69
-rw-r--r--  src/dynamic_fusion/sketch/gpu/GpuKernelComponentStream.h | 88
-rw-r--r--  src/dynamic_fusion/sketch/gpu/GpuKernelSourceCode.h | 126
-rw-r--r--  src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.cpp | 87
-rw-r--r--  src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.h | 77
-rw-r--r--  src/dynamic_fusion/sketch/gpu/GpuOperatorGroup.cpp | 172
-rw-r--r--  src/dynamic_fusion/sketch/gpu/GpuOperatorGroup.h | 111
-rw-r--r--  src/dynamic_fusion/sketch/gpu/GpuOperatorProperties.h | 54
-rw-r--r--  src/dynamic_fusion/sketch/gpu/GpuWorkloadContext.cpp | 55
-rw-r--r--  src/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.cpp | 76
-rw-r--r--  src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h | 111
-rw-r--r--  src/dynamic_fusion/sketch/gpu/GpuWorkloadSourceCode.h | 252
-rw-r--r--  src/dynamic_fusion/sketch/gpu/IGpuKernelWriter.h | 66
-rw-r--r--  src/dynamic_fusion/sketch/gpu/components/GpuKernelComponentFactory.h | 64
-rw-r--r--  src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h | 119
-rw-r--r--  src/dynamic_fusion/sketch/gpu/components/Types.h | 52
-rw-r--r--  src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.cpp | 152
-rw-r--r--  src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.h | 147
-rw-r--r--  src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.cpp | 57
-rw-r--r--  src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.h | 102
-rw-r--r--  src/dynamic_fusion/sketch/gpu/operators/GpuConv2d.cpp | 255
-rw-r--r--  src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.cpp | 109
-rw-r--r--  src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h | 135
-rw-r--r--  src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h | 137
-rw-r--r--  src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.cpp | 400
-rw-r--r--  src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.h | 113
-rw-r--r--  src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.cpp | 113
-rw-r--r--  src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.h | 85
-rw-r--r--  src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.cpp | 297
-rw-r--r--  src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.h | 92
-rw-r--r--  src/dynamic_fusion/sketch/utils/DependencyGraph.h | 658
40 files changed, 5618 insertions, 0 deletions
diff --git a/src/dynamic_fusion/sketch/ArgumentPack.h b/src/dynamic_fusion/sketch/ArgumentPack.h
new file mode 100644
index 0000000000..f118d7d851
--- /dev/null
+++ b/src/dynamic_fusion/sketch/ArgumentPack.h
@@ -0,0 +1,242 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef SRC_DYNAMIC_FUSION_SKETCH_ARGUMENTPACK
+#define SRC_DYNAMIC_FUSION_SKETCH_ARGUMENTPACK
+
+#include "arm_compute/core/experimental/Types.h"
+#include <unordered_map>
+#include <vector>
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+/** This is a generic class that packs the arguments of an operator. For now, it is only used for tensor-related types
+ * Examples of "tensor-related types": @ref ITensorInfo, @ref ITensor, @ref ICLTensor
+ *
+ * The argument id is the position of the argument within the pack, and is represented by @ref TensorType
+ *
+ * @tparam T Tensor-related type
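+ *
+ * A minimal usage sketch (src_info, wei_info and dst_info are illustrative, already-initialized @ref TensorInfo objects):
+ * @code
+ * ArgumentPack<ITensorInfo> pack{};
+ * pack.add_const_tensor(ACL_SRC_0, &src_info);
+ * pack.add_const_tensor(ACL_SRC_1, &wei_info);
+ * pack.add_tensor(ACL_DST_0, &dst_info);
+ * const auto srcs = pack.get_const_src_tensors(); // { &src_info, &wei_info }
+ * const auto dsts = pack.get_dst_tensors();       // { &dst_info }
+ * @endcode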
+ */
+template <typename T>
+class ArgumentPack
+{
+public:
+ /** @ref TensorType encodes the position of a tensor argument within the pack */
+ using Id = TensorType;
+ /** A single argument element within the pack
+ * It contains either a const pointer or a non-const pointer to the Tensor-related type T, but never both at the same time
+ */
+ struct PackElement
+ {
+ PackElement() = default;
+ PackElement(const PackElement &elem) = default;
+ PackElement &operator=(const PackElement &elem) = default;
+ PackElement(PackElement &&elem) = default;
+ PackElement &operator=(PackElement &&elem) = default;
+ PackElement(Id id, T *tensor)
+ : id(id), tensor(tensor), ctensor(nullptr)
+ {
+ }
+ PackElement(Id id, const T *ctensor)
+ : id(id), tensor(nullptr), ctensor(ctensor)
+ {
+ }
+
+ Id id{ ACL_UNKNOWN }; /**< Argument id within the pack */
+ T *tensor{ nullptr }; /**< Non-const pointer to tensor-related object */
+ const T *ctensor
+ {
+ nullptr
+ }; /**< Const pointer to tensor-related object */
+ };
+
+public:
+ /** Default constructor */
+ ArgumentPack() = default;
+ /** Destructor */
+ ~ArgumentPack() = default;
+ /** Allow instances of this class to be copy constructed */
+ ArgumentPack<T>(const ArgumentPack<T> &other) = default;
+ /** Allow instances of this class to be copied */
+ ArgumentPack<T> &operator=(const ArgumentPack<T> &other) = default;
+ /** Allow instances of this class to be move constructed */
+ ArgumentPack<T>(ArgumentPack<T> &&other) = default;
+ /** Allow instances of this class to be moved */
+ ArgumentPack<T> &operator=(ArgumentPack<T> &&other) = default;
+ /** Initializer list Constructor */
+ ArgumentPack(const std::initializer_list<PackElement> &l)
+ : _pack{}
+ {
+ for(const auto &e : l)
+ {
+ _pack[e.id] = e;
+ }
+ }
+ /** Add tensor to the pack
+ *
+ * @param[in] id ID of the tensor to add
+ * @param[in] tensor Tensor to add
+ */
+ void add_tensor(Id id, T *tensor)
+ {
+ _pack[id] = PackElement(id, tensor);
+ }
+ /** Add const tensor to the pack
+ *
+ * @param[in] id ID of the tensor to add
+ * @param[in] tensor Tensor to add
+ */
+ void add_const_tensor(Id id, const T *tensor)
+ {
+ _pack[id] = PackElement(id, tensor);
+ }
+ /** Get tensor of a given id from the pack
+ *
+ * @param[in] id ID of tensor to extract
+ *
+ * @return The pointer to the tensor if it exists and is non-const, otherwise nullptr
+ */
+ T *get_tensor(Id id)
+ {
+ auto it = _pack.find(id);
+ return it != _pack.end() ? it->second.tensor : nullptr;
+ }
+ /** Get constant tensor of a given id
+ *
+ * @param[in] id ID of tensor to extract
+ *
+ * @return The pointer to the tensor (const or non-const) if it exists, otherwise nullptr
+ */
+ const T *get_const_tensor(Id id) const
+ {
+ auto it = _pack.find(id);
+ if(it != _pack.end())
+ {
+ return it->second.ctensor != nullptr ? it->second.ctensor : it->second.tensor;
+ }
+ return nullptr;
+ }
+ /** Remove the tensor stored with the given id
+ *
+ * @param[in] id ID of tensor to remove
+ */
+ void remove_tensor(Id id)
+ {
+ _pack.erase(id);
+ }
+ /** Pack size accessor
+ *
+ * @return Number of tensors registered to the pack
+ */
+ size_t size() const
+ {
+ return _pack.size();
+ }
+ /** Checks if pack is empty
+ *
+ * @return True if empty else false
+ */
+ bool empty() const
+ {
+ return _pack.empty();
+ }
+ /** Get the ACL_SRC_* tensors
+ *
+ * @return std::vector<T *>
+ */
+ std::vector<T *> get_src_tensors()
+ {
+ std::vector<T *> src_tensors{};
+ for(int id = static_cast<int>(TensorType::ACL_SRC); id <= static_cast<int>(TensorType::ACL_SRC_END); ++id)
+ {
+ auto tensor = get_tensor(static_cast<TensorType>(id));
+ if(tensor != nullptr)
+ {
+ src_tensors.push_back(tensor);
+ }
+ }
+ return src_tensors;
+ }
+ /** Get the const ACL_SRC_* tensors
+ *
+ * @return std::vector<const T *>
+ */
+ std::vector<const T *> get_const_src_tensors() const
+ {
+ std::vector<const T *> src_tensors{};
+ for(int id = static_cast<int>(TensorType::ACL_SRC); id <= static_cast<int>(TensorType::ACL_SRC_END); ++id)
+ {
+ auto tensor = get_const_tensor(static_cast<TensorType>(id));
+ if(tensor != nullptr)
+ {
+ src_tensors.push_back(tensor);
+ }
+ }
+ return src_tensors;
+ }
+ /** Get the ACL_DST_* tensors
+ *
+ * @return std::vector<T *>
+ */
+ std::vector<T *> get_dst_tensors()
+ {
+ std::vector<T *> dst_tensors{};
+ for(int id = static_cast<int>(TensorType::ACL_DST); id <= static_cast<int>(TensorType::ACL_DST_END); ++id)
+ {
+ auto tensor = get_tensor(static_cast<TensorType>(id));
+ if(tensor != nullptr)
+ {
+ dst_tensors.push_back(tensor);
+ }
+ }
+ return dst_tensors;
+ }
+ /** Get the const ACL_DST_* tensors
+ *
+ * @return std::vector<const T *>
+ */
+ std::vector<const T *> get_const_dst_tensors() const
+ {
+ std::vector<const T *> dst_tensors{};
+ for(int id = static_cast<int>(TensorType::ACL_DST); id <= static_cast<int>(TensorType::ACL_DST_END); ++id)
+ {
+ auto tensor = get_const_tensor(static_cast<TensorType>(id));
+ if(tensor != nullptr)
+ {
+ dst_tensors.push_back(tensor);
+ }
+ }
+ return dst_tensors;
+ }
+
+private:
+ std::unordered_map<int, PackElement> _pack{}; /**< Container with the packed tensors */
+};
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+#endif /* SRC_DYNAMIC_FUSION_SKETCH_ARGUMENTPACK */
diff --git a/src/dynamic_fusion/sketch/OperatorAttributes.cpp b/src/dynamic_fusion/sketch/OperatorAttributes.cpp
new file mode 100644
index 0000000000..51ec444587
--- /dev/null
+++ b/src/dynamic_fusion/sketch/OperatorAttributes.cpp
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "arm_compute/dynamic_fusion/sketch/OperatorAttributes.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+Conv2dAttributes &Conv2dAttributes::pad(const Padding2D &pad)
+{
+ _pad = pad;
+ return *this;
+}
+Padding2D Conv2dAttributes::pad() const
+{
+ return _pad;
+}
+Conv2dAttributes &Conv2dAttributes::stride(const Size2D &stride)
+{
+ _stride = stride;
+ return *this;
+}
+Size2D Conv2dAttributes::stride() const
+{
+ return _stride;
+}
+Conv2dAttributes &Conv2dAttributes::dilation(const Size2D &dilation)
+{
+ _dilation = dilation;
+ return *this;
+}
+Size2D Conv2dAttributes::dilation() const
+{
+ return _dilation;
+}
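+
+// A minimal usage sketch of the fluent setters above (the padding, stride and dilation values are purely illustrative):
+//   Conv2dAttributes attr = Conv2dAttributes().pad({ 1, 1, 1, 1 }).stride({ 1, 1 }).dilation({ 1, 1 });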
+
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/GpuComponentServices.h b/src/dynamic_fusion/sketch/gpu/GpuComponentServices.h
new file mode 100644
index 0000000000..93881508bb
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/GpuComponentServices.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTSERVICES
+#define SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTSERVICES
+
+#include "src/dynamic_fusion/sketch/gpu/components/GpuKernelComponentFactory.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+/** Services that are used throughout the creation phase of workload code
+ */
+class GpuComponentServices
+{
+public:
+ /** Default constructor */
+ GpuComponentServices() = default;
+ /** Get reference to component factory */
+ GpuKernelComponentFactory &component_factory()
+ {
+ return _comp_factory;
+ }
+
+private:
+ GpuKernelComponentFactory _comp_factory{};
+};
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTSERVICES */
diff --git a/src/dynamic_fusion/sketch/gpu/GpuKernelArgument.cpp b/src/dynamic_fusion/sketch/gpu/GpuKernelArgument.cpp
new file mode 100644
index 0000000000..9cecfc2ffd
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/GpuKernelArgument.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+bool operator==(const GpuKernelArgumentInfo &info0, const GpuKernelArgumentInfo &info1)
+{
+ return info0.type == info1.type;
+}
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h b/src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h
new file mode 100644
index 0000000000..eb36e91d48
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h
@@ -0,0 +1,128 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELARGUMENT
+#define SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELARGUMENT
+
+#include "arm_compute/core/TensorInfo.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+/** Contains the information required to set up a kernel argument at run time
+ */
+struct GpuKernelArgumentInfo
+{
+ /** Enumerates all the tensor argument variants used by all kernel implementations. */
+ enum class Type : int
+ {
+ Scalar,
+
+ Vector,
+
+ Image,
+ Image_Reinterpret_As_3D,
+ Image_Export_To_ClImage2D,
+
+ Image_3D, // 3D Tensor represented as a 2D Image + stride_z
+ Image_3D_Export_To_ClImage2D,
+
+ Tensor_3D,
+ Tensor_4D,
+ Tensor_4D_t_Buffer,
+ Tensor_4D_t_Image
+ };
+ /** Default constructor */
+ GpuKernelArgumentInfo() = default;
+ /** Constructor */
+ GpuKernelArgumentInfo(Type type)
+ : type{ type }
+ {
+ }
+ Type type{ Type::Tensor_4D_t_Buffer };
+};
+
+bool operator==(const GpuKernelArgumentInfo &info0, const GpuKernelArgumentInfo &info1);
+
+/** Kernel argument information linked with its corresponding @ref ITensorInfo
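+ *
+ * A minimal construction sketch (tensor_info is an illustrative, already-initialized @ref TensorInfo):
+ * @code
+ * GpuKernelArgument arg{ tensor_info, GpuKernelArgumentInfo{ GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer } };
+ * const auto arg_id = arg.id(); // Same id as the associated workload tensor
+ * @endcode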
+ */
+class GpuKernelArgument
+{
+public:
+ /** Constructor
+ *
+ * @param[in] tensor_info Associated @ref ITensorInfo
+ * @param[in] kernel_arg_info Associated @ref GpuKernelArgumentInfo
+ */
+ GpuKernelArgument(const ITensorInfo &tensor_info,
+ const GpuKernelArgumentInfo &kernel_arg_info)
+ : _tensor_info{ tensor_info },
+ _kernel_arg_info{ kernel_arg_info }
+ {
+ }
+ /** Get workload tensor id */
+ ITensorInfo::Id id() const
+ {
+ return _tensor_info.id();
+ }
+ /** Get associated @ref ITensorInfo */
+ ITensorInfo *tensor_info()
+ {
+ return &_tensor_info;
+ }
+ /** Get associated @ref ITensorInfo */
+ const ITensorInfo *tensor_info() const
+ {
+ return &_tensor_info;
+ }
+ /** Get associated @ref GpuKernelArgumentInfo */
+ GpuKernelArgumentInfo *kernel_argument_info()
+ {
+ return &_kernel_arg_info;
+ }
+ /** Get associated @ref GpuKernelArgumentInfo */
+ const GpuKernelArgumentInfo *kernel_argument_info() const
+ {
+ return &_kernel_arg_info;
+ }
+ /** Check if the associated workload tensor has a valid id
+ *
+ * @return true if it has a valid id
+ * @return false otherwise
+ */
+ bool has_valid_id() const
+ {
+ return _tensor_info.has_valid_id();
+ }
+
+private:
+ TensorInfo _tensor_info{};
+ GpuKernelArgumentInfo _kernel_arg_info{};
+};
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELARGUMENT */
diff --git a/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.cpp b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.cpp
new file mode 100644
index 0000000000..6e6422c957
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.cpp
@@ -0,0 +1,125 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "GpuKernelComponentGraph.h"
+
+#include "arm_compute/dynamic_fusion/sketch/MemoryDescriptor.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+namespace
+{
+/** Automatically create memory descriptors for all tensors in the graph
+ *
+ * @param[in] tensors @ref ITensorInfo map
+ * @param[in] graph @ref DependencyGraph of which the @p tensors are a part
+ *
+ * @return MemoryDescriptorMap An assignment map of @ref MemoryDescriptor for each @ref ITensorInfo in the graph
+ */
+MemoryDescriptorMap assign_memory_descriptors(const std::map<ITensorInfo::Id, const ITensorInfo *> tensors, const DependencyGraph &graph)
+{
+ MemoryDescriptorMap mem_map{};
+ for(auto t_id : graph.all_tensors())
+ {
+ const auto &tensor = tensors.at(t_id);
+ // Only global src and dst tensors to the entire component graph are "User" tensors, which are user-specified memories
+ if(is_in(t_id, graph.global_src_tensors()) || is_in(t_id, graph.global_dst_tensors()))
+ {
+ mem_map[t_id] = MemoryDescriptor{ MemoryType::User };
+ }
+ else
+ {
+ AuxMemoryInfo aux_mem_info{ tensor->total_size() };
+ mem_map[t_id] = MemoryDescriptor{ MemoryType::Auxiliary, aux_mem_info };
+ }
+ }
+ return mem_map;
+}
+
+} // namespace
+
+std::vector<DependencyGraph::TensorId> GpuKernelComponentGraph::get_tensor_ids(const std::vector<const ITensorInfo *> tensors)
+{
+ std::vector<DependencyGraph::TensorId> tensor_ids{};
+ std::transform(
+ std::begin(tensors), std::end(tensors),
+ std::back_inserter(tensor_ids),
+ [](const auto & t)
+ {
+ return t->id();
+ });
+ return tensor_ids;
+}
+
+GpuKernelComponentGraph::GpuKernelComponentGraph(GpuComponentServices *services)
+ : _services{ services }, _components{}, _tensors{}, _dependency_graph{}
+{
+}
+
+GpuKernelComponentStream GpuKernelComponentGraph::fuse() const
+{
+ // Obtain memory descriptor map
+ const auto mem_map = assign_memory_descriptors(_tensors, _dependency_graph);
+ /// @note Fusion constraints (for kernel components) are exactly the same as the invariants of @ref GpuKernelComponentGroup
+ /// Fusion can be framed as a mathematical optimization problem:
+ /// Given fusion constraints, find the "best" fusion patterns possible
+ /// "Best" is ill-defined at the moment. For now we define "best" fusion pattern as one
+ /// which results in the least number of fused kernels ( @ref GpuKernelComponentGroup ) at the end
+
+ /// As the first iteration, we offer a sub-optimal algorithm here which ensures all
+ /// constraints are met, but provides no guarantee that the fusion pattern is optimal
+
+ GpuKernelComponentStream stream{ _services, mem_map };
+ // Break down into linear groups of components (constraint 1), preserving topological order
+ const auto linear_graphs = _dependency_graph.topological_partition();
+
+ // Further divide up the linear groups based on rest of the fusion constraints (rely on component group's invariants)
+ for(const auto &graph : linear_graphs)
+ {
+ for(unsigned int i = 0; i < graph.size(); ++i)
+ {
+ const auto comp = _components.at(graph[i].op).get();
+ // Each new linear graph signals a new component group in the stream
+ if(i == 0)
+ {
+ stream.new_component_group();
+ }
+ // If adding the component violates the component group's invariants / fusion constraints, break up the stream by inserting a new group
+ bool success = stream.add_component(comp);
+ if(!success)
+ {
+ stream.new_component_group();
+ success = stream.add_component(comp);
+ ARM_COMPUTE_ERROR_ON(!success);
+ }
+ }
+ }
+ return stream;
+}
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.h b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.h
new file mode 100644
index 0000000000..fbcb2c10ea
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.h
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELCOMPONENTGRAPH
+#define SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELCOMPONENTGRAPH
+
+#include "src/dynamic_fusion/sketch/ArgumentPack.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuComponentServices.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentStream.h"
+#include "src/dynamic_fusion/sketch/utils/DependencyGraph.h"
+
+#include <vector>
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+class IGpuKernelComponent;
+
+/** A multi-input (tensors), multi-output (tensors) acyclic directed graph of gpu kernel components
+ * Its main purposes are:
+ * - Perform "graph-level" optimizations like fusion of kernel components (not the fusion of operators)
+ * - Automatically assign memory descriptors ( @ref MemoryDescriptor ) to all tensors based on the graph topology
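+ *
+ * A rough usage sketch (the component types and their constructor arguments are illustrative only):
+ * @code
+ * GpuComponentServices services{};
+ * GpuKernelComponentGraph graph{ &services };
+ * graph.add_new_component<ClComponentDirectConv2d>(properties, tensors, attributes, settings);
+ * graph.add_new_component<ClComponentStore>(properties, tensors);
+ * GpuKernelComponentStream stream = graph.fuse();
+ * @endcode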
+ */
+class GpuKernelComponentGraph
+{
+public:
+ /** Constructor
+ *
+ * @param[in] services @ref GpuComponentServices to be used by the graph
+ */
+ GpuKernelComponentGraph(GpuComponentServices *services);
+ /** Prevent instances of this class from being copy constructed */
+ GpuKernelComponentGraph(const GpuKernelComponentGraph &graph) = delete;
+ /** Prevent instances of this class from being copied */
+ GpuKernelComponentGraph &operator=(const GpuKernelComponentGraph &graph) = delete;
+ /** Allow instances of this class to be move constructed */
+ GpuKernelComponentGraph(GpuKernelComponentGraph &&graph) = default;
+ /** Allow instances of this class to be moved */
+ GpuKernelComponentGraph &operator=(GpuKernelComponentGraph &&graph) = default;
+ /** Create a new component and add it to the component graph
+ * Component id is automatically allocated
+ *
+ * @tparam T Component type
+ * @tparam Args Component argument types
+ *
+ * @param[in] args Component arguments except for component id, which is auto-allocated
+ */
+ template <typename T, typename... Args>
+ void add_new_component(Args &&... args)
+ {
+ auto comp = _services->component_factory().create<T>(std::forward<Args>(args)...);
+ ArgumentPack<ITensorInfo> tensors = comp->tensors();
+ const auto src_tensor_ids = get_tensor_ids(tensors.get_const_src_tensors());
+ const auto dst_tensor_ids = get_tensor_ids(tensors.get_const_dst_tensors());
+ bool success = _dependency_graph.add_operator(comp->id(), src_tensor_ids, dst_tensor_ids);
+ ARM_COMPUTE_ERROR_ON(!success);
+ _components[comp->id()] = std::move(comp);
+ for(auto t : tensors.get_const_src_tensors())
+ {
+ _tensors[t->id()] = t;
+ }
+ for(auto t : tensors.get_const_dst_tensors())
+ {
+ _tensors[t->id()] = t;
+ }
+ }
+ /** Perform component fusion and serialize the graph into a stream of component groups
+ */
+ GpuKernelComponentStream fuse() const;
+
+private:
+ static std::vector<DependencyGraph::TensorId> get_tensor_ids(const std::vector<const ITensorInfo *> tensors);
+ GpuComponentServices *_services;
+ std::map<ComponentId, std::unique_ptr<IGpuKernelComponent>> _components;
+ std::map<ITensorInfo::Id, const ITensorInfo *> _tensors;
+ DependencyGraph _dependency_graph{};
+};
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELCOMPONENTGRAPH */
diff --git a/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.cpp b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.cpp
new file mode 100644
index 0000000000..3af4c1429d
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.cpp
@@ -0,0 +1,291 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "GpuKernelComponentGroup.h"
+
+#include "arm_compute/core/ITensorInfo.h"
+#include "arm_compute/core/Validate.h"
+#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+bool GpuKernelComponentGroup::add_component(ComponentPtr component)
+{
+ // note: Constraint 1 is guaranteed as a precondition
+ // Constraint 2
+ if(component->type() != GpuComponentType::Output && _components.size() >= max_fused_components)
+ {
+ return false;
+ }
+ // Constraint 3.1: Pattern: (Unfusable + Output)
+ if(!_components.empty() && get_root_component()->type() == GpuComponentType::Unfusable && component->type() != GpuComponentType::Output)
+ {
+ return false;
+ }
+ // Constraint 3.2
+ if(!_components.empty() && (component->type() != GpuComponentType::Simple && component->type() != GpuComponentType::Output))
+ {
+ return false;
+ }
+ // Constraint 3.3: Disallow multiple output components
+ if(!_components.empty() && get_last_component()->type() == GpuComponentType::Output && component->type() == GpuComponentType::Output)
+ {
+ return false;
+ }
+ // Constraint 4
+ if(component->type() != GpuComponentType::Unfusable && component->tensors().get_const_dst_tensors().size() != 1U)
+ {
+ return false;
+ }
+ // Constraint 5
+ if(!_components.empty() && !(get_root_component()->properties() == component->properties()))
+ {
+ return false;
+ }
+ // Constraint 7
+ if(!_components.empty())
+ {
+ const auto root_dst_tensors = get_root_component()->tensors().get_const_dst_tensors();
+ ARM_COMPUTE_ERROR_ON(root_dst_tensors.empty());
+ const auto first_dst_tensor = root_dst_tensors[0];
+ const auto dst_tensors = component->tensors().get_const_dst_tensors();
+ for(const auto &t : root_dst_tensors)
+ {
+ if(detail::have_different_dimensions(t->tensor_shape(), first_dst_tensor->tensor_shape(), 0))
+ {
+ return false;
+ }
+ }
+ for(const auto &t : dst_tensors)
+ {
+ if(detail::have_different_dimensions(t->tensor_shape(), first_dst_tensor->tensor_shape(), 0))
+ {
+ return false;
+ }
+ }
+ }
+ // Constraint 8
+ if(!_components.empty())
+ {
+ const auto root_dst_tensors = get_root_component()->tensors().get_const_dst_tensors();
+ ARM_COMPUTE_ERROR_ON(root_dst_tensors.empty());
+ const auto first_dst_tensor_layout = root_dst_tensors[0]->data_layout();
+ const auto dst_tensors = component->tensors().get_const_dst_tensors();
+ for(const auto &t : root_dst_tensors)
+ {
+ if(t->data_layout() != first_dst_tensor_layout)
+ {
+ return false;
+ }
+ }
+ for(const auto &t : dst_tensors)
+ {
+ if(t->data_layout() != first_dst_tensor_layout)
+ {
+ return false;
+ }
+ }
+ }
+ // Constraint 9
+ if(component->tensors().get_const_dst_tensors().size() >= max_dst_tensors)
+ {
+ return false;
+ }
+ // Constraint 9 corollary
+ if(component->type() == GpuComponentType::Output && _components.size() >= max_fused_components + max_dst_tensors)
+ {
+ return false;
+ }
+ _components.push_back(component);
+ return true;
+}
+
+std::vector<const ITensorInfo *> GpuKernelComponentGroup::get_src_tensors() const
+{
+ if(_components.empty())
+ {
+ return {};
+ }
+ auto src_tensors = _components[0]->tensors().get_const_src_tensors();
+ auto prev_dst_tensor = _components[0]->tensors().get_const_dst_tensors()[0]; // PRE: Only one dst tensor per component
+ for(unsigned int i = 1; i < _components.size(); ++i)
+ {
+ auto cur_src_tensors = _components[i]->tensors().get_const_src_tensors();
+ for(const auto src_tensor : cur_src_tensors)
+ {
+ if(src_tensor->id() == prev_dst_tensor->id())
+ {
+ continue; // Skip "intermediate" tensors. I.e. tensors that are used to link between two components
+ }
+ src_tensors.push_back(src_tensor);
+ }
+ prev_dst_tensor = _components[i]->tensors().get_const_dst_tensors()[0]; // PRE: Only one dst tensor per component
+ }
+
+ return src_tensors;
+}
+
+std::vector<const ITensorInfo *> GpuKernelComponentGroup::get_dst_tensors() const
+{
+ if(_components.empty())
+ {
+ return {};
+ }
+ const auto dst_tensor_ptrs = _components[_components.size() - 1]->tensors().get_const_dst_tensors();
+ std::vector<const ITensorInfo *> dst_tensors;
+ for(auto tensor_ptr : dst_tensor_ptrs)
+ {
+ dst_tensors.push_back(tensor_ptr);
+ }
+ return dst_tensors;
+}
+
+std::vector<const ITensorInfo *> GpuKernelComponentGroup::get_argument_tensors() const
+{
+ std::vector<const ITensorInfo *> arguments;
+ const auto src_tensors = get_src_tensors();
+ const auto dst_tensors = get_dst_tensors();
+ arguments.reserve(src_tensors.size() + dst_tensors.size());
+ arguments.insert(arguments.end(), src_tensors.begin(), src_tensors.end());
+ arguments.insert(arguments.end(), dst_tensors.begin(), dst_tensors.end());
+ return arguments;
+}
+
+GpuKernelComponentGroup::ComponentPtr GpuKernelComponentGroup::get_root_component() const
+{
+ if(empty())
+ {
+ return nullptr;
+ }
+ return _components[0];
+}
+
+GpuKernelComponentGroup::ComponentPtr GpuKernelComponentGroup::get_last_component() const
+{
+ if(empty())
+ {
+ return nullptr;
+ }
+ return _components[_components.size() - 1];
+}
+
+GpuKernelComponentGroup::ComponentPtr GpuKernelComponentGroup::get_previous_component(ComponentId id) const
+{
+ if(empty())
+ {
+ return nullptr;
+ }
+ // Get the index of the requested component
+ size_t ind = 0;
+ for(const auto c : _components)
+ {
+ if(c->id() == id)
+ {
+ break;
+ }
+ ind++;
+ }
+ if(ind == 0 || ind >= _components.size())
+ {
+ return nullptr;
+ }
+ return _components[ind - 1];
+}
+
+bool GpuKernelComponentGroup::is_intermediate_tensor(const ITensorInfo *tensor) const
+{
+ return is_tensor_in(tensor, get_interm_tensors());
+}
+
+size_t GpuKernelComponentGroup::size() const
+{
+ return _components.size();
+}
+bool GpuKernelComponentGroup::empty() const
+{
+ return _components.empty();
+}
+GpuKernelComponentGroup::ComponentPtr &GpuKernelComponentGroup::operator[](size_t index)
+{
+ return _components[index];
+}
+const GpuKernelComponentGroup::ComponentPtr &GpuKernelComponentGroup::operator[](size_t index) const
+{
+ return _components[index];
+}
+typename std::vector<GpuKernelComponentGroup::ComponentPtr>::iterator GpuKernelComponentGroup::begin()
+{
+ return _components.begin();
+}
+typename std::vector<GpuKernelComponentGroup::ComponentPtr>::iterator GpuKernelComponentGroup::end()
+{
+ return _components.end();
+}
+typename std::vector<GpuKernelComponentGroup::ComponentPtr>::const_iterator GpuKernelComponentGroup::begin() const
+{
+ return _components.cbegin();
+}
+typename std::vector<GpuKernelComponentGroup::ComponentPtr>::const_iterator GpuKernelComponentGroup::end() const
+{
+ return _components.cend();
+}
+typename std::vector<GpuKernelComponentGroup::ComponentPtr>::const_iterator GpuKernelComponentGroup::cbegin() const
+{
+ return _components.cbegin();
+}
+typename std::vector<GpuKernelComponentGroup::ComponentPtr>::const_iterator GpuKernelComponentGroup::cend() const
+{
+ return _components.cend();
+}
+
+std::vector<const ITensorInfo *> GpuKernelComponentGroup::get_interm_tensors() const
+{
+ std::vector<const ITensorInfo *> interm_tensors{};
+ for(unsigned int i = 0; i + 1 < _components.size(); ++i)
+ {
+ auto interm_tensor = _components[i]->tensors().get_const_dst_tensors()[0];
+ interm_tensors.push_back(interm_tensor); // PRE: Only one dst tensor per component
+ }
+
+ return interm_tensors;
+}
+
+bool GpuKernelComponentGroup::is_tensor_in(const ITensorInfo *tensor, const std::vector<const ITensorInfo *> tensors)
+{
+ for(auto t : tensors)
+ {
+ if(tensor->id() == t->id())
+ {
+ return true;
+ }
+ }
+ return false;
+}
+
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h
new file mode 100644
index 0000000000..4c9d940594
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h
@@ -0,0 +1,143 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELCOMPONENTGROUP
+#define SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELCOMPONENTGROUP
+
+#include "components/Types.h"
+
+#include <cstdint>
+#include <cstdlib>
+#include <vector>
+
+namespace arm_compute
+{
+/** Forward declaration */
+class ITensorInfo;
+namespace experimental
+{
+namespace dynamic_fusion
+{
+class IGpuKernelComponent;
+/** A group of gpu kernel components to be fused together
+ * PRECONDITIONS:
+ * 1. Fusion is limited to a linear sequence of kernel components
+ * INVARIANTS:
+ * @note These preconditions and invariants are exactly the same as fusion constraints for kernel components
+ * 2. Max number of components that can be fused is @ref GpuKernelComponentGroup::max_fused_components (
+ * excluding any output or input (if any) components.
+ * The max number of output components is bounded by the maximum number of dst tensors allowed for a component / component group
+ * )
+ * 3. The fusion is subject to the pattern: (Complex + Simple * | Simple + Simple * | Un-fusable) + Output?
+ * 4. All components but unfusable, have exactly 1 dst tensor
+ * 5. All fused components share the same @ref IGpuKernelComponent::Properties ( @ref UnitWorkloadStage etc. )
+ * 6. All fused components share the same tunable parameters like tile size
+ * 7. All fused components share the same dst tensor shape
+ * 8. All fused components' tensors share the same @ref DataLayout
+ * 9. Maximum number of dst tensors allowed for a component (including unfusable) / component group is @ref GpuKernelComponentGroup::max_dst_tensors
+ * This has an impact on the total number of components supported, which = max_fused_components + max_dst_tensors
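+ *
+ * A minimal usage sketch (conv2d_comp and store_comp are illustrative component pointers):
+ * @code
+ * GpuKernelComponentGroup group{};
+ * group.add_component(conv2d_comp);    // Root component
+ * if(!group.add_component(store_comp)) // Returns false if any fusion constraint is violated
+ * {
+ *     // Start a new group for store_comp instead
+ * }
+ * @endcode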
+ */
+class GpuKernelComponentGroup
+{
+public:
+ using ComponentPtr = IGpuKernelComponent *;
+ /** Maximum number of components that can be fused into the same component group
+ */
+ static constexpr size_t max_fused_components = 64;
+ /** Maximum number of dst tensors allowed for a component / component group
+ */
+ static constexpr size_t max_dst_tensors = 8;
+
+public:
+ /** Default constructor */
+ GpuKernelComponentGroup() = default;
+ /** Allow instances of this class to be copy constructed */
+ GpuKernelComponentGroup(const GpuKernelComponentGroup &) = default;
+ /** Allow instances of this class to be copied */
+ GpuKernelComponentGroup &operator=(const GpuKernelComponentGroup &) = default;
+ /** Allow instances of this class to be move constructed */
+ GpuKernelComponentGroup(GpuKernelComponentGroup &&) = default;
+ /** Allow instances of this class to be moved */
+ GpuKernelComponentGroup &operator=(GpuKernelComponentGroup &&) = default;
+ /** Add a component pointer into the group
+ * If the operation fails, then no change is made to the group
+ *
+ * @param[in] component Pointer to the component to be added
+ *
+ * @return true If the operation is successful
+ * @return false If the operation fails
+ */
+ bool add_component(ComponentPtr component);
+ /** Get source tensors of this group */
+ std::vector<const ITensorInfo *> get_src_tensors() const;
+ /** Get destination tensors of this group */
+ std::vector<const ITensorInfo *> get_dst_tensors() const;
+ /** Get the tensor arguments of this group
+ * A tensor is an argument if it is a source or destination tensor to the group
+ */
+ std::vector<const ITensorInfo *> get_argument_tensors() const;
+ /** Get the root (first) component of this group */
+ ComponentPtr get_root_component() const;
+ /** Get the last component of this group */
+ ComponentPtr get_last_component() const;
+ /** Get the previous component to the component with id @p id
+ *
+ * @param[in] id Component id of the component whose previous component is of concern
+ *
+ * @return ComponentPtr Pointer to the previous component of the one identified by @p id
+ */
+ ComponentPtr get_previous_component(ComponentId id) const;
+ /** Check if a @ref ITensorInfo is an "intermediate" tensor of the group
+ *
+ * An intermediate tensor is any tensor that is not an argument.
+ *
+ * @param[in] tensor @ref ITensorInfo to be looked up
+ *
+ * @return true If @p tensor is an intermediate tensor
+ * @return false Otherwise
+ */
+ bool is_intermediate_tensor(const ITensorInfo *tensor) const;
+ /** Get the number of components within the group */
+ size_t size() const;
+ /** Check if the component group is empty */
+ bool empty() const;
+ ComponentPtr &operator[](size_t index);
+ const ComponentPtr &operator[](size_t index) const;
+ typename std::vector<ComponentPtr>::iterator begin();
+ typename std::vector<ComponentPtr>::iterator end();
+ typename std::vector<ComponentPtr>::const_iterator begin() const;
+ typename std::vector<ComponentPtr>::const_iterator end() const;
+ typename std::vector<ComponentPtr>::const_iterator cbegin() const;
+ typename std::vector<ComponentPtr>::const_iterator cend() const;
+
+private:
+ std::vector<const ITensorInfo *> get_interm_tensors() const;
+
+ static bool is_tensor_in(const ITensorInfo *tensor, const std::vector<const ITensorInfo *> tensors);
+
+ std::vector<ComponentPtr> _components{};
+};
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELCOMPONENTGROUP */
diff --git a/src/dynamic_fusion/sketch/gpu/GpuKernelComponentStream.cpp b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentStream.cpp
new file mode 100644
index 0000000000..aac84b6c59
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentStream.cpp
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "GpuKernelComponentStream.h"
+
+#include "src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSourceCode.h"
+#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+GpuKernelComponentStream::GpuKernelComponentStream(GpuComponentServices *services, const MemoryDescriptorMap &mem_map)
+ : _services{ services }, _component_groups{}, _mem_map{ mem_map }
+{
+}
+
+GpuWorkloadSourceCode GpuKernelComponentStream::write_workload_code()
+{
+ GpuWorkloadSourceCode source_code;
+ // Traverse through component groups and assemble workload together
+ for(auto && group : _component_groups)
+ {
+ // Write kernel code
+ GpuLogicalKernel logical_kernel(_services, group);
+ const GpuKernelSourceCode kernel_code = logical_kernel.write_kernel_code();
+ // The whole unit workload stage is determined by the root component
+ const auto unit_workload_stage = group.get_root_component()->properties().stage();
+ source_code.add_unit_workload(kernel_code, unit_workload_stage, _mem_map);
+ }
+ return source_code;
+}
+
+void GpuKernelComponentStream::new_component_group()
+{
+ _component_groups.emplace_back();
+}
+
+bool GpuKernelComponentStream::add_component(IGpuKernelComponent *component)
+{
+ ARM_COMPUTE_ERROR_ON(_component_groups.empty());
+ return _component_groups.back().add_component(component);
+}
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/GpuKernelComponentStream.h b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentStream.h
new file mode 100644
index 0000000000..cbaa7c297b
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentStream.h
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELCOMPONENTSTREAM
+#define SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELCOMPONENTSTREAM
+
+#include "arm_compute/dynamic_fusion/sketch/MemoryDescriptor.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSourceCode.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+class GpuComponentServices;
+class IGpuKernelComponent;
+
+/** A linear sequence of component groups serialized from the @ref GpuKernelComponentGraph
+ * Each component group in the stream denotes a complete kernel that may consist of multiple components
+ *
+ * The main purposes of this class are:
+ * - Facilitate component fusion algorithm by allowing insertions of new component groups into the stream
+ * - Invoke kernel writer and assemble the final @ref GpuWorkloadSourceCode
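+ *
+ * A rough usage sketch, mirroring how @ref GpuKernelComponentGraph::fuse uses the stream
+ * (services, mem_map and comp are illustrative):
+ * @code
+ * GpuKernelComponentStream stream{ &services, mem_map };
+ * stream.new_component_group();
+ * if(!stream.add_component(comp))
+ * {
+ *     stream.new_component_group(); // comp violated the current group's constraints: start a new group
+ *     stream.add_component(comp);
+ * }
+ * GpuWorkloadSourceCode source_code = stream.write_workload_code();
+ * @endcode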
+ */
+class GpuKernelComponentStream
+{
+public:
+ /** Constructor
+ *
+ * @param[in] services @ref GpuComponentServices to be used throughout the stream
+ * @param[in] mem_map @ref MemoryDescriptor map used to assemble the @ref GpuWorkloadSourceCode
+ */
+ GpuKernelComponentStream(GpuComponentServices *services, const MemoryDescriptorMap &mem_map);
+ /** Allow instances of this class to be copy constructed */
+ GpuKernelComponentStream(const GpuKernelComponentStream &stream) = default;
+ /** Allow instances of this class to be copied */
+ GpuKernelComponentStream &operator=(const GpuKernelComponentStream &stream) = default;
+ /** Allow instances of this class to be move constructed */
+ GpuKernelComponentStream(GpuKernelComponentStream &&stream) = default;
+ /** Allow instances of this class to be moved */
+ GpuKernelComponentStream &operator=(GpuKernelComponentStream &&stream) = default;
+ /** Generate and assemble @ref GpuWorkloadSourceCode from the stream */
+ GpuWorkloadSourceCode write_workload_code();
+ /** Insert a new component group in the stream.
+ * Subsequent components are added to this group until the end of the stream or until new_component_group() is called again
+ */
+ void new_component_group();
+ /** Add a component to the previously created component group
+ * Throw an error if no component group is present in the stream
+ *
+ * @param[in] component Component to be inserted
+ *
+ * @return true If the operation is successful
+ * @return false Otherwise
+ */
+ bool add_component(IGpuKernelComponent *component);
+
+private:
+ GpuComponentServices *_services;
+ std::vector<GpuKernelComponentGroup> _component_groups{};
+ MemoryDescriptorMap _mem_map{};
+};
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELCOMPONENTSTREAM */
diff --git a/src/dynamic_fusion/sketch/gpu/GpuKernelSourceCode.h b/src/dynamic_fusion/sketch/gpu/GpuKernelSourceCode.h
new file mode 100644
index 0000000000..7479328d7b
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/GpuKernelSourceCode.h
@@ -0,0 +1,126 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELSOURCECODE
+#define SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELSOURCECODE
+
+#include "arm_compute/core/CL/CLCompileContext.h"
+#include "arm_compute/core/Window.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
+
+#include <map>
+#include <string>
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+/** The argument list of a @ref GpuKernelSourceCode */
+using GpuKernelArgumentList = std::map<ITensorInfo::Id, GpuKernelArgument>;
+
+/** Container of kernel code to be compiled and run in a @ref GpuUnitWorkload
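+ *
+ * The setters follow a fluent style and can be chained when assembling the kernel code
+ * (the string and option values here are purely illustrative):
+ * @code
+ * GpuKernelSourceCode code{};
+ * code.name("fused_kernel").code(kernel_src).config_id(cfg_id).build_options(build_opts).window(win);
+ * @endcode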
+ */
+class GpuKernelSourceCode
+{
+public:
+ /** Set kernel name */
+ GpuKernelSourceCode &name(const std::string &n)
+ {
+ _name = n;
+ return *this;
+ }
+ /** Set kernel code */
+ GpuKernelSourceCode &code(const std::string &c)
+ {
+ _code = c;
+ return *this;
+ }
+ /** Set kernel config id string */
+ GpuKernelSourceCode &config_id(const std::string &c_id)
+ {
+ _config_id = c_id;
+ return *this;
+ }
+ /** Set kernel build options */
+ GpuKernelSourceCode &build_options(const CLBuildOptions &b_options)
+ {
+ _build_options = b_options;
+ return *this;
+ }
+ /** Set kernel execution window */
+ GpuKernelSourceCode &window(const Window &window)
+ {
+ _window = window;
+ return *this;
+ }
+ /** Set kernel argument list */
+ GpuKernelSourceCode &arguments(const GpuKernelArgumentList &arguments)
+ {
+ _arguments = arguments;
+ return *this;
+ }
+ /** Get kernel name */
+ std::string name() const
+ {
+ return _name;
+ }
+ /** Get kernel code */
+ std::string code() const
+ {
+ return _code;
+ }
+ /** Get kernel config id string */
+ std::string config_id() const
+ {
+ return _config_id;
+ }
+ /** Get kernel build options */
+ const CLBuildOptions &build_options() const
+ {
+ return _build_options;
+ }
+ /** Get kernel execution window */
+ const Window &window() const
+ {
+ return _window;
+ }
+ /** Get kernel argument list */
+ const GpuKernelArgumentList &arguments() const
+ {
+ return _arguments;
+ }
+
+private:
+ std::string _name{};
+ std::string _code{};
+ std::string _config_id{};
+ CLBuildOptions _build_options{};
+ Window _window{};
+ GpuKernelArgumentList _arguments{};
+};
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELSOURCECODE */
diff --git a/src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.cpp b/src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.cpp
new file mode 100644
index 0000000000..7746f8bbf3
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.cpp
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "GpuLogicalKernel.h"
+
+#include "arm_compute/core/experimental/Types.h"
+
+#include "src/dynamic_fusion/sketch/ArgumentPack.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuComponentServices.h"
+#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h"
+#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.h"
+#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+GpuLogicalKernel::GpuLogicalKernel(GpuComponentServices *services, const GpuKernelComponentGroup &components)
+ : _services{ services }, _comp_group{ components }, _store_components{}
+{
+ add_load_store();
+}
+
+GpuKernelSourceCode GpuLogicalKernel::write_kernel_code()
+{
+ GpuKernelSourceCode code;
+ ClTemplateWriter writer{ _comp_group };
+
+ code.name(writer.get_name());
+ code.code(writer.get_code());
+ code.arguments(writer.get_tensors());
+ code.build_options(writer.get_build_options());
+ code.config_id(writer.get_config_id());
+ code.window(writer.get_window());
+
+ return code;
+}
+
+void GpuLogicalKernel::add_load_store()
+{
+ const auto dst_tensors = _comp_group.get_dst_tensors();
+ // Each dst tensor from the component group requires exactly one store component
+ for(const auto &dst_tensor : dst_tensors)
+ {
+ ArgumentPack<ITensorInfo> tensors;
+ // Pass same destination tensor to both source and destination of the store component
+ // In other words, the addition of a store component does not create a new dst tensor
+        // This way we avoid the issue where the dst tensor of the component group differs from that of the logical kernel
+        // This may seem to violate the acyclicity of the component graph, but it is fine because by the time the
+        // logical kernel is constructed, a graph representation of the components is no longer needed
+        // (the graph has already been serialized)
+ tensors.add_const_tensor(ACL_SRC_0, dst_tensor);
+ tensors.add_const_tensor(ACL_DST_0, dst_tensor);
+
+ auto store = _services->component_factory().create<ClComponentStore>(
+            _comp_group.get_root_component()->properties(), // Store components share the same properties as the root component
+ tensors);
+ _store_components.push_back(std::move(store));
+ auto success = _comp_group.add_component(_store_components.back().get());
+        ARM_COMPUTE_ERROR_ON(!success); // Any load/store insertion is guaranteed to succeed
+ }
+}
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.h b/src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.h
new file mode 100644
index 0000000000..4ce4443f60
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_GPULOGICALKERNEL
+#define SRC_DYNAMIC_FUSION_SKETCH_GPU_GPULOGICALKERNEL
+
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelSourceCode.h"
+
+#include <memory>
+#include <vector>
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+/** Forward declaration */
+class GpuComponentServices;
+class IGpuKernelComponent;
+
+/** A wrapper-processor of a @ref GpuKernelComponentGroup.
+ * It adds the load (if any) and store components to the component group.
+ * The resulting @ref GpuLogicalKernel represents a complete kernel and can invoke any kernel writer to generate the full kernel code
+ */
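+// Usage sketch (illustrative only; services and group are hypothetical variables):
+//   GpuLogicalKernel logical_kernel(&services, group);
+//   GpuKernelSourceCode code = logical_kernel.write_kernel_code();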
+class GpuLogicalKernel
+{
+public:
+ /** Constructor
+ *
+ * @param[in] services @ref GpuComponentServices to be used
+ * @param[in] components Component group from which this logical kernel is initialized
+ */
+ explicit GpuLogicalKernel(GpuComponentServices *services, const GpuKernelComponentGroup &components);
+ /** Allow instances of this class to be copy constructed */
+ GpuLogicalKernel(const GpuLogicalKernel &) = default;
+ /** Allow instances of this class to be copied */
+ GpuLogicalKernel &operator=(const GpuLogicalKernel &) = default;
+ /** Allow instances of this class to be move constructed */
+ GpuLogicalKernel(GpuLogicalKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ GpuLogicalKernel &operator=(GpuLogicalKernel &&) = default;
+ /** Generate a @ref GpuKernelSourceCode */
+ GpuKernelSourceCode write_kernel_code();
+
+private:
+ void add_load_store();
+
+ GpuComponentServices *_services;
+ GpuKernelComponentGroup _comp_group{};
+ std::vector<std::unique_ptr<IGpuKernelComponent>> _store_components{};
+};
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_GPULOGICALKERNEL */
diff --git a/src/dynamic_fusion/sketch/gpu/GpuOperatorGroup.cpp b/src/dynamic_fusion/sketch/gpu/GpuOperatorGroup.cpp
new file mode 100644
index 0000000000..e8ef835405
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/GpuOperatorGroup.cpp
@@ -0,0 +1,172 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "src/dynamic_fusion/sketch/gpu/GpuOperatorGroup.h"
+
+#include "arm_compute/core/Validate.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+namespace
+{
+std::vector<DependencyGraph::TensorId> get_tensor_ids(const std::vector<const ITensorInfo *> &tensors)
+{
+ std::vector<DependencyGraph::TensorId> tensor_ids{};
+ std::transform(
+ std::begin(tensors), std::end(tensors),
+ std::back_inserter(tensor_ids),
+ [](const auto & t)
+ {
+ return t->id();
+ });
+ return tensor_ids;
+}
+
+} // namespace
+
+Operator::Operator(OperatorId id, GpuOperatorType operator_type, const ArgumentPack<ITensorInfo> &tensors)
+ : _id{ id }, _operator_type{ operator_type }, _tensors{ tensors }
+{
+}
+
+OperatorId Operator::id() const
+{
+ return _id;
+}
+
+GpuOperatorType Operator::operator_type() const
+{
+ return _operator_type;
+}
+
+ArgumentPack<ITensorInfo> Operator::tensors() const
+{
+ return _tensors;
+}
+
+bool GpuOperatorGroup::try_add_operator(const Operator &op) const
+{
+ const auto src_tensor_ids = get_tensor_ids(op.tensors().get_const_src_tensors());
+ const auto dst_tensor_ids = get_tensor_ids(op.tensors().get_const_dst_tensors());
+    // Constraint 1: Fusion is limited to a linear sequence of operators
+ if(!_graph.try_add_operator_as_linear(op.id(), src_tensor_ids, dst_tensor_ids))
+ {
+ return false;
+ }
+    // Constraint 2: The number of fused operators must not exceed max_fused_operators
+ if(_operators.size() >= max_fused_operators)
+ {
+ return false;
+ }
+    // Constraint 3.1: An Unfusable root operator cannot be fused with any further operators
+ if(_operators.size() > 0 && get_root_operator()->operator_type() == GpuOperatorType::Unfusable)
+ {
+ return false;
+ }
+    // Constraint 3.2: Only Simple operators can be fused onto an existing root (pattern: Complex + Simple* | Simple + Simple*)
+ if(_operators.size() > 0 && (op.operator_type() != GpuOperatorType::Simple))
+ {
+ return false;
+ }
+    // Constraint 4: All operators except Unfusable ones must have exactly 1 dst tensor
+ if(op.operator_type() != GpuOperatorType::Unfusable && op.tensors().get_const_dst_tensors().size() != 1U)
+ {
+ return false;
+ }
+    // Constraint 5: All fused operators must share the same dst tensor shape
+ if(_operators.size() > 0)
+ {
+ const auto root_dst_tensors = get_root_operator()->tensors().get_const_dst_tensors();
+ ARM_COMPUTE_ERROR_ON(root_dst_tensors.empty());
+ const auto first_dst_tensor = root_dst_tensors[0];
+ const auto dst_tensors = op.tensors().get_const_dst_tensors();
+ for(const auto &t : root_dst_tensors)
+ {
+ if(detail::have_different_dimensions(t->tensor_shape(), first_dst_tensor->tensor_shape(), 0))
+ {
+ return false;
+ }
+ }
+ for(const auto &t : dst_tensors)
+ {
+ if(detail::have_different_dimensions(t->tensor_shape(), first_dst_tensor->tensor_shape(), 0))
+ {
+ return false;
+ }
+ }
+ }
+    // Constraint 6: All fused operators' dst tensors must share the same data layout
+ if(_operators.size() > 0)
+ {
+ const auto root_dst_tensors = get_root_operator()->tensors().get_const_dst_tensors();
+ ARM_COMPUTE_ERROR_ON(root_dst_tensors.empty());
+ const auto first_dst_tensor_layout = root_dst_tensors[0]->data_layout();
+ const auto dst_tensors = op.tensors().get_const_dst_tensors();
+ for(const auto &t : root_dst_tensors)
+ {
+ if(t->data_layout() != first_dst_tensor_layout)
+ {
+ return false;
+ }
+ }
+ for(const auto &t : dst_tensors)
+ {
+ if(t->data_layout() != first_dst_tensor_layout)
+ {
+ return false;
+ }
+ }
+ }
+ return true;
+}
+void GpuOperatorGroup::add_operator(const Operator &op)
+{
+ ARM_COMPUTE_ERROR_ON(!try_add_operator(op));
+ const auto src_tensor_ids = get_tensor_ids(op.tensors().get_const_src_tensors());
+ const auto dst_tensor_ids = get_tensor_ids(op.tensors().get_const_dst_tensors());
+ _graph.add_operator_as_linear(op.id(), src_tensor_ids, dst_tensor_ids);
+ _operators[op.id()] = op;
+}
+Operator GpuOperatorGroup::new_operator(const GpuOperatorType &operator_type, const ArgumentPack<ITensorInfo> &tensors) const
+{
+ auto new_id = static_cast<OperatorId>(_operators.size());
+ return Operator{ new_id, operator_type, tensors };
+}
+const Operator *GpuOperatorGroup::get_root_operator() const
+{
+ const auto roots = _graph.get_root_ops();
+ ARM_COMPUTE_ERROR_ON(roots.size() > 1);
+ if(roots.empty())
+ {
+ return nullptr;
+ }
+ return &_operators.at(roots[0]);
+}
+
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/GpuOperatorGroup.h b/src/dynamic_fusion/sketch/gpu/GpuOperatorGroup.h
new file mode 100644
index 0000000000..35abe6c543
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/GpuOperatorGroup.h
@@ -0,0 +1,111 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUOPERATORGROUP
+#define SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUOPERATORGROUP
+
+#include "arm_compute/core/ITensorInfo.h"
+#include "src/dynamic_fusion/sketch/ArgumentPack.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuOperatorProperties.h"
+#include "src/dynamic_fusion/sketch/utils/DependencyGraph.h"
+#include <map>
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+using OperatorId = DependencyGraph::OperatorId;
+
+/** An operator for the sole purpose of validating fusion
+ */
+class Operator
+{
+public:
+ /** Default constructor */
+ Operator() = default;
+ /** Get Operator Id */
+ OperatorId id() const;
+ /** Get operator type */
+ GpuOperatorType operator_type() const;
+ /** Get tensor arguments */
+ ArgumentPack<ITensorInfo> tensors() const;
+ friend class GpuOperatorGroup;
+
+private:
+ Operator(OperatorId id, GpuOperatorType operator_type, const ArgumentPack<ITensorInfo> &tensors);
+ OperatorId _id{};
+ GpuOperatorType _operator_type{};
+ ArgumentPack<ITensorInfo> _tensors{};
+};
+
+/** A linear sequence of operators to be fused in a workload
+ * For the time being, this class is only used for validating operator fusion
+ * INVARIANTS:
+ * @note These invariants are exactly the same as operator fusion constraints
+ * 1. Fusion is limited to a linear sequence of operators
+ * 2. Max number of operators that can be fused is @ref GpuOperatorGroup::max_fused_operators
+ * 3. The fusion is subject to the pattern: Complex + Simple * | Simple + Simple * | Un-fusable
+ * 4. All operators, except unfusable ones, have exactly 1 dst tensor
+ * 5. All fused operators share the same dst tensor shape
+ * 6. All fused operators' tensors share the same @ref DataLayout
+ */
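+// Usage sketch (illustrative only; tensors is a hypothetical ArgumentPack<ITensorInfo>):
+//   GpuOperatorGroup group;
+//   const Operator op = group.new_operator(GpuOperatorType::Complex, tensors);
+//   if(group.try_add_operator(op))
+//   {
+//       group.add_operator(op);
+//   }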
+class GpuOperatorGroup
+{
+public:
+ static constexpr size_t max_fused_operators = 32;
+ /** Try adding (without actually adding) an operator to the group
+ *
+ * @param[in] op Operator to be added
+ *
+ * @return true If @p op can be added while maintaining the invariants
+ * @return false Otherwise
+ */
+ bool try_add_operator(const Operator &op) const;
+ /** Add an operator to the group
+ *
+ * @param[in] op Operator to be added
+ */
+ void add_operator(const Operator &op);
+ /** Create a new operator
+ *
+ * @param[in] operator_type @ref GpuOperatorType of the new operator
+ * @param[in] tensors Tensor arguments to the new operator
+ *
+ * @return Operator
+ */
+ Operator new_operator(const GpuOperatorType &operator_type, const ArgumentPack<ITensorInfo> &tensors) const;
+ /** Get the "root operator" of the group, which is the first operator in a linear sequence
+ * @return const Operator* Pointer to the root operator
+ */
+ const Operator *get_root_operator() const;
+
+private:
+ DependencyGraph _graph{};
+ std::map<OperatorId, Operator> _operators{};
+};
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUOPERATORGROUP */
diff --git a/src/dynamic_fusion/sketch/gpu/GpuOperatorProperties.h b/src/dynamic_fusion/sketch/gpu/GpuOperatorProperties.h
new file mode 100644
index 0000000000..c77697c343
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/GpuOperatorProperties.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUOPERATORPROPERTIES
+#define SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUOPERATORPROPERTIES
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+/** Contain properties common to all operator types */
+
+/** Operator type in the context of fusion
+ */
+enum class GpuOperatorType
+{
+ /** Simple operators are operators that:
+     *  1. Have a 1-to-1 mapping between the input elements and output elements, e.g. elementwise operations
+ * 2. Have exactly 1 output
+ */
+ Simple,
+ /** Complex operators are operators that are not simple but are still fusable with simple ones
+ */
+ Complex,
+ /** Unfusable operators are operators that cannot be fused with any other types of operators
+ */
+ Unfusable
+};
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUOPERATORPROPERTIES */
diff --git a/src/dynamic_fusion/sketch/gpu/GpuWorkloadContext.cpp b/src/dynamic_fusion/sketch/gpu/GpuWorkloadContext.cpp
new file mode 100644
index 0000000000..623bf351f8
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/GpuWorkloadContext.cpp
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadContext.h"
+#include "arm_compute/core/CL/CLCompileContext.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+GpuWorkloadContext::GpuWorkloadContext(CLCompileContext *cl_compile_ctx)
+ : _gpu_language{ GpuLanguage::OpenCL }, _cl_compile_ctx{ cl_compile_ctx }
+{
+}
+
+GpuTarget GpuWorkloadContext::gpu_target() const
+{
+ return _cl_compile_ctx->get_gpu_target();
+}
+
+GpuLanguage GpuWorkloadContext::gpu_language() const
+{
+ return _gpu_language;
+}
+
+const CLCompileContext *GpuWorkloadContext::cl_compile_context() const
+{
+ return _cl_compile_ctx;
+}
+
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.cpp b/src/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.cpp
new file mode 100644
index 0000000000..ce7cf1e908
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.cpp
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+GpuWorkloadSketch::GpuWorkloadSketch(Context *context)
+ : _impl{ std::make_unique<Implementation>(context) }
+{
+}
+GpuWorkloadSketch::~GpuWorkloadSketch()
+{
+}
+
+const GpuWorkloadSketch::Context *GpuWorkloadSketch::gpu_context() const
+{
+ return _impl->context();
+}
+
+TensorInfo GpuWorkloadSketch::create_tensor_info(const ITensorInfo &tensor_info)
+{
+ TensorInfo tensor{ tensor_info };
+ tensor.set_id(allocate_new_tensor_id());
+ return tensor;
+}
+
+TensorInfo GpuWorkloadSketch::create_tensor_info()
+{
+ TensorInfo tensor{};
+ tensor.set_id(allocate_new_tensor_id());
+ return tensor;
+}
+
+ITensorInfo::Id GpuWorkloadSketch::allocate_new_tensor_id()
+{
+ return _impl->allocate_new_tensor_id();
+}
+
+GpuWorkloadSketch::Implementation &GpuWorkloadSketch::implementation()
+{
+ return *_impl;
+}
+const GpuWorkloadSketch::Implementation &GpuWorkloadSketch::implementation() const
+{
+ return *_impl;
+}
+
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h b/src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h
new file mode 100644
index 0000000000..3997395c98
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h
@@ -0,0 +1,111 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUWORKLOADSKETCHIMPL
+#define SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUWORKLOADSKETCHIMPL
+
+#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuComponentServices.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuOperatorGroup.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+/** Internal implementation of @ref GpuWorkloadSketch */
+class GpuWorkloadSketch::Implementation
+{
+public:
+ /** Constructor
+ *
+ * @param[in] context global workload creation context
+ */
+ explicit Implementation(
+ Context *context)
+ : _context{ context },
+ _comp_services{},
+ _component_graph{ &_comp_services },
+ _operator_group{}
+ {
+ }
+ /** Prevent instances of this class from being copy constructed */
+ Implementation(const Implementation &impl) = delete;
+ /** Prevent instances of this class from being copied */
+ Implementation &operator=(const Implementation &impl) = delete;
+ /** Allow instances of this class to be move constructed */
+ Implementation(Implementation &&impl) = default;
+ /** Allow instances of this class to be moved */
+ Implementation &operator=(Implementation &&impl) = default;
+ /** Get workload context */
+ const Context *context() const
+ {
+ return _context;
+ }
+ /** Get component graph */
+ const GpuKernelComponentGraph &component_graph() const
+ {
+ return _component_graph;
+ }
+ /** Get component graph */
+ GpuKernelComponentGraph &component_graph()
+ {
+ return _component_graph;
+ }
+ /** Get operator group */
+ const GpuOperatorGroup &operator_group() const
+ {
+ return _operator_group;
+ }
+ /** Get operator group */
+ GpuOperatorGroup &operator_group()
+ {
+ return _operator_group;
+ }
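+    /** Allocate a new tensor id that is unique within this sketch */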
+ ITensorInfo::Id allocate_new_tensor_id()
+ {
+ return ++_next_id;
+ }
+ /** Generate @ref GpuWorkloadSourceCode from the workload sketch
+     * @note The sketch must be valid. Any error encountered while building the code is thrown as an exception.
+ *
+ * @return GpuWorkloadSourceCode The generated workload code
+ */
+ GpuWorkloadSourceCode generate_source_code() const
+ {
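+        // Fuse the kernel component graph into component groups, then write the source code for the entire workload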
+ return component_graph().fuse().write_workload_code();
+ }
+
+private:
+ Context *_context;
+ GpuComponentServices _comp_services;
+ GpuKernelComponentGraph _component_graph;
+ GpuOperatorGroup _operator_group;
+ ITensorInfo::Id _next_id{ ITensorInfo::invalid_tensor_id };
+};
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUWORKLOADSKETCHIMPL */
diff --git a/src/dynamic_fusion/sketch/gpu/GpuWorkloadSourceCode.h b/src/dynamic_fusion/sketch/gpu/GpuWorkloadSourceCode.h
new file mode 100644
index 0000000000..2375f5c6c6
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/GpuWorkloadSourceCode.h
@@ -0,0 +1,252 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUWORKLOADSOURCECODE
+#define SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUWORKLOADSOURCECODE
+
+#include "arm_compute/core/experimental/Types.h"
+#include "arm_compute/dynamic_fusion/sketch/MemoryDescriptor.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelSourceCode.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+/** Uniquely identifies a @ref GpuUnitWorkload within a @ref GpuWorkloadSourceCode */
+using UnitWorkloadId = int32_t;
+
+/** Describes all the info related to a workload argument, so that it can:
+ *  - be used by the runtime to configure the gpu kernel arguments
+ *  - be used by memory managers to allocate the required memory
+ */
+class GpuWorkloadArgument
+{
+public:
+ /** Default constructor */
+ GpuWorkloadArgument() = default;
+ /** Constructor
+ *
+ * @param[in] tensor_info @ref ITensorInfo of the workload argument
+ * @param[in] mem_desc @ref MemoryDescriptor of the workload argument
+ * @param[in] kernel_arg_info @ref GpuKernelArgumentInfo of the workload argument
+ */
+ GpuWorkloadArgument(const ITensorInfo &tensor_info,
+ const MemoryDescriptor &mem_desc,
+ const GpuKernelArgumentInfo &kernel_arg_info)
+ : _tensor_info{ tensor_info },
+ _mem_desc{ mem_desc },
+ _kernel_arg_info{ kernel_arg_info }
+ {
+ }
+ /** Get tensor id within workload */
+ ITensorInfo::Id id() const
+ {
+ return _tensor_info.id();
+ }
+ /** Get @ref ITensorInfo of the argument */
+ ITensorInfo *tensor_info()
+ {
+ return &_tensor_info;
+ }
+ /** Get @ref ITensorInfo of the argument */
+ const ITensorInfo *tensor_info() const
+ {
+ return &_tensor_info;
+ }
+ /** Get @ref MemoryDescriptor of the argument */
+ MemoryDescriptor *memory_descriptor()
+ {
+ return &_mem_desc;
+ }
+ /** Get @ref MemoryDescriptor of the argument */
+ const MemoryDescriptor *memory_descriptor() const
+ {
+ return &_mem_desc;
+ }
+ /** Get @ref GpuKernelArgumentInfo of the argument */
+ GpuKernelArgumentInfo *kernel_argument_info()
+ {
+ return &_kernel_arg_info;
+ }
+ /** Get @ref GpuKernelArgumentInfo of the argument */
+ const GpuKernelArgumentInfo *kernel_argument_info() const
+ {
+ return &_kernel_arg_info;
+ }
+ /** Check if the workload argument has valid id
+ *
+ * @return true If has valid id
+ * @return false Otherwise
+ */
+ bool has_valid_id() const
+ {
+ return _tensor_info.has_valid_id();
+ }
+
+private:
+ TensorInfo _tensor_info{};
+ MemoryDescriptor _mem_desc{};
+ GpuKernelArgumentInfo _kernel_arg_info{};
+};
+
+/** Describes when a unit workload is run.
+ */
+struct UnitWorkloadStage
+{
+ enum class Stage
+ {
+ Prepare, /**< Only run once at the beginning. */
+ Run, /**< Run every time after the first time. */
+ };
+ Stage stage{ Stage::Run };
+};
+
+inline bool operator==(const UnitWorkloadStage &stage0, const UnitWorkloadStage &stage1)
+{
+ return stage0.stage == stage1.stage;
+}
+
+/** The atomic unit in a Gpu workload. It contains exactly one kernel to run.
+ */
+class GpuUnitWorkload
+{
+public:
+ /** Default constructor */
+ GpuUnitWorkload() = default;
+ /** Constructor
+ *
+ * @param[in] id Id that uniquely identifies this unit workload in a workload
+ * @param[in] kernel_code @ref GpuKernelSourceCode contained within
+ * @param[in] stage Stage of the unit workload
+ */
+ GpuUnitWorkload(UnitWorkloadId id, const GpuKernelSourceCode &kernel_code, const UnitWorkloadStage &stage)
+ : _id{ id }, _kernel_code{ kernel_code }, _stage{ stage }
+ {
+ }
+ /** Get the id of the unit workload */
+ UnitWorkloadId id() const
+ {
+ return _id;
+ }
+ /** Get reference to the underlying @ref GpuKernelSourceCode */
+ const GpuKernelSourceCode &code() const
+ {
+ return _kernel_code;
+ }
+ /** Get the stage of the unit workload */
+ UnitWorkloadStage stage() const
+ {
+ return _stage;
+ }
+
+private:
+ UnitWorkloadId _id{};
+ GpuKernelSourceCode _kernel_code{};
+ UnitWorkloadStage _stage{};
+};
+
+/** Hold the generated kernel source code and other information required to compile and run the workload.
+ */
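+// Usage sketch (illustrative only; kernel_code and mem_map are hypothetical variables):
+//   GpuWorkloadSourceCode workload_code;
+//   const auto id = workload_code.add_unit_workload(kernel_code, UnitWorkloadStage{ UnitWorkloadStage::Stage::Run }, mem_map);
+//   const GpuUnitWorkload &uwk = workload_code.query_unit_workload(id);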
+class GpuWorkloadSourceCode
+{
+public:
+ /** Default constructor */
+ GpuWorkloadSourceCode() = default;
+ /** Add a unit workload to the workload code
+ *
+ * @param[in] kernel_code @ref GpuKernelSourceCode to be contained within the unit workload
+ * @param[in] stage Stage of the unit workload
+ * @param[in] mem_map @ref MemoryDescriptor map for all tensors within the unit workload
+ *
+ * @return UnitWorkloadId Allocated unit workload id
+ */
+ UnitWorkloadId add_unit_workload(const GpuKernelSourceCode &kernel_code, const UnitWorkloadStage &stage, const MemoryDescriptorMap &mem_map)
+ {
+        // Use the number of unit workloads added so far as the new Id
+ const auto uwk_id = static_cast<UnitWorkloadId>(_unit_workloads.size());
+ const auto unit_work = GpuUnitWorkload(uwk_id, kernel_code, stage);
+ _unit_workloads.push_back(unit_work);
+ // Assemble kernel argument with memory descriptor to form workload argument
+ for(const auto &id_arg : kernel_code.arguments())
+ {
+ const auto arg_id = id_arg.first;
+ const auto arg = id_arg.second;
+ _workload_arguments[arg_id] = GpuWorkloadArgument{ *arg.tensor_info(), mem_map.at(arg_id), *arg.kernel_argument_info() };
+ if(_tensor_uwork_map.find(arg_id) == _tensor_uwork_map.end())
+ {
+ _tensor_uwork_map[arg_id] = std::set<UnitWorkloadId>();
+ }
+ _tensor_uwork_map[arg_id].insert(uwk_id);
+ }
+ return uwk_id;
+ }
+ /** Get a unit workload from its id */
+ const GpuUnitWorkload &query_unit_workload(UnitWorkloadId id) const
+ {
+ ARM_COMPUTE_ERROR_ON(id < 0);
+ return _unit_workloads.at(id);
+ }
+ /** Get all unit workloads sorted in topological order */
+ std::vector<UnitWorkloadId> unit_workloads() const
+ {
+ std::vector<UnitWorkloadId> ids{};
+
+ for(const auto &uwk : _unit_workloads)
+ {
+ ids.push_back(uwk.id());
+ }
+ return ids;
+ }
+ /** Get a @ref GpuWorkloadArgument from its associated tensor id */
+ const GpuWorkloadArgument *query_tensor(ITensorInfo::Id t_id) const
+ {
+ return &_workload_arguments.at(t_id);
+ }
+ /** Get all tensors in the entire workload */
+ std::vector<ITensorInfo::Id> tensors() const
+ {
+ std::vector<ITensorInfo::Id> ids{};
+ for(const auto &id_tensor : _workload_arguments)
+ {
+ ids.push_back(id_tensor.first);
+ }
+ return ids;
+ }
+ /** Get all unit workloads connected to the tensor with @p t_id */
+ std::vector<UnitWorkloadId> get_unit_workloads_from_tensor(ITensorInfo::Id t_id) const
+ {
+ const auto unit_work_set = _tensor_uwork_map.at(t_id);
+ return std::vector<UnitWorkloadId>(unit_work_set.begin(), unit_work_set.end());
+ }
+
+private:
+ std::vector<GpuUnitWorkload> _unit_workloads{};
+ std::map<ITensorInfo::Id, GpuWorkloadArgument> _workload_arguments{};
+ std::map<ITensorInfo::Id, std::set<UnitWorkloadId>> _tensor_uwork_map{};
+};
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUWORKLOADSOURCECODE */
diff --git a/src/dynamic_fusion/sketch/gpu/IGpuKernelWriter.h b/src/dynamic_fusion/sketch/gpu/IGpuKernelWriter.h
new file mode 100644
index 0000000000..ae67790b4b
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/IGpuKernelWriter.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_IGPUKERNELWRITER
+#define SRC_DYNAMIC_FUSION_SKETCH_GPU_IGPUKERNELWRITER
+
+#include "arm_compute/core/CL/CLCompileContext.h"
+#include "arm_compute/core/Window.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
+
+#include <map>
+#include <string>
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+/** An interface that can write a gpu kernel
+ */
+class IGpuKernelWriter
+{
+public:
+ /** Destructor */
+ virtual ~IGpuKernelWriter()
+ {
+ }
+ /** Generate kernel name */
+ virtual std::string get_name() = 0;
+ /** Generate kernel code */
+ virtual std::string get_code() = 0;
+ /** Generate build options */
+ virtual CLBuildOptions get_build_options() = 0;
+ /** Generate config id string of the entire kernel. This is used for tuning */
+ virtual std::string get_config_id() = 0;
+ /** Generate execution window */
+ virtual Window get_window() const = 0;
+    /** Get the kernel argument list of the kernel */
+ virtual std::map<ITensorInfo::Id, GpuKernelArgument> get_tensors() = 0;
+};
+
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_IGPUKERNELWRITER */
diff --git a/src/dynamic_fusion/sketch/gpu/components/GpuKernelComponentFactory.h b/src/dynamic_fusion/sketch/gpu/components/GpuKernelComponentFactory.h
new file mode 100644
index 0000000000..f7f0029618
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/components/GpuKernelComponentFactory.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_GPUKERNELCOMPONENTFACTORY
+#define SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_GPUKERNELCOMPONENTFACTORY
+
+#include "Types.h"
+#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h"
+#include <memory>
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+/** Factory class that creates new instances of @ref IGpuKernelComponent by assigning new component ids
+ */
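+// Usage sketch (illustrative only; properties and tensors are hypothetical arguments, and ClComponentStore is assumed to be included):
+//   GpuKernelComponentFactory factory;
+//   std::unique_ptr<IGpuKernelComponent> store = factory.create<ClComponentStore>(properties, tensors);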
+class GpuKernelComponentFactory
+{
+public:
+ /** Create a new kernel component
+ *
+ * @tparam T Any polymorphic type descending from @ref IGpuKernelComponent
+ * @tparam Args Argument types to construct the kernel component
+ *
+ * @param[in] args Arguments to construct the kernel component
+ *
+ * @return std::unique_ptr<IGpuKernelComponent>
+ */
+ template <typename T, typename... Args>
+ std::unique_ptr<IGpuKernelComponent> create(Args &&... args)
+ {
+ return std::make_unique<T>(_count++, std::forward<Args>(args)...);
+ }
+
+private:
+ ComponentId _count{ 0 };
+};
+
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_GPUKERNELCOMPONENTFACTORY */
diff --git a/src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h b/src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h
new file mode 100644
index 0000000000..8bb19155a2
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h
@@ -0,0 +1,119 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_IGPUKERNELCOMPONENT
+#define SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_IGPUKERNELCOMPONENT
+
+#include "Types.h"
+
+#include "src/dynamic_fusion/sketch/ArgumentPack.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSourceCode.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+/** Properties common to all kernel component types */
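+// e.g. (illustrative): KernelProperties{}.stage(UnitWorkloadStage{ UnitWorkloadStage::Stage::Run })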
+class KernelProperties
+{
+public:
+ KernelProperties &stage(const UnitWorkloadStage &stage)
+ {
+ _stage = stage;
+ return *this;
+ }
+ UnitWorkloadStage stage() const
+ {
+ return _stage;
+ }
+
+private:
+ UnitWorkloadStage _stage{};
+};
+
+inline bool operator==(const KernelProperties &config0, const KernelProperties &config1)
+{
+ return config0.stage() == config1.stage();
+}
+
+/** Forward declaration */
+class IGpuTemplateComponentWriter;
+
+/** An abstract interface of a kernel component. It allows the component graph to manipulate components for purposes such as fusion
+ */
+class IGpuKernelComponent
+{
+public:
+ using Properties = KernelProperties;
+
+public:
+ /** Constructor
+ *
+ * @param[in] id Component id
+ * @param[in] properties Kernel component properties
+ * @param[in] tensors Tensor arguments to the components
+ */
+ IGpuKernelComponent(
+ ComponentId id,
+ const Properties &properties,
+ const ArgumentPack<ITensorInfo> &tensors)
+ : _id{ id },
+ _properties{ properties },
+ _tensors{ tensors }
+ {
+ }
+ /** Destructor */
+ virtual ~IGpuKernelComponent()
+ {
+ }
+ /** Get component id */
+ ComponentId id() const
+ {
+ return _id;
+ }
+ /** Get tensor arguments */
+ ArgumentPack<ITensorInfo> tensors() const
+ {
+ return _tensors;
+ }
+ /** Get properties */
+ Properties properties() const
+ {
+ return _properties;
+ }
+ /** Get template writer for the component */
+ virtual const IGpuTemplateComponentWriter *template_writer() const = 0;
+ /** Get component type */
+ virtual GpuComponentType type() const = 0;
+
+private:
+ ComponentId _id{ -1 };
+ Properties _properties{};
+ ArgumentPack<ITensorInfo> _tensors{};
+};
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_IGPUKERNELCOMPONENT */
diff --git a/src/dynamic_fusion/sketch/gpu/components/Types.h b/src/dynamic_fusion/sketch/gpu/components/Types.h
new file mode 100644
index 0000000000..54b3a69057
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/components/Types.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_TYPES
+#define SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_TYPES
+
+#include <cstdint>
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+/** Uniquely identifies a kernel component within a workload
+ */
+using ComponentId = int32_t;
+
+/** Component type in the context of fusion
+ * Its main purpose is to inform the optimizer how to perform fusion.
+ */
+enum class GpuComponentType
+{
+    Complex,   /**< Complex component that is not Simple but can still be fused with Simple ones, e.g. direct convolution */
+    Simple,    /**< Simple component with a 1-to-1 mapping between input and output elements, e.g. elementwise */
+    Unfusable, /**< Component that cannot be fused with any other component */
+    Output     /**< Output component that writes the final result, e.g. a store component */
+};
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_TYPES */
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.cpp
new file mode 100644
index 0000000000..e94cfd1581
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.cpp
@@ -0,0 +1,152 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "ClComponentDirectConv2d.h"
+
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "arm_compute/dynamic_fusion/sketch/OperatorAttributes.h"
+#include "src/core/CL/CLValidate.h"
+#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+using Settings = ClComponentDirectConv2dSettings;
+
+Settings &Settings::export_to_cl_image(bool cl_image)
+{
+ _export_to_cl_image = cl_image;
+ return *this;
+}
+
+bool Settings::export_to_cl_image() const
+{
+ return _export_to_cl_image;
+}
+
+Settings &Settings::fast_relaxed_math(bool fast_relaxed_math)
+{
+ _fast_relaxed_math = fast_relaxed_math;
+ return *this;
+}
+
+bool Settings::fast_relaxed_math() const
+{
+ return _fast_relaxed_math;
+}
+
+Status ClComponentDirectConv2d::validate(
+ const Properties &properties,
+ const ArgumentPack<ITensorInfo> &tensors,
+ const Attributes &attributes,
+ const Settings &settings)
+{
+ ARM_COMPUTE_UNUSED(properties, settings);
+ const auto src = tensors.get_const_tensor(TensorType::ACL_SRC_0);
+ const auto wei = tensors.get_const_tensor(TensorType::ACL_SRC_1);
+ const auto bia = tensors.get_const_tensor(TensorType::ACL_SRC_2);
+ const auto dst = tensors.get_const_tensor(TensorType::ACL_DST_0);
+
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, wei, dst);
+
+ // 1. Check validity
+ // Matching data type
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, wei);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, dst);
+ if(bia != nullptr)
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, bia);
+ }
+
+ // Matching data layout
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(src, wei);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(src, dst);
+ if(bia != nullptr)
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(src, bia);
+ }
+
+ // All tensor infos are initialized
+ ARM_COMPUTE_RETURN_ERROR_ON(src->tensor_shape().total_size() == 0);
+ ARM_COMPUTE_RETURN_ERROR_ON(wei->tensor_shape().total_size() == 0);
+ ARM_COMPUTE_RETURN_ERROR_ON(dst->tensor_shape().total_size() == 0);
+ if(bia != nullptr)
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON(bia->tensor_shape().total_size() == 0);
+ }
+ // Device requirements are met
+ ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(src);
+ // wei shape is correct
+ const DataLayout data_layout = src->data_layout();
+ const int channel_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(wei->dimension(channel_idx) != src->dimension(channel_idx), "Weights feature map dimension should match the respective src's one");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(wei->num_dimensions() > 4, "Weights can be at most 4 dimensional");
+
+ // dst shape is correct
+ PadStrideInfo legacy_pad_stride(attributes.stride().x(), attributes.stride().y(), attributes.pad().left, attributes.pad().right, attributes.pad().top,
+ attributes.pad().bottom, DimensionRoundingType{});
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(dst->tensor_shape(),
+ misc::shape_calculator::compute_deep_convolution_shape(*src, *wei, legacy_pad_stride));
+
+ // bia shape is correct
+ if(bia != nullptr)
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(bia->dimension(0) != wei->dimension(3),
+ "Biases size and number of dst feature maps should match");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(bia->num_dimensions() > 1,
+ "Biases should be one dimensional");
+ }
+
+ // 2. Check support level
+ // Data type
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::F16, DataType::F32);
+ // Data layout
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_LAYOUT_NOT_IN(src, DataLayout::NHWC);
+
+ return Status{};
+}
+
+ClComponentDirectConv2d::ClComponentDirectConv2d(
+ ComponentId id,
+ const Properties &properties,
+ const ArgumentPack<ITensorInfo> &tensors,
+ const Attributes &attributes,
+ const Settings &settings)
+ : IGpuKernelComponent{ id, properties, tensors },
+ _component_writer{ std::make_unique<ClTemplateDirectConv2d>(id, tensors, attributes, settings) }
+{
+}
+ClComponentDirectConv2d::~ClComponentDirectConv2d()
+{
+}
+const IGpuTemplateComponentWriter *ClComponentDirectConv2d::template_writer() const
+{
+ return _component_writer.get();
+}
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.h b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.h
new file mode 100644
index 0000000000..fec22b84a5
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.h
@@ -0,0 +1,147 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTDIRECTCONV2D
+#define SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTDIRECTCONV2D
+
+#include "arm_compute/core/Error.h"
+#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h"
+#include <memory>
+
+namespace arm_compute
+{
+/** Forward declaration */
+class ITensorInfo;
+namespace experimental
+{
+namespace dynamic_fusion
+{
+/** Forward declaration */
+template <typename T>
+class ArgumentPack;
+class Conv2dAttributes;
+
+/** Component specific settings
+ */
+class ClComponentDirectConv2dSettings
+{
+public:
+ /** Set export_to_cl_image flag */
+ ClComponentDirectConv2dSettings &export_to_cl_image(bool cl_image);
+ /** Get export_to_cl_image flag */
+ bool export_to_cl_image() const;
+
+ /** Set fast_relaxed_math flag */
+ ClComponentDirectConv2dSettings &fast_relaxed_math(bool fast_relaxed_math);
+ /** Get fast_relaxed_math flag */
+ bool fast_relaxed_math() const;
+
+private:
+ bool _export_to_cl_image{ false };
+ bool _fast_relaxed_math{ true };
+};
+
+/** Forward declaration */
+class ClTemplateDirectConv2d;
+
+class ClComponentDirectConv2d final : public IGpuKernelComponent
+{
+public:
+ /** Attributes are a set of backend-agnostic parameters that define what a component does */
+ using Attributes = Conv2dAttributes;
+ /** Settings are a set of backend-specific parameters that influence the implementation of a component */
+ using Settings = ClComponentDirectConv2dSettings;
+
+public:
+ /** Validate the component
+ *
+ * @param[in] properties Component properties
+     * @param[in]     tensors    Tensor arguments to the component
+ * @param[in] attributes Component attributes
+ * @param[in] settings Component settings
+ *
+ * @return Status Validation results
+ *
+ * Tensor argument names:
+ * - ACL_SRC_0: Input
+ * - ACL_SRC_1: Weight
+ * - ACL_SRC_2: Bias (Optional)
+ * - ACL_DST_0: Output
+ *
+ * Tensor argument constness:
+ * - ACL_SRC_0: Const
+ * - ACL_SRC_1: Const
+ * - ACL_SRC_2: Const
+ * - ACL_DST_0: Const
+ *
+ * Valid data layouts:
+ * - NHWC
+ *
+ * Valid data type configurations:
+ * |ACL_SRC_0 |ACL_SRC_1 |ACL_SRC_2 |ACL_DST_0 |
+ * |:--------------|:--------------|:--------------|:--------------|
+ * |F16 |F16 |F16 |F16 |
+ * |F32 |F32 |F32 |F32 |
+ */
+ static Status validate(
+ const Properties &properties,
+ const ArgumentPack<ITensorInfo> &tensors,
+ const Attributes &attributes,
+ const Settings &settings);
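+
+    // Illustrative call only (mirrors GpuConv2d::validate_op later in this patch); tensor info names are placeholders:
+    //   ArgumentPack<ITensorInfo> tensors;
+    //   tensors.add_const_tensor(ACL_SRC_0, src);
+    //   tensors.add_const_tensor(ACL_SRC_1, wei);
+    //   tensors.add_const_tensor(ACL_SRC_2, bia); // may be nullptr
+    //   tensors.add_const_tensor(ACL_DST_0, dst);
+    //   Status st = ClComponentDirectConv2d::validate(properties, tensors, attributes, settings);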
+
+ /** Constructor
+ *
+ * Similar to @ref ClComponentDirectConv2d::validate()
+ */
+ ClComponentDirectConv2d(
+ ComponentId id,
+ const Properties &properties,
+ const ArgumentPack<ITensorInfo> &tensors,
+ const Attributes &attributes,
+ const Settings &settings);
+
+ /** Destructor */
+ ~ClComponentDirectConv2d() override;
+ /** Prevent instances of this class from being copy constructed */
+ ClComponentDirectConv2d(const ClComponentDirectConv2d &component) = delete;
+ /** Prevent instances of this class from being copied */
+ ClComponentDirectConv2d &operator=(const ClComponentDirectConv2d &component) = delete;
+ /** Allow instances of this class to be move constructed */
+ ClComponentDirectConv2d(ClComponentDirectConv2d &&component) = default;
+ /** Allow instances of this class to be moved */
+ ClComponentDirectConv2d &operator=(ClComponentDirectConv2d &&component) = default;
+ /** Get template writer for the component */
+ const IGpuTemplateComponentWriter *template_writer() const override;
+ /** Get component type */
+ GpuComponentType type() const override
+ {
+ return GpuComponentType::Complex;
+ }
+
+private:
+ std::unique_ptr<ClTemplateDirectConv2d> _component_writer;
+};
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTDIRECTCONV2D */
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.cpp
new file mode 100644
index 0000000000..f49f397ec1
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.cpp
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "ClComponentStore.h"
+
+#include "src/dynamic_fusion/sketch/ArgumentPack.h"
+#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.h"
+
+#include <memory>
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+Status ClComponentStore::validate(
+ const Properties &properties,
+ const ArgumentPack<ITensorInfo> &tensors)
+{
+ ARM_COMPUTE_UNUSED(properties, tensors);
+ return Status{};
+}
+ClComponentStore::ClComponentStore(ComponentId id, const Properties &properties, const ArgumentPack<ITensorInfo> &tensors)
+ : IGpuKernelComponent{ id, properties, tensors }, _component_writer{ std::make_unique<ClTemplateStore>(id, tensors) }
+{
+}
+ClComponentStore::~ClComponentStore()
+{
+}
+const IGpuTemplateComponentWriter *ClComponentStore::template_writer() const
+{
+ return _component_writer.get();
+}
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.h b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.h
new file mode 100644
index 0000000000..bf8c9f031e
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.h
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTSTORE
+#define SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTSTORE
+
+#include "arm_compute/core/Error.h"
+#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h"
+#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.h"
+#include <memory>
+
+namespace arm_compute
+{
+/** Forward declaration */
+class ITensorInfo;
+namespace experimental
+{
+namespace dynamic_fusion
+{
+/** Forward declaration */
+template <typename T>
+class ArgumentPack;
+
+class ClComponentStore final : public IGpuKernelComponent
+{
+public:
+ /** Validate the component
+ *
+ * @param[in] properties Component properties
+ * @param[in] tensors Tensor arguments to the components
+ *
+ * @return Status Validation results
+ *
+ * Tensor argument names:
+ * - ACL_SRC_0: Input
+ * - ACL_DST_0: Output
+ *
+ * Tensor argument constness:
+ * - ACL_SRC_0: Const
+ * - ACL_DST_0: Const
+ *
+ * Valid data layouts:
+ * - NHWC
+ *
+ * Valid data type configurations:
+ * |ACL_SRC_0 |ACL_DST_0 |
+ * |:--------------|:--------------|
+ * |All |All |
+ */
+ static Status validate(
+ const Properties &properties,
+ const ArgumentPack<ITensorInfo> &tensors);
+ /** Constructor
+ *
+ * Similar to @ref ClComponentStore::validate()
+ */
+ ClComponentStore(ComponentId id, const Properties &properties, const ArgumentPack<ITensorInfo> &tensors);
+ /** Destructor */
+ ~ClComponentStore() override;
+ /** Prevent instances of this class from being copy constructed */
+ ClComponentStore(const ClComponentStore &component) = delete;
+ /** Prevent instances of this class from being copied */
+ ClComponentStore &operator=(const ClComponentStore &component) = delete;
+ /** Allow instances of this class to be move constructed */
+ ClComponentStore(ClComponentStore &&component) = default;
+ /** Allow instances of this class to be moved */
+ ClComponentStore &operator=(ClComponentStore &&component) = default;
+ /** Get template writer for the component */
+ const IGpuTemplateComponentWriter *template_writer() const override;
+ /** Get component type */
+ GpuComponentType type() const override
+ {
+ return GpuComponentType::Output;
+ }
+
+private:
+ std::unique_ptr<ClTemplateStore> _component_writer;
+};
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTSTORE */
diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuConv2d.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuConv2d.cpp
new file mode 100644
index 0000000000..98c1cc3939
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/operators/GpuConv2d.cpp
@@ -0,0 +1,255 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuConv2d.h"
+
+#include "arm_compute/core/CL/CLCompileContext.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/core/experimental/Types.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/dynamic_fusion/sketch/ArgumentPack.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSourceCode.h"
+#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.h"
+#include "src/gpu/cl/kernels/gemm/ClGemmHelpers.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+namespace
+{
+bool export_to_cl_image_support(const ITensorInfo *tensor, GPUTarget gpu_target, const cl::Device &device, DataLayout data_layout)
+{
+ if(tensor->tensor_shape()[0] % 4 || (data_layout != DataLayout::NHWC))
+ {
+ return false;
+ }
+
+ // If not floating point
+ if(!is_data_type_float(tensor->data_type()))
+ {
+ return false;
+ }
+
+ if(gpu_target == GPUTarget::G71 || get_arch_from_target(gpu_target) == GPUTarget::MIDGARD)
+ {
+ return false;
+ }
+
+ // Check if the cl_khr_image2d_from_buffer extension is supported on the target platform
+ if(!image2d_from_buffer_supported(device))
+ {
+ return false;
+ }
+
+ // Check cl image pitch alignment
+ if(get_cl_image_pitch_alignment(device) == 0)
+ {
+ return false;
+ }
+
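+    // A cl_image pixel packs 4 elements, so the channel dimension (shape[0] in NHWC) maps to
+    // image width / 4, while W, H and N collapse into the image height.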
+ const size_t image_w = tensor->tensor_shape()[0] / 4;
+ const size_t image_h = tensor->tensor_shape()[1] * tensor->tensor_shape()[2] * tensor->tensor_shape()[3];
+ const size_t max_image_w = device.getInfo<CL_DEVICE_IMAGE2D_MAX_WIDTH>();
+ const size_t max_image_h = device.getInfo<CL_DEVICE_IMAGE2D_MAX_HEIGHT>();
+
+ if(image_w > max_image_w || image_h > max_image_h)
+ {
+ return false;
+ }
+
+ return true;
+}
+
+constexpr GpuOperatorType operator_type = GpuOperatorType::Complex;
+} // namespace
+
+Status GpuConv2d::validate_op(const GpuWorkloadSketch &sketch,
+ const ITensorInfo *src,
+ const ITensorInfo *wei,
+ const ITensorInfo *bia,
+ const ITensorInfo *dst,
+ const Conv2dAttributes &attributes)
+{
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, wei, dst);
+ ARM_COMPUTE_RETURN_ERROR_ON(
+ !src->has_valid_id() || !wei->has_valid_id() || !dst->has_valid_id());
+ if(bia != nullptr)
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON(!bia->has_valid_id());
+ }
+
+ // Perform fusion test
+ // Pack tensor infos
+ ArgumentPack<ITensorInfo> tensors;
+ tensors.add_const_tensor(ACL_SRC_0, src);
+ tensors.add_const_tensor(ACL_SRC_1, wei);
+ tensors.add_const_tensor(ACL_SRC_2, bia);
+ tensors.add_const_tensor(ACL_DST_0, dst);
+ const auto op = sketch.implementation().operator_group().new_operator(operator_type, tensors);
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(!sketch.implementation().operator_group().try_add_operator(op),
+ "Operator fusion test failed. This operator cannot be fused into the workload");
+
+ // Auto initialize dst tensor info
+ TensorInfo dst_info_to_validate = *dst;
+ const auto data_layout = src->data_layout();
+
+ {
+ auto shape = misc::shape_calculator::compute_deep_convolution_shape(src->tensor_shape(), data_layout, wei->tensor_shape(),
+ PadStrideInfo(attributes.stride().x(), attributes.stride().y(), attributes.pad().left,
+ attributes.pad().right,
+ attributes.pad().top, attributes.pad().bottom, DimensionRoundingType::FLOOR)); // use the default DimensionRoundingType
+
+ auto_init_if_empty(dst_info_to_validate, src->clone()->set_tensor_shape(shape));
+ }
+
+ // Check support level
+ // Data type
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::F16, DataType::F32);
+ // Data layout
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_LAYOUT_NOT_IN(src, DataLayout::NHWC);
+
+ const auto sketch_ctx = sketch.implementation().context();
+
+ const auto gpu_target = sketch_ctx->gpu_target();
+
+ if(sketch_ctx->gpu_language() == GpuLanguage::OpenCL)
+ {
+ const auto cl_compile_ctx = sketch_ctx->cl_compile_context();
+ ARM_COMPUTE_RETURN_ERROR_ON(cl_compile_ctx == nullptr);
+ // Validate Direct Conv2d Component
+ {
+ const auto properties = IGpuKernelComponent::Properties().stage(UnitWorkloadStage{ UnitWorkloadStage::Stage::Run });
+ auto settings = ClComponentDirectConv2d::Settings();
+
+ settings.export_to_cl_image(
+ export_to_cl_image_support(src, gpu_target, cl_compile_ctx->get_device(), data_layout));
+
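+            // fast_relaxed_math is only enabled on Bifrost cores other than G71, and only for FP32/FP16 outputs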
+ settings.fast_relaxed_math(
+ (gpu_target != GPUTarget::G71 && (gpu_target & GPUTarget::GPU_ARCH_MASK) == GPUTarget::BIFROST)
+ && (dst_info_to_validate.data_type() == DataType::F32 || dst_info_to_validate.data_type() == DataType::F16));
+
+ ArgumentPack<ITensorInfo> arguments;
+ arguments.add_const_tensor(ACL_SRC_0, src);
+ arguments.add_const_tensor(ACL_SRC_1, wei);
+ arguments.add_const_tensor(ACL_SRC_2, bia);
+ arguments.add_const_tensor(ACL_DST_0, &dst_info_to_validate);
+ ARM_COMPUTE_RETURN_ON_ERROR(ClComponentDirectConv2d::validate(properties, arguments, attributes, settings));
+ }
+ }
+ else
+ {
+ ARM_COMPUTE_RETURN_ERROR_MSG("Unimplemented Gpu language");
+ }
+ return Status{};
+}
+
+void GpuConv2d::create_op(GpuWorkloadSketch &sketch,
+ ITensorInfo *src,
+ ITensorInfo *wei,
+ ITensorInfo *bia,
+ ITensorInfo *dst,
+ const Conv2dAttributes &attributes)
+{
+ // Assert validation
+ ARM_COMPUTE_ERROR_THROW_ON(GpuConv2d::validate_op(sketch, src, wei, bia, dst, attributes));
+ ARM_COMPUTE_ERROR_ON_NULLPTR(src, wei, dst);
+ const auto data_layout = src->data_layout();
+
+ // Auto initialize dst tensor
+ {
+ auto shape = misc::shape_calculator::compute_deep_convolution_shape(src->tensor_shape(), data_layout, wei->tensor_shape(),
+ PadStrideInfo(attributes.stride().x(), attributes.stride().y(), attributes.pad().left,
+ attributes.pad().right,
+ attributes.pad().top, attributes.pad().bottom, DimensionRoundingType::FLOOR)); // use the default DimensionRoundingType
+
+ auto_init_if_empty(*dst, src->clone()->set_tensor_shape(shape));
+ }
+
+ // Translate into components and add to component graph
+ auto &comp_graph = sketch.implementation().component_graph();
+
+ const auto sketch_ctx = sketch.implementation().context();
+
+ const auto gpu_target = sketch_ctx->gpu_target();
+
+ if(sketch_ctx->gpu_language() == GpuLanguage::OpenCL)
+ {
+ const auto cl_compile_ctx = sketch_ctx->cl_compile_context();
+ ARM_COMPUTE_ERROR_ON(cl_compile_ctx == nullptr);
+
+ // Add Direct Conv2d Component
+ {
+ auto properties = IGpuKernelComponent::Properties();
+ properties.stage(UnitWorkloadStage{ UnitWorkloadStage::Stage::Run });
+
+ auto settings = ClComponentDirectConv2d::Settings();
+
+ settings.export_to_cl_image(
+ export_to_cl_image_support(src, gpu_target, cl_compile_ctx->get_device(), data_layout));
+
+ settings.fast_relaxed_math(
+ (gpu_target != GPUTarget::G71 && (gpu_target & GPUTarget::GPU_ARCH_MASK) == GPUTarget::BIFROST)
+ && (dst->data_type() == DataType::F32 || dst->data_type() == DataType::F16));
+
+ if(settings.export_to_cl_image())
+ {
+ arm_compute::opencl::kernels::gemm::update_padding_for_cl_image(wei);
+ }
+
+ ArgumentPack<ITensorInfo> arguments;
+ arguments.add_const_tensor(ACL_SRC_0, src);
+ arguments.add_const_tensor(ACL_SRC_1, wei);
+ arguments.add_const_tensor(ACL_SRC_2, bia);
+ arguments.add_const_tensor(ACL_DST_0, dst);
+ comp_graph.add_new_component<ClComponentDirectConv2d>(properties, arguments, attributes, settings);
+ }
+ }
+ else
+ {
+ ARM_COMPUTE_ERROR("Unimplemented Gpu language");
+ }
+
+ // Set up fusion test by adding to the Operator Group
+ // Note this has to be performed after all the components have been successfully added to the component graph
+
+ // Pack tensor infos
+ ArgumentPack<ITensorInfo> tensors;
+ tensors.add_const_tensor(ACL_SRC_0, src);
+ tensors.add_tensor(ACL_SRC_1, wei);
+ tensors.add_const_tensor(ACL_SRC_2, bia);
+ tensors.add_tensor(ACL_DST_0, dst);
+
+ const auto op = sketch.implementation().operator_group().new_operator(operator_type, tensors);
+ sketch.implementation().operator_group().add_operator(op);
+}
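+
+// Typical call site (illustrative only; the sketch/context setup and tensor info names are assumed):
+//   GpuWorkloadSketch sketch{ &gpu_ctx };
+//   Conv2dAttributes  conv2d_attr{};
+//   GpuConv2d::create_op(sketch, &src_info, &wei_info, &bia_info, &dst_info, conv2d_attr);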
+
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.cpp
new file mode 100644
index 0000000000..13c0b141a5
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.cpp
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "GpuKernelVariableTable.h"
+#include "arm_compute/core/CL/CLHelpers.h"
+#include "arm_compute/core/ITensorInfo.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+void GpuKernelVariableTable::declare_variable(const ITensorInfo *tensor, GpuKernelArgumentInfo argument_info, bool is_interm, const std::string &alias)
+{
+ ARM_COMPUTE_ERROR_ON_MSG(!tensor->has_valid_id(), "Tensor info with valid id expected");
+ // Do not re-declare if the variable associated with the tensor has already been declared
+ if(get_variable(tensor).has_valid_id())
+ {
+ ARM_COMPUTE_ERROR_ON(!(get_variable(tensor).kernel_argument_info == argument_info));
+ return;
+ }
+ // Declare variable associated with the tensor
+ std::stringstream ss;
+ ss << alias << "_t" << tensor->id();
+ const auto uniq_name = ss.str();
+ TensorVariable var{ tensor->id(), uniq_name, argument_info };
+
+ if(is_interm)
+ {
+ _interm_var = var;
+ _interm_tensors.insert(tensor->id());
+ }
+ else
+ {
+ _vars.emplace(tensor->id(), var);
+ }
+}
+
+GpuKernelVariableTable::TensorVariable GpuKernelVariableTable::get_variable(const ITensorInfo *tensor) const
+{
+ const TensorVariable empty_var{};
+ if(_vars.find(tensor->id()) != _vars.end())
+ {
+ return _vars.at(tensor->id());
+ }
+ if(_interm_tensors.find(tensor->id()) != _interm_tensors.end())
+ {
+ return _interm_var;
+ }
+ return empty_var;
+}
+
+GpuKernelVariableTable::VariableList GpuKernelVariableTable::get_variable_list(const std::vector<const ITensorInfo *> &tensors) const
+{
+ VariableList vars{};
+ for(const auto &tensor : tensors)
+ {
+ if(!tensor->has_valid_id())
+ {
+ continue;
+ }
+ vars.push_back(get_variable(tensor));
+ }
+ return vars;
+}
+
+TagVal::TagVal(const GpuKernelVariableTable::TensorVariable &var)
+ : value{ var.uniq_name }
+{
+}
+
+TagVal::TagVal(const std::string &val)
+ : value{ val }
+{
+}
+
+TagVal::TagVal(const char *val)
+ : value{ std::string(val) }
+{
+}
+
+TagVal::TagVal(const DataType &data_type)
+ : value{ get_cl_type_from_data_type(data_type) }
+{
+}
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h b/src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h
new file mode 100644
index 0000000000..4eee3963c2
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h
@@ -0,0 +1,135 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_GPUKERNELVARIABLETABLE
+#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_GPUKERNELVARIABLETABLE
+
+#include "arm_compute/core/ITensorInfo.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
+#include "support/Requires.h"
+#include "support/StringSupport.h"
+
+#include <set>
+#include <string>
+#include <type_traits>
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+/** A table of all the variables used in the kernel
+ * Since fusion is restricted to a linear sequence of components in a kernel, only a single "intermediate variable" (the accumulator) is allowed.
+ * Each kernel has exactly one variable table
+ */
+class GpuKernelVariableTable
+{
+public:
+ /** A tensor variable whose main purposes are:
+ * - Hold the newly assigned @ref GpuKernelArgumentInfo for the associated tensor info
+ * - Hold the generated variable name for the associated tensor info
+ */
+ struct TensorVariable
+ {
+ public:
+ TensorVariable() = default;
+ TensorVariable(const TensorVariable &) = default;
+ TensorVariable &operator=(const TensorVariable &) = default;
+ ITensorInfo::Id id{ ITensorInfo::invalid_tensor_id };
+ std::string uniq_name{ "empty" }; // Unique name, also the final variable name used in the built code
+ GpuKernelArgumentInfo kernel_argument_info{};
+ bool has_valid_id() const
+ {
+ return id != ITensorInfo::invalid_tensor_id;
+ }
+ };
+ using VariableList = std::vector<TensorVariable>;
+
+public:
+ /** Declare a @ref TensorVariable for a corresponding tensor info.
+ *
+     * @note Later re-declaration of the intermediate variable will overwrite the previous association with the @ref ITensorInfo.
+     *       Therefore, the order of declaration is important. It is assumed that the components declaring the variable are already in the correct order.
+ *
+ * @param[in] tensor Tensor info with which the new variable is associated
+ * @param[in] argument_info Kernel argument information
+ * @param[in] is_interm If the new variable is an intermediate variable
+ * @param[in] alias Alias for the variable. Will be used as part of the variable name
+ */
+ void declare_variable(const ITensorInfo *tensor, GpuKernelArgumentInfo argument_info, bool is_interm = false, const std::string &alias = "unnamed");
+ /** Get the @ref TensorVariable associated with @p tensor
+ *
+ * @param[in] tensor Tensor info to be queried
+ *
+ * @return TensorVariable
+ */
+ TensorVariable get_variable(const ITensorInfo *tensor) const;
+ /** Get the @ref TensorVariable list associated with @p tensors
+ * @note Empty tensors are skipped
+ *
+ * @param[in] tensors List of tensor infos to be queried
+ *
+ * @return VariableList
+ */
+ VariableList get_variable_list(const std::vector<const ITensorInfo *> &tensors) const;
+
+private:
+ std::map<ITensorInfo::Id, TensorVariable> _vars{}; /**< Non-intermediate (function parameter) variables*/
+ TensorVariable _interm_var{}; /**< Intermediate variable */
+ std::set<ITensorInfo::Id> _interm_tensors{}; /**< Tensors associated with the single intermediate variable */
+};
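+
+// Illustrative use (mirrors the component writers' declare_variables()/get_tag_lut() below):
+//   vtable.declare_variable(src_info, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer), false, "src");
+//   const auto var = vtable.get_variable(src_info); // var.uniq_name is e.g. "src_t<tensor id>"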
+
+/** A tag value will substitute a tag in a string template during its instantiation */
+struct TagVal
+{
+ /** Default constructor */
+ TagVal() = default;
+ /** Construct a @ref TagVal from a @ref GpuKernelVariableTable::TensorVariable */
+ TagVal(const GpuKernelVariableTable::TensorVariable &var);
+ /** Construct a @ref TagVal from an integral type */
+ template <typename T, ARM_COMPUTE_REQUIRES_TA(std::is_integral<T>::value)>
+ TagVal(T val)
+ : value{ support::cpp11::to_string(val) }
+ {
+ }
+ /** Construct a @ref TagVal from a string */
+ TagVal(const std::string &val);
+ /** Construct a @ref TagVal from a c-style string */
+ TagVal(const char *val);
+ /** Construct a @ref TagVal from a @ref DataType */
+ TagVal(const DataType &data_type);
+    /** The value of the TagVal, converted to a string */
+ std::string value{};
+};
+
+/** A tag used in a string template is a placeholder string to be substituted by real values during template instantiation */
+using Tag = std::string;
+
+/** Tag lookup table. It is used to instantiate a string template */
+using TagLUT = std::unordered_map<Tag, TagVal>;
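+
+// Illustrative population of a lookup table (mirrors the component writers' get_tag_lut() implementations):
+//   TagLUT lut{};
+//   lut["meta_kernel_id"] = 3;               // integral values are converted via support::cpp11::to_string
+//   lut["DST_DATA_TYPE"]  = DataType::F32;   // DataType values are converted to the OpenCL type string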
+
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_GPUKERNELVARIABLETABLE */
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h b/src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h
new file mode 100644
index 0000000000..c85ddf5a2c
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h
@@ -0,0 +1,137 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_IGPUTEMPLATECOMPONENTWRITER
+#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_IGPUTEMPLATECOMPONENTWRITER
+
+#include "arm_compute/core/CL/CLCompileContext.h"
+#include "arm_compute/core/ITensorInfo.h"
+#include "arm_compute/core/Window.h"
+#include "src/dynamic_fusion/sketch/ArgumentPack.h"
+#include "src/dynamic_fusion/sketch/gpu/components/Types.h"
+#include "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+/** Forward declaration */
+class GpuKernelComponentGroup;
+class GpuKernelVariableTable;
+
+/** An interface used by @ref ClTemplateWriter to write source code for a kernel component
+ */
+class IGpuTemplateComponentWriter
+{
+public:
+ using ComponentGroup = GpuKernelComponentGroup;
+
+public:
+ /** Constructor
+ *
+ * @param[in] id Component id
+ * @param[in] tensors Tensor arguments to the components
+ */
+ IGpuTemplateComponentWriter(ComponentId id, const ArgumentPack<ITensorInfo> tensors)
+ : _id{ id }, _tensors{ tensors }
+ {
+ }
+ /** Destructor */
+ virtual ~IGpuTemplateComponentWriter()
+ {
+ }
+ /** Generate kernel component name */
+ virtual std::string get_name() const = 0;
+ /** Generate kernel component code template
+ *
+     * @param[in] comp_group Component group of which the component is a part
+ *
+ * @return std::string Component code
+ */
+ virtual std::string get_component_code(const ComponentGroup &comp_group) const = 0;
+ /** Declare all variables used by the component in the @p vtable
+ *
+ * @param[out] vtable Variable table
+     * @param[in]  comp_group Component group of which the component is a part
+ */
+ virtual void declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const = 0;
+ /** Generate the tag look-up table used to instantiate the component code.
+ *
+ * @param[in] vtable Variable table
+     * @param[in] comp_group Component group of which the component is a part
+ *
+ * @return TagLUT Tag lookup table
+ */
+ virtual TagLUT get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const = 0;
+ /** Generate additional macros used in the component */
+ virtual std::string get_additional_macros() const
+ {
+ return "";
+ }
+ /** Generate the build options used in the component
+ *
+     * @param[in] comp_group Component group of which the component is a part
+ *
+ * @return CLBuildOptions Build options
+ */
+ virtual CLBuildOptions get_build_options(const ComponentGroup &comp_group) const
+ {
+ ARM_COMPUTE_UNUSED(comp_group);
+ return CLBuildOptions{};
+ }
+ /** Generate the component config id string used for tuning */
+ virtual std::string get_config_id() const
+ {
+ return "";
+ }
+ /** Generate the header list used in the component */
+ virtual std::set<std::string> get_headers_list() const
+ {
+ return std::set<std::string> {};
+ }
+ /** Generate the execution window for the component */
+ virtual Window get_window() const
+ {
+ return Window{};
+ }
+ /** Get tensor arguments */
+ ArgumentPack<ITensorInfo> tensors() const
+ {
+ return _tensors;
+ }
+ /** Get component id */
+ ComponentId id() const
+ {
+ return _id;
+ }
+
+private:
+ ComponentId _id{ -1 };
+ ArgumentPack<ITensorInfo> _tensors{};
+};
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_IGPUTEMPLATECOMPONENTWRITER */
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.cpp
new file mode 100644
index 0000000000..870de64eb8
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.cpp
@@ -0,0 +1,400 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "ClTemplateDirectConv2d.h"
+
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
+#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.h"
+
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/helpers/WindowHelpers.h"
+
+#include "support/StringSupport.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+ClTemplateDirectConv2d::ClTemplateDirectConv2d(ComponentId id,
+ const ArgumentPack<ITensorInfo> &tensors,
+ const Attributes &attributes,
+ const Settings &settings)
+ : IGpuTemplateComponentWriter{ id, tensors },
+ _src{},
+ _weight{},
+ _bias{},
+ _dst{},
+ _attributes{ attributes },
+ _settings{ settings }
+{
+ _src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0);
+ _weight = this->tensors().get_const_tensor(TensorType::ACL_SRC_1);
+ if(this->tensors().get_const_tensor(TensorType::ACL_SRC_2))
+ {
+ _bias = this->tensors().get_const_tensor(TensorType::ACL_SRC_2);
+ }
+ _dst = this->tensors().get_const_tensor(TensorType::ACL_DST_0);
+ ARM_COMPUTE_ERROR_ON_NULLPTR(_src, _weight, _dst);
+}
+
+std::string ClTemplateDirectConv2d::get_name() const
+{
+ return "direct_conv2d";
+}
+
+std::string ClTemplateDirectConv2d::get_component_code(const ComponentGroup &comp_group) const
+{
+ ARM_COMPUTE_UNUSED(comp_group);
+
+ const auto channel_idx = get_data_layout_dimension_index(_src->data_layout(), DataLayoutDimension::CHANNEL);
+ const auto k0 = adjust_vec_size(is_data_type_quantized(_src->data_type()) ? 16u : 8u, _src->dimension(channel_idx));
+ const bool leftover_loop = (_src->dimension(channel_idx) % k0) != 0;
+
+ std::string code = R"_(
+//------------------ START KERNEL {{meta_kernel_id}} ---------------------
+// IN_0(src) {{src}}
+// IN_1(wei) {{weight}}
+)_";
+ if(_bias && _bias->has_valid_id())
+ {
+ code += R"_(
+// IN_1(bia) {{bias}}
+)_";
+ }
+ code += R"_(
+// OUT(dst, accum) {{dst}}
+
+// Initialize the accumulators
+TILE({{ACC_DATA_TYPE}}, M0, N0, {{dst}});
+{
+ // All the tensor dimensions are passed at compile time.
+ // In case of dynamic tensor support, the following dimensions should be passed as function argument.
+#define _IWEI_WIDTH {{WEI_WIDTH}}
+#define _IWEI_HEIGHT {{WEI_HEIGHT}}
+#define _ISRC_WIDTH {{src}}_w
+#define _ISRC_HEIGHT {{src}}_h
+#define _ISRC_CHANNELS {{src}}_c
+#define _IDST_WIDTH {{arg_dst}}_w
+#define _IDST_HEIGHT {{arg_dst}}_h
+#define _IDST_CHANNELS {{arg_dst}}_c
+#define _IY_MULTIPLIER (_IWEI_WIDTH * _IWEI_HEIGHT)
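+
+// _IY_MULTIPLIER is the number of kernel spatial positions: the weights for output channel g_ind_0
+// span _IY_MULTIPLIER consecutive rows, one per (xk, yk) position (see the T_LOAD calls below)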
+
+ // .v = access the whole vector (OpenCL vector)
+ // .s[x] = access the vector element at position x (scalar access)
+ TILE(int, M0, 1, xi);
+ TILE(int, M0, 1, yi);
+
+ // Convert the linear index to coordinate
+ LOOP_UNROLLING(int, i, 0, 1, M0,
+ {
+ xi[i].v = ((g_ind_1 + i) % _IDST_WIDTH) * {{STRIDE_X}};
+ yi[i].v = ((g_ind_1 + i) / _IDST_WIDTH) * {{STRIDE_Y}};
+ xi[i].v -= {{PAD_LEFT}};
+ yi[i].v -= {{PAD_TOP}};
+ })
+
+ LOOP_UNROLLING(int, i, 0, 1, M0,
+ {
+ {{dst}}[i].v = 0;
+ })
+
+ for(int i = 0; i < (_IWEI_WIDTH * _IWEI_HEIGHT); ++i)
+ {
+ int ck = 0;
+ int xk = i % _IWEI_WIDTH;
+ int yk = i / _IWEI_WIDTH;
+
+ int k = 0;
+ for(; k <= (_ISRC_CHANNELS - K0); k += K0)
+ {
+ TILE({{SRC_DATA_TYPE}}, M0, K0, a);
+ TILE({{WEI_DATA_TYPE}}, N0, K0, b);
+
+ // Initialize tiles
+ LOOP_UNROLLING(int, i, 0, 1, M0,
+ {
+ a[i].v = {{ZERO_VALUE}};
+ })
+
+ LOOP_UNROLLING(int, i, 0, 1, N0,
+ {
+ b[i].v = {{ZERO_VALUE}};
+ })
+
+ // Load tile from the src tensor
+ T_LOAD_NHWC_INDIRECT({{SRC_DATA_TYPE}}, M0, K0, {{SRC_TENSOR_TYPE}}, {{src}}, g_ind_2, yk, xk, ck, _ISRC_WIDTH, _ISRC_HEIGHT, {{src}}_stride_y, xi, yi, a);
+
+ // Load tile from the weights tensor
+ T_LOAD({{WEI_DATA_TYPE}}, N0, K0, {{WEI_TENSOR_TYPE}}, {{weight}}, ck, g_ind_0 * _IY_MULTIPLIER + i, _IY_MULTIPLIER, {{weight}}_stride_y, b);
+
+ // Compute the matrix multiplication between two tiles
+ T_MMUL({{SRC_DATA_TYPE}}, {{WEI_DATA_TYPE}}, {{ACC_DATA_TYPE}}, M0, N0, K0, NT, T, a, b, {{dst}});
+
+ ck += K0;
+ }
+
+    // The leftover accumulation below is only emitted when _ISRC_CHANNELS is not a multiple of K0.
+    // This compile-time specialisation should be removed in case of dynamic tensor support
+)_";
+
+ if(leftover_loop)
+ {
+ code += R"_(
+ // Left-over accumulations
+ for(; k < _ISRC_CHANNELS; ++k)
+ {
+ TILE({{SRC_DATA_TYPE}}, M0, 1, a);
+ TILE({{WEI_DATA_TYPE}}, N0, 1, b);
+
+ // Initialize tiles
+ LOOP_UNROLLING(int, i, 0, 1, M0,
+ {
+ a[i].v = {{ZERO_VALUE}};
+ })
+
+ LOOP_UNROLLING(int, i, 0, 1, N0,
+ {
+ b[i].v = {{ZERO_VALUE}};
+ })
+
+ // Load tile from the src tensor
+ T_LOAD_NHWC_INDIRECT({{SRC_DATA_TYPE}}, M0, 1, {{SRC_TENSOR_TYPE}}, {{src}}, g_ind_2, yk, xk, ck, _ISRC_WIDTH, _ISRC_HEIGHT, {{src}}_stride_y, xi, yi, a);
+
+ // Load tile from the weights tensor
+ // The T_LOAD for the left-over elements can only use BUFFER because we load one element per iteration
+ T_LOAD({{WEI_DATA_TYPE}}, N0, 1, BUFFER, {{weight}}, ck, g_ind_0 * _IY_MULTIPLIER + i, _IY_MULTIPLIER, {{weight}}_stride_y, b);
+
+ // Compute the matrix multiplication between two tiles
+ T_MMUL({{SRC_DATA_TYPE}}, {{WEI_DATA_TYPE}}, {{ACC_DATA_TYPE}}, M0, N0, 1, NT, T, a, b, {{dst}});
+
+ ++ck;
+ }
+ )_";
+    }
+
+    code += R"_(
+#undef _IWEI_WIDTH
+#undef _IWEI_HEIGHT
+#undef _ISRC_WIDTH
+#undef _ISRC_HEIGHT
+#undef _ISRC_CHANNELS
+#undef _IDST_WIDTH
+#undef _IDST_HEIGHT
+#undef _IDST_CHANNELS
+#undef _IY_MULTIPLIER
+
+ }
+)_";
+
+ if(_bias && _bias->has_valid_id())
+ {
+ code += R"_(
+ TILE({{BIA_DATA_TYPE}}, 1, N0, bias0);
+
+ T_LOAD({{BIA_DATA_TYPE}}, 1, N0, BUFFER, {{bias}}, g_ind_0, 0, 1, 0, bias0);
+
+ // c = c + bias[broadcasted]
+ T_ELTWISE_BROADCAST_ADD_X({{ACC_DATA_TYPE}}, M0, N0, {{dst}}, bias0, {{dst}});
+ )_";
+    }
+
+    code += R"_(
+}
+//------------------ END KERNEL {{meta_kernel_id}} ---------------------
+)_";
+ return code;
+}
+
+void ClTemplateDirectConv2d::declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
+{
+ vtable.declare_variable(
+ _src,
+ GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
+ comp_group.is_intermediate_tensor(_src),
+ "src");
+
+ const GpuKernelArgumentInfo::Type weight_type = _settings.export_to_cl_image() ? GpuKernelArgumentInfo::Type::Tensor_4D_t_Image : GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer;
+ vtable.declare_variable(
+ _weight,
+ GpuKernelArgumentInfo(weight_type),
+ comp_group.is_intermediate_tensor(_weight),
+ "weight");
+
+ if(_bias && _bias->has_valid_id()) // optional bias
+ {
+ vtable.declare_variable(
+ _bias,
+ GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Vector),
+ comp_group.is_intermediate_tensor(_bias),
+ "bias");
+ }
+ vtable.declare_variable(
+ _dst,
+ GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
+ comp_group.is_intermediate_tensor(_dst),
+ "dst");
+}
+
+TagLUT ClTemplateDirectConv2d::get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
+{
+ TagLUT lut{};
+ // Arguments and global shared variables
+ lut["src"] = vtable.get_variable(_src);
+ lut["weight"] = vtable.get_variable(_weight);
+
+ if(_bias && _bias->has_valid_id()) // optional bias
+ {
+ lut["bias"] = vtable.get_variable(_bias);
+ lut["BIA_DATA_TYPE"] = get_cl_type_from_data_type(_bias->data_type());
+ }
+ lut["dst"] = vtable.get_variable(_dst);
+
+ const auto dst_argument = vtable.get_variable(comp_group.get_dst_tensors()[0]);
+ lut["arg_dst"] = dst_argument.uniq_name;
+
+ // Local build options
+ lut["meta_kernel_id"] = id();
+ lut["ACC_DATA_TYPE"] = _src->data_type();
+ lut["SRC_DATA_TYPE"] = _src->data_type();
+ lut["WEI_DATA_TYPE"] = _weight->data_type();
+
+ lut["SRC_TENSOR_TYPE"] = "BUFFER";
+ switch(vtable.get_variable(_weight).kernel_argument_info.type)
+ {
+ case GpuKernelArgumentInfo::Type::Image_Export_To_ClImage2D:
+ case GpuKernelArgumentInfo::Type::Image_3D_Export_To_ClImage2D:
+ case GpuKernelArgumentInfo::Type::Tensor_4D_t_Image:
+ {
+ lut["WEI_TENSOR_TYPE"] = "IMAGE";
+ break;
+ }
+ default:
+ {
+ lut["WEI_TENSOR_TYPE"] = "BUFFER";
+ break;
+ }
+ }
+ const auto width_idx = 1;
+ const auto height_idx = 2;
+ lut["WEI_WIDTH"] = _weight->dimension(width_idx);
+ lut["WEI_HEIGHT"] = _weight->dimension(height_idx);
+
+ lut["STRIDE_X"] = _attributes.stride().x();
+ lut["STRIDE_Y"] = _attributes.stride().y();
+
+ lut["PAD_LEFT"] = _attributes.pad().left;
+ lut["PAD_TOP"] = _attributes.pad().top;
+
+ lut["ZERO_VALUE"] = 0;
+
+ return lut;
+}
+
+CLBuildOptions ClTemplateDirectConv2d::get_build_options(const ComponentGroup &comp_group) const
+{
+ const unsigned int channel_idx = get_data_layout_dimension_index(_src->data_layout(), DataLayoutDimension::CHANNEL);
+ const DataType data_type = _src->data_type();
+
+    /// NOTE: For now the tile sizes (n0, m0) are set by the execution window. This may change in the future
+ const auto root_window = comp_group.get_root_component()->template_writer()->get_window();
+ const unsigned int n0 = root_window.x().step();
+ const unsigned int m0 = root_window.y().step();
+ const unsigned int k0 = adjust_vec_size(is_data_type_quantized(data_type) ? 16u : 8u, _src->dimension(channel_idx));
+ const unsigned int partial_store_n0 = _dst->dimension(0) % n0;
+
+ CLBuildOptions build_opts{};
+ if(_settings.fast_relaxed_math())
+ {
+ build_opts.add_option("-cl-fast-relaxed-math");
+ }
+ else
+ {
+ // -cl-fast-relaxed-math also sets -cl-finite-math-only and -cl-unsafe-math-optimizations
+ // to disable -cl-finite-math-only, we only include -cl-unsafe-math-optimizations
+ build_opts.add_option("-cl-unsafe-math-optimizations");
+ }
+ build_opts.add_option("-DIS_TILED");
+ build_opts.add_option("-DN0=" + support::cpp11::to_string(n0));
+ build_opts.add_option("-DM0=" + support::cpp11::to_string(m0));
+ build_opts.add_option("-DK0=" + support::cpp11::to_string(k0));
+ build_opts.add_option("-DPARTIAL_N0=" + support::cpp11::to_string(partial_store_n0));
+
+ return build_opts;
+}
+
+std::string ClTemplateDirectConv2d::get_config_id() const
+{
+ const DataType data_type = _src->data_type();
+ const DataLayout data_layout = _src->data_layout();
+
+ const unsigned int width_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
+ const unsigned int height_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
+
+ const unsigned int kernel_size = _weight->dimension(width_idx);
+
+ std::string config_id{};
+ config_id += lower_string(string_from_data_type(data_type));
+ config_id += "_";
+ config_id += support::cpp11::to_string(kernel_size);
+ config_id += "_";
+ config_id += support::cpp11::to_string(_attributes.stride().x());
+ config_id += "_";
+ config_id += support::cpp11::to_string(_attributes.stride().y());
+ config_id += "_";
+ config_id += support::cpp11::to_string(_dst->dimension(width_idx));
+ config_id += "_";
+ config_id += support::cpp11::to_string(_dst->dimension(height_idx));
+ config_id += "_";
+ config_id += lower_string(string_from_data_layout(data_layout));
+ return config_id;
+}
+
+std::set<std::string> ClTemplateDirectConv2d::get_headers_list() const
+{
+ return std::set<std::string>{ "helpers.h", "tile_helpers.h" };
+}
+
+Window ClTemplateDirectConv2d::get_window() const
+{
+ ARM_COMPUTE_ERROR_ON_MSG(_dst->tensor_shape().total_size() == 0U, "Destination tensor is not initialized");
+
+ const auto output_shape = _dst->tensor_shape();
+
+ const unsigned int vec_size = std::min(static_cast<unsigned int>(output_shape[0]), 4u);
+ const unsigned int num_rows = (_dst->tensor_shape()[0] > 16) ? ((_src->data_type() == DataType::F32) ? 2U : 4U) : 1U;
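+    // vec_size is the N0 tile size along the channel dimension; num_rows is the M0 tile size along the collapsed W*H plane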
+
+ // Create and configure kernel window
+ Window win = calculate_max_window(output_shape, Steps(vec_size, num_rows));
+
+ const size_t dim_y_collapsed = ceil_to_multiple(output_shape[1] * output_shape[2], num_rows);
+ win.set(Window::DimY, Window::Dimension(0, dim_y_collapsed, num_rows));
+ win.set(Window::DimZ, Window::Dimension(0, output_shape.total_size_upper(3), 1));
+
+ return win;
+}
+
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.h
new file mode 100644
index 0000000000..48027a9b8d
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.h
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEDIRECTCONV2D
+#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEDIRECTCONV2D
+
+#include "arm_compute/core/experimental/Types.h"
+#include "arm_compute/dynamic_fusion/sketch/OperatorAttributes.h"
+#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.h"
+#include "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h"
+#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+class ClTemplateDirectConv2d final : public IGpuTemplateComponentWriter
+{
+public:
+ using Attributes = ClComponentDirectConv2d::Attributes;
+ using Settings = ClComponentDirectConv2d::Settings;
+ /** Constructor
+ *
+ * Similar to @ref ClComponentDirectConv2d::validate()
+ *
+ * @param[in] id Component id
+ * @param[in] tensors Tensor arguments to the components
+ * @param[in] attributes Component attributes
+ * @param[in] settings Component settings
+ */
+ ClTemplateDirectConv2d(ComponentId id,
+ const ArgumentPack<ITensorInfo> &tensors,
+ const Attributes &attributes,
+ const Settings &settings);
+ /** Prevent instances of this class from being copy constructed */
+ ClTemplateDirectConv2d(const ClTemplateDirectConv2d &direct_conv2d) = delete;
+ /** Prevent instances of this class from being copied */
+ ClTemplateDirectConv2d &operator=(const ClTemplateDirectConv2d &direct_conv2d) = delete;
+ /** Allow instances of this class to be move constructed */
+ ClTemplateDirectConv2d(ClTemplateDirectConv2d &&direct_conv2d) = default;
+ /** Allow instances of this class to be moved */
+ ClTemplateDirectConv2d &operator=(ClTemplateDirectConv2d &&direct_conv2d) = default;
+ /** Generate kernel component name */
+ std::string get_name() const override;
+ /** Generate kernel component code template
+ *
+     * @param[in] comp_group Component group of which the component is a part
+ *
+ * @return std::string Component code
+ */
+ std::string get_component_code(const ComponentGroup &comp_group) const override;
+ /** Declare all variables used by the component in the @p vtable
+ *
+ * @param[out] vtable Variable table
+     * @param[in]  comp_group Component group of which the component is a part
+ */
+ void declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override;
+ /** Generate the tag look-up table used to instantiate the component code.
+ *
+ * @param[in] vtable Variable table
+     * @param[in] comp_group Component group of which the component is a part
+ *
+ * @return TagLUT Tag lookup table
+ */
+ TagLUT get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override;
+ /** Generate the build options used in the component
+ *
+     * @param[in] comp_group Component group of which the component is a part
+ *
+ * @return CLBuildOptions Build options
+ */
+ CLBuildOptions get_build_options(const ComponentGroup &comp_group) const override;
+ /** Generate the component config id string used for tuning */
+ std::string get_config_id() const override;
+ /** Generate the header list used in the component */
+ std::set<std::string> get_headers_list() const override;
+ /** Generate the execution window for the component */
+ Window get_window() const override;
+
+private:
+ const ITensorInfo *_src;
+ const ITensorInfo *_weight;
+ const ITensorInfo *_bias;
+ const ITensorInfo *_dst;
+ Attributes _attributes;
+ Settings _settings;
+};
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEDIRECTCONV2D */
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.cpp
new file mode 100644
index 0000000000..6c4b8f52f2
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.cpp
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "ClTemplateStore.h"
+
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+ClTemplateStore::ClTemplateStore(ComponentId id, const ArgumentPack<ITensorInfo> &tensors)
+ : IGpuTemplateComponentWriter{ id, tensors }, _src{}, _dst{}
+{
+ _src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0);
+ _dst = this->tensors().get_const_tensor(TensorType::ACL_DST_0);
+}
+
+std::string ClTemplateStore::get_name() const
+{
+ return "store";
+}
+
+std::string ClTemplateStore::get_component_code(const ComponentGroup &comp_group) const
+{
+ ARM_COMPUTE_UNUSED(comp_group);
+ return R"_(
+//------------------ START KERNEL {{meta_kernel_id}} STORE ---------------------
+{
+// This also follows NHWC layout
+// g_ind_0 maps to global_id(0), which maps to Channel
+// g_ind_1 maps to global_id(1), which maps to Height and Width (Collapsed Window)
+// g_ind_2 maps to global_id(2), which maps to N / Batch
+#define _IDST_WIDTH {{dst}}_w
+#define _IDST_HEIGHT {{dst}}_h
+ TILE(uint, M0, 1, dst_indirect_y);
+
+ // Calculate the destination indirect Y
+ LOOP_UNROLLING(int, i, 0, 1, M0,
+ {
+ dst_indirect_y[i].v = (uint)min(g_ind_1 + i, (int)(_IDST_WIDTH * _IDST_HEIGHT) - 1);
+ dst_indirect_y[i].v += g_ind_2 * (int)(_IDST_WIDTH * _IDST_HEIGHT);
+ })
+
+ bool x_cond = PARTIAL_N0 != 0 && get_global_id(0) == 0;
+
+ T_STORE_INDIRECT_WIDTH_SELECT({{DST_DATA_TYPE}}, M0, N0, PARTIAL_N0, {{DST_TENSOR_TYPE}}, {{dst}}, g_ind_0, {{dst}}_stride_y, x_cond, {{src}}, dst_indirect_y);
+
+#undef _IDST_WIDTH
+#undef _IDST_HEIGHT
+ //------------------ END KERNEL {{meta_kernel_id}} STORE ---------------------
+}
+
+)_";
+}
+
+void ClTemplateStore::declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
+{
+ vtable.declare_variable(
+ _src,
+ GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
+ comp_group.is_intermediate_tensor(_src),
+ "src");
+ vtable.declare_variable(
+ _dst,
+ GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
+ comp_group.is_intermediate_tensor(_dst),
+ "dst");
+}
+
+TagLUT ClTemplateStore::get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
+{
+ TagLUT lut{};
+
+ // Arguments and global shared variables
+ lut["src"] = vtable.get_variable(_src);
+ lut["dst"] = vtable.get_variable(_dst);
+
+ // Local build options
+ lut["meta_kernel_id"] = id();
+ lut["DST_TENSOR_TYPE"] = "BUFFER";
+ const auto dst_info = comp_group.get_dst_tensors()[0];
+ lut["DST_DATA_TYPE"] = dst_info->data_type();
+
+ return lut;
+}
+
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.h
new file mode 100644
index 0000000000..3f97a82204
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATESTORE
+#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATESTORE
+
+#include "arm_compute/core/experimental/Types.h"
+#include "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h"
+#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+class ClTemplateStore final : public IGpuTemplateComponentWriter
+{
+public:
+ /** Constructor
+ *
+ * @param[in] id Component id
+ * @param[in] tensors Tensor arguments to the components
+ */
+ ClTemplateStore(ComponentId id, const ArgumentPack<ITensorInfo> &tensors);
+ /** Prevent instances of this class from being copy constructed */
+ ClTemplateStore(const ClTemplateStore &store) = delete;
+ /** Prevent instances of this class from being copied */
+ ClTemplateStore &operator=(const ClTemplateStore &store) = delete;
+ /** Allow instances of this class to be move constructed */
+ ClTemplateStore(ClTemplateStore &&store) = default;
+ /** Allow instances of this class to be moved */
+ ClTemplateStore &operator=(ClTemplateStore &&store) = default;
+ /** Generate kernel component name */
+ std::string get_name() const override;
+ /** Generate kernel component code template
+ *
+     * @param[in] comp_group Component group of which the component is a part
+ *
+ * @return std::string Component code
+ */
+ std::string get_component_code(const ComponentGroup &comp_group) const override;
+ /** Declare all variables used by the component in the @p vtable
+ *
+ * @param[out] vtable Variable table
+     * @param[in]  comp_group Component group of which the component is a part
+ */
+ void declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override;
+ /** Generate the tag look-up table used to instantiate the component code.
+ *
+ * @param[in] vtable Variable table
+     * @param[in] comp_group Component group of which the component is a part
+ *
+ * @return TagLUT Tag lookup table
+ */
+ TagLUT get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override;
+
+private:
+ const ITensorInfo *_src;
+ const ITensorInfo *_dst;
+};
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATESTORE */
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.cpp
new file mode 100644
index 0000000000..cb643a741d
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.cpp
@@ -0,0 +1,297 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "ClTemplateWriter.h"
+
+#include "arm_compute/core/CL/CLKernelLibrary.h"
+#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h"
+#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+/// @note Some tags may be unused because they are only needed by the additional macros or only by the component code
+std::string ClTemplateWriter::replace_tags(const std::string &code_template, const TagLUT &tags)
+{
+ std::string replaced_code = "";
+ bool scanning_pattern = false;
+ std::string pattern_found = "";
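+    // Single-pass scanner: outside a pattern, characters are copied through verbatim; "{{" starts collecting a tag name and "}}" ends it,
+    // at which point the tag is looked up in the LUT and its value is emitted instead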
+    for(size_t i = 0; i < code_template.size(); ++i)
+ {
+ if(!scanning_pattern)
+ {
+            if(i + 1 < code_template.size() && code_template[i] == '{' && code_template[i + 1] == '{')
+ {
+ i += 1;
+ scanning_pattern = true;
+ pattern_found = "";
+ }
+ else
+ {
+ replaced_code += code_template[i];
+ }
+ }
+ else
+ {
+            if(i + 1 < code_template.size() && code_template[i] == '}' && code_template[i + 1] == '}')
+ {
+ i += 1;
+ scanning_pattern = false;
+ std::string err = "Pattern " + pattern_found + " not found in tags";
+ ARM_COMPUTE_ERROR_ON_MSG(tags.find(pattern_found) == tags.end(), err.c_str());
+ replaced_code += tags.find(pattern_found)->second.value;
+ }
+ else
+ {
+ pattern_found += code_template[i];
+ }
+ }
+ }
+
+ return replaced_code;
+}
+ClTemplateWriter::~ClTemplateWriter()
+{
+}
+ClTemplateWriter::ClTemplateWriter(const GpuKernelComponentGroup &components)
+ : _components{ components }
+{
+}
+std::string ClTemplateWriter::get_name()
+{
+ return write_kernel_name();
+}
+std::string ClTemplateWriter::get_code()
+{
+ return write_code();
+}
+std::string ClTemplateWriter::get_config_id()
+{
+ std::string config_id = get_name();
+ for(const auto &comp : _components)
+ {
+ config_id += "--" + comp->template_writer()->get_config_id() + "--";
+ }
+
+ return config_id;
+}
+
+CLBuildOptions ClTemplateWriter::get_build_options()
+{
+ CLBuildOptions build_opts{};
+
+ for(const auto &comp : _components)
+ {
+ build_opts.add_options(comp->template_writer()->get_build_options(_components).options());
+ }
+
+ return build_opts;
+}
+
+Window ClTemplateWriter::get_window() const
+{
+ const auto root_comp = _components.get_root_component();
+ ARM_COMPUTE_ERROR_ON_MSG(root_comp == nullptr, "No root component found");
+ return root_comp->template_writer()->get_window();
+}
+
+std::map<ITensorInfo::Id, GpuKernelArgument> ClTemplateWriter::get_tensors()
+{
+ // Assemble GpuKernelArguments
+ std::map<ITensorInfo::Id, GpuKernelArgument> tensors;
+ for(const auto t : _components.get_argument_tensors())
+ {
+ tensors.emplace(
+ t->id(),
+ GpuKernelArgument{ *t, _vtable.get_variable(t).kernel_argument_info });
+ }
+ return tensors;
+}
+
+std::string ClTemplateWriter::write_code()
+{
+ ARM_COMPUTE_ERROR_ON_MSG(_components.empty(), "No components found");
+
+ // These data structures will hold the data from all the components in the blueprint
+ std::set<std::string> headers_list{};
+ std::set<std::string> additional_macros{};
+ std::vector<std::string> component_codes{}; // vector because order matters
+
+ // Pass 1: Declare all kernel variables
+ for(auto &component : _components)
+ {
+ component->template_writer()->declare_variables(_vtable, _components);
+ }
+ // Pass 2: Generate component codes
+ for(auto &component : _components)
+ {
+ const auto component_writer = component->template_writer();
+ auto curr_headers_list = component_writer->get_headers_list();
+ auto curr_additional_macros = component_writer->get_additional_macros();
+ auto curr_component_code = component_writer->get_component_code(_components);
+ const auto var_lut = component_writer->get_tag_lut(_vtable, _components); // Ideally can be merged with get_component_code once we have finer-grained code generation technique
+ component_codes.push_back(replace_tags(curr_component_code, var_lut));
+
+ headers_list.insert(curr_headers_list.begin(), curr_headers_list.end());
+        if(!curr_additional_macros.empty()) // Some components might not have any
+ {
+ additional_macros.insert(replace_tags(curr_additional_macros, var_lut));
+ }
+ }
+
+    // Pass 3: Assemble the data gathered by traversing the graph into the string "code"
+ std::string code = "";
+
+ for(auto &header : headers_list)
+ {
+#if defined(EMBEDDED_KERNELS)
+ code += CLKernelLibrary::get().get_program(header).first;
+#else // defined(EMBEDDED_KERNELS)
+ code += "#include \"" + header + "\"\n";
+#endif // defined(EMBEDDED_KERNELS)
+ }
+
+ for(auto &macros : additional_macros)
+ {
+ code += macros;
+ }
+
+ code += write_kernel_signature(_vtable.get_variable_list(_components.get_argument_tensors()));
+
+ code += "\n{\n\n";
+
+ code += " //------------------ START KERNEL_BUILDER_COORDINATE ---------------------\n\n";
+ code += write_global_section();
+ code += " //------------------ END KERNEL_BUILDER_COORDINATE ---------------------\n";
+
+ for(const auto &component_code : component_codes)
+ {
+ code += component_code;
+ }
+
+ code += "}\n";
+
+ return code;
+}
+std::string ClTemplateWriter::write_global_section() const
+{
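+    // Emit the global section shared by all fused components: spatial indices g_ind_0/1/2 derived from the execution window tile steps
+    // (with the leftover along the width), plus the boundary condition flags used by the components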
+ const auto dst_tensors = _components.get_dst_tensors();
+ ARM_COMPUTE_ERROR_ON_MSG(dst_tensors.size() != 1, "Only one destination tensor per kernel is allowed");
+ const auto dst_info = dst_tensors[0];
+ const auto dst_w = dst_info->dimension(0);
+ const auto tile_w = std::max(1, get_window().x().step());
+ const auto tile_h = std::max(1, get_window().y().step());
+ auto leftover_w = dst_w % tile_w;
+
+ std::string code = "";
+ code += std::string(" int g_ind_0 = GET_SPATIAL_IDX(0, ") + std::to_string(tile_w) + ", " + std::to_string(leftover_w) + ");\n";
+ code += std::string(" int g_ind_1 = GET_SPATIAL_IDX(1, ") + std::to_string(tile_h) + ", " + "0);\n";
+ code += std::string(" int g_ind_2 = GET_SPATIAL_IDX(2, 1, 0);\n\n");
+
+ code += " const bool g_cond_x = (g_ind_0 == 0);\n";
+ code += " const bool g_cond_y = (g_ind_1 == 0);\n";
+
+ return code;
+}
+std::string ClTemplateWriter::write_argument_declaration(const GpuKernelVariableTable::TensorVariable &var) const
+{
+ std::string code;
+ switch(var.kernel_argument_info.type)
+ {
+ case GpuKernelArgumentInfo::Type::Vector:
+ {
+ code += "\n VECTOR_DECLARATION(" + var.uniq_name + ")";
+ break;
+ }
+ case GpuKernelArgumentInfo::Type::Image:
+ {
+ code += "\n IMAGE_DECLARATION(" + var.uniq_name + ")";
+ break;
+ }
+ case GpuKernelArgumentInfo::Type::Image_3D:
+ {
+ code += "\n IMAGE_DECLARATION(" + var.uniq_name + "),";
+ code += "\n unsigned int " + var.uniq_name + "_stride_z";
+ break;
+ }
+ case GpuKernelArgumentInfo::Type::Image_3D_Export_To_ClImage2D:
+ {
+ code += "\n __read_only image2d_t " + var.uniq_name + "_img,";
+ code += "\n unsigned int " + var.uniq_name + "_stride_z";
+ break;
+ }
+ case GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer:
+ {
+ code += "\n TENSOR4D_T(" + var.uniq_name + ", BUFFER)";
+ break;
+ }
+ case GpuKernelArgumentInfo::Type::Tensor_4D_t_Image:
+ {
+ code += "\n TENSOR4D_T(" + var.uniq_name + ", IMAGE)";
+ break;
+ }
+ default:
+ {
+ ARM_COMPUTE_ERROR("Unsupported declaration generation for GpuKernelArgumentInfo::Type");
+ }
+ }
+ return code;
+}
+std::string ClTemplateWriter::write_kernel_signature(const GpuKernelVariableTable::VariableList &argument_list) const
+{
+ std::string code = "\n__kernel void " + write_kernel_name() + "(";
+
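+    // Emit one declaration per argument, comma-separated; the last argument is written without a trailing comma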
+ for(int i = 0; i < static_cast<int>(argument_list.size()) - 1; ++i)
+ {
+ code += write_argument_declaration(argument_list[i]) + ",";
+ }
+ if(static_cast<int>(argument_list.size()) - 1 >= 0)
+ {
+ code += write_argument_declaration(argument_list[argument_list.size() - 1]);
+ }
+
+ code += ')';
+
+ return code;
+}
+std::string ClTemplateWriter::write_kernel_name() const
+{
+ if(_components.empty())
+ {
+ return "empty_kernel";
+ }
+    std::string name = _components[0]->template_writer()->get_name();
+ for(size_t i = 1; i < _components.size(); ++i)
+ {
+ name += "___";
+ name += _components[i]->template_writer()->get_name();
+ }
+
+ return name;
+}
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.h
new file mode 100644
index 0000000000..83f617b6c6
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.h
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEWRITER
+#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEWRITER
+
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
+#include "src/dynamic_fusion/sketch/gpu/IGpuKernelWriter.h"
+#include "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h"
+
+#include <map>
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+/** Use a templated-string-based method to write kernel code
+ * It stitches the component code templates together based on the valid fusion configuration.
+ * It then instantiates the actual kernel code from the template and the generated tag lookup table.
+ */
+class ClTemplateWriter : public IGpuKernelWriter
+{
+public:
+ /** Instantiates a kernel code string from the kernel code template
+     * @note Some tags may be unused because they are only needed by the additional macros or only by the component code
+ *
+ * @param[in] code_template Kernel code template
+ * @param[in] tags Tag lookup table
+ *
+ * @return std::string Instantiated kernel string
+ */
+ static std::string replace_tags(const std::string &code_template, const TagLUT &tags);
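+    //
+    // A minimal illustrative sketch (the tag name and value below are hypothetical):
+    //   TagLUT lut{};
+    //   lut["dst"] = "tensor0";
+    //   replace_tags("TILE({{dst}});\n", lut); // every "{{dst}}" occurrence is replaced with "tensor0"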
+ /** Default constructor */
+ ClTemplateWriter() = default;
+ /** Constructor
+ *
+ * @param[in] components Kernel component group from which the kernel will be generated
+ */
+ ClTemplateWriter(const GpuKernelComponentGroup &components);
+ /** Destructor */
+ ~ClTemplateWriter() override;
+ /** Generate kernel name */
+ std::string get_name() override;
+ /** Generate kernel code */
+ std::string get_code() override;
+ /** Generate build options */
+ CLBuildOptions get_build_options() override;
+ /** Generate config id string of the entire kernel. This is used for tuning */
+ std::string get_config_id() override;
+ /** Generate execution window */
+ Window get_window() const override;
+    /** Get the kernel argument list of the kernel */
+ std::map<ITensorInfo::Id, GpuKernelArgument> get_tensors() override;
+
+private:
+ std::string write_kernel_name() const;
+ std::string write_code();
+ std::string write_global_section() const;
+ std::string write_argument_declaration(const GpuKernelVariableTable::TensorVariable &var) const;
+ std::string write_kernel_signature(const GpuKernelVariableTable::VariableList &argument_list) const;
+
+private:
+ GpuKernelComponentGroup _components{};
+ GpuKernelVariableTable _vtable{};
+};
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEWRITER */
diff --git a/src/dynamic_fusion/sketch/utils/DependencyGraph.h b/src/dynamic_fusion/sketch/utils/DependencyGraph.h
new file mode 100644
index 0000000000..55eb4c5c77
--- /dev/null
+++ b/src/dynamic_fusion/sketch/utils/DependencyGraph.h
@@ -0,0 +1,658 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef SRC_DYNAMIC_FUSION_SKETCH_UTILS_DEPENDENCYGRAPH
+#define SRC_DYNAMIC_FUSION_SKETCH_UTILS_DEPENDENCYGRAPH
+
+#include "arm_compute/core/Error.h"
+#include <algorithm>
+#include <cstdint>
+#include <deque>
+#include <map>
+#include <set>
+#include <tuple>
+#include <vector>
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+namespace
+{
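+/** Return true if @p v is contained in @p vec */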
+template <typename T>
+bool is_in(const T &v, const std::vector<T> &vec)
+{
+ return std::find(std::begin(vec), std::end(vec), v) != std::end(vec);
+}
+} // namespace
+
+/** A multi-input (tensors), multi-output (tensors) directed acyclic graph
+ * Represented as doubly-linked adjacency lists that distinguish between source and destination
+ */
+class DependencyGraph
+{
+public:
+ using Id = int32_t;
+ using TensorId = Id;
+ using OperatorId = Id;
+ /** Adjacency list
+ *
+ */
+ using AdjList = std::map<Id, std::vector<Id>>;
+
+    /** A pack consisting of an operator and its input and output tensors, used when traversing the graph in topological order
+ *
+ */
+ struct OpPack
+ {
+ OperatorId op{};
+ std::vector<TensorId> inputs{};
+ std::vector<TensorId> outputs{};
+ friend bool operator==(const OpPack &opp0, const OpPack &opp1)
+ {
+ return std::make_tuple(
+ opp0.op, opp0.inputs, opp0.outputs)
+ == std::make_tuple(
+ opp1.op, opp1.inputs, opp1.outputs);
+ }
+ };
+
+public:
+ DependencyGraph() = default;
+ friend std::ostream &operator<<(std::ostream &os, const DependencyGraph &);
+
+ /** Try adding an operator (without actually adding it), while keeping the graph as a "linear sequence" / list
+ * @note The list is expected to only grow from head to tail
+ *
+ * PRECONDITION: The current graph is already linear
+ *
+ * @return true If the operator can be added while keeping the graph as a linear sequence
+ * @return false Otherwise
+ */
+ bool try_add_operator_as_linear(OperatorId op, const std::vector<TensorId> &inputs, const std::vector<TensorId> &outputs) const
+ {
+ ARM_COMPUTE_UNUSED(op, outputs);
+ if(all_ops().empty())
+ {
+ return true;
+ }
+ std::vector<TensorId> common_tensors{};
+ auto existing_tensors = all_tensors();
+ std::sort(existing_tensors.begin(), existing_tensors.end()); // To use std::set_intersection, both input sets must be sorted
+ std::vector<TensorId> sorted_inputs{ inputs };
+ std::sort(sorted_inputs.begin(), sorted_inputs.end());
+ std::set_intersection(existing_tensors.begin(), existing_tensors.end(),
+ sorted_inputs.begin(), sorted_inputs.end(), std::back_inserter(common_tensors));
+ if(common_tensors.size() != 1U)
+ {
+ return false;
+ }
+ const auto linked_tensor = common_tensors[0];
+ const auto tail_ops = get_dst_ops();
+ ARM_COMPUTE_ERROR_ON(tail_ops.size() != 1U); // PRECONDITION
+ const auto tail = tail_ops[0];
+
+ if(!is_in(linked_tensor, dst_tensors(tail)))
+ {
+ return false;
+ }
+ return true;
+ }
+ /** Add an operator, while keeping the graph as a "linear sequence"
+ *
+ * PRECONDITION: The current graph is already linear
+ * INVARIANT: The list can only grow from head to tail
+     * POSTCONDITION: The graph is linear
+ */
+ void add_operator_as_linear(OperatorId op, const std::vector<TensorId> &inputs, const std::vector<TensorId> &outputs)
+ {
+ ARM_COMPUTE_ERROR_ON(!try_add_operator_as_linear(op, inputs, outputs));
+ auto success = add_operator(op, inputs, outputs);
+ ARM_COMPUTE_ERROR_ON(!success);
+ }
+    /** Add a new operator
+     * Returns false if adding it would violate the DAG invariant; an invalid operation leaves the graph unchanged
+     *
+     * @param[in] op      Operator to add
+     * @param[in] inputs  Input tensors to the operator
+     * @param[in] outputs Output tensors to the operator
+     *
+     * @return true  If the operator was added successfully
+     * @return false Otherwise
+     */
+ bool add_operator(OperatorId op, const std::vector<TensorId> &inputs, const std::vector<TensorId> &outputs)
+ {
+ if(operator_exists(op))
+ {
+ return false;
+ }
+ _adj_src_tensors[op] = {};
+ _adj_dst_tensors[op] = {};
+ for(auto in_tensor : inputs)
+ {
+ // Linking input tensor to operator node will never create a cycle / loop because we guarantee
+ // each op is newly created, so every <input, op> pair / edge is new
+ link_input(op, in_tensor);
+ }
+ for(auto out_tensor : outputs)
+ {
+ // If there exists a back path from op's output tensor to op already, then linking the two will create a loop / cycle
+ if(path_exists_from_tensor_to_op(out_tensor, op))
+ {
+ remove_operator(op);
+ return false;
+ }
+ else
+ {
+ link_output(op, out_tensor);
+ }
+ }
+
+ return true;
+ }
+
+    /** Sort the graph in a topological order
+     *
+     * @return std::vector<OpPack> Operator packs in topological order
+     */
+ std::vector<OpPack> topological_sort() const
+ {
+ // Incident degree (number of source operators to an op)
+ std::map<OperatorId, unsigned int> in_degree{};
+ std::set<OperatorId> visited_ops{};
+ std::deque<OperatorId> zero_in_degree_ops{};
+ std::vector<OpPack> sorted_op_packs{};
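+        // Kahn's algorithm: start from operators with no unresolved dependencies, repeatedly emit one and decrement the in-degree of its consumers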
+ for(auto op : all_ops())
+ {
+ const auto degree = src_ops(op).size();
+ in_degree[op] = degree;
+ if(degree == 0)
+ {
+ zero_in_degree_ops.push_back(op);
+ visited_ops.insert(op);
+ }
+ }
+
+ while(!zero_in_degree_ops.empty())
+ {
+ const OperatorId op = zero_in_degree_ops.front();
+ zero_in_degree_ops.pop_front();
+ sorted_op_packs.push_back(OpPack{ op, src_tensors(op), dst_tensors(op) });
+
+ for(const auto next_op : dst_ops(op))
+ {
+ if(in_degree[next_op] > 0)
+ {
+ in_degree[next_op]--;
+ }
+ if(in_degree[next_op] == 0 && visited_ops.find(next_op) == visited_ops.end())
+ {
+ zero_in_degree_ops.push_back(next_op);
+                visited_ops.insert(next_op);
+ }
+ }
+ }
+
+ return sorted_op_packs;
+ }
+
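+    /** Recursive helper for @ref find_independent_paths: extends @p cur_path with @p op and recurses into its children,
+     *  recording the current path when a leaf operator is reached, or stopping before an operator that still has more than one unresolved dependency
+     */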
+ void find_independent_paths_util(OperatorId op, std::vector<std::vector<OperatorId>> &paths, std::vector<OperatorId> cur_path,
+ const std::map<OperatorId, unsigned int> &in_degree) const
+ {
+ // We have found an unresolved dependency
+ if(in_degree.at(op) > 1)
+ {
+ paths.push_back(cur_path);
+ return;
+ }
+ const auto child_ops = dst_ops(op);
+
+ cur_path.push_back(op);
+ // Hit the leaf op
+ if(child_ops.empty())
+ {
+ paths.push_back(cur_path);
+ return;
+ }
+ for(const auto child_op : child_ops)
+ {
+ find_independent_paths_util(child_op, paths, cur_path, in_degree);
+ }
+ }
+    /** Find all independent linear paths starting from @p op, i.e. paths whose operators do not depend on any other unresolved operator
+     *
+     * @return std::vector<std::vector<OperatorId>> List of paths, each a list of operators
+     */
+ std::vector<std::vector<OperatorId>> find_independent_paths(OperatorId op,
+ const std::map<OperatorId, unsigned int> &in_degree) const
+ {
+ std::vector<std::vector<OperatorId>> paths;
+ std::vector<OperatorId> cur_path;
+ find_independent_paths_util(op, paths, cur_path, in_degree);
+ return paths;
+ }
+    /** Find a longest linear path starting from @p op that does not depend on any other unresolved operator
+     *
+     * @return std::vector<OperatorId> The longest such path
+     */
+ std::vector<OperatorId> find_longest_independent_path(OperatorId op,
+ const std::map<OperatorId, unsigned int> &in_degree) const
+ {
+ const auto &paths = find_independent_paths(op, in_degree);
+ ARM_COMPUTE_ERROR_ON(paths.empty());
+ size_t max_len = 0;
+ const std::vector<OperatorId> *max_path = nullptr;
+
+ for(const auto &path : paths)
+ {
+ if(path.size() >= max_len)
+ {
+ max_path = &path;
+ max_len = path.size();
+ }
+ }
+ return *max_path;
+ }
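+    /** Among @p candidate_ops, pick the operator with the longest independent linear path, remove it from the candidates and return that path */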
+ std::vector<OperatorId> propose_next_path(std::set<OperatorId> &candidate_ops,
+ const std::map<OperatorId, unsigned int> &in_degree) const
+ {
+ if(candidate_ops.empty())
+ {
+ return {};
+ }
+ size_t max_len = 0;
+ std::vector<OperatorId> max_path;
+ OperatorId chosen_op{};
+ for(auto op : candidate_ops)
+ {
+ const auto path = find_longest_independent_path(op, in_degree);
+ if(path.size() >= max_len)
+ {
+ chosen_op = op;
+ max_path = path;
+ max_len = path.size();
+ }
+ }
+ candidate_ops.erase(chosen_op);
+ return max_path;
+ }
+ /** Partition the graph into a list of linear sub-"graphs", while preserving the topological order, and trying to minimize
+ * the number of partitions
+ */
+ std::vector<std::vector<OpPack>> topological_partition() const
+ {
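+        // Greedy partitioning: repeatedly take the longest independent linear path among the current candidates (operators with no unresolved dependencies) and emit it as one partition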
+ // Initialize zero incident degree and zero in degree ops
+ std::map<OperatorId, unsigned int> in_degree{};
+ std::set<OperatorId> candidate_ops{};
+ for(auto op : all_ops())
+ {
+ const auto degree = src_ops(op).size();
+ in_degree[op] = degree;
+ if(degree == 0)
+ {
+ candidate_ops.insert(op);
+ }
+ }
+
+ std::vector<std::vector<OpPack>> sorted_partitions{};
+ while(!candidate_ops.empty())
+ {
+ // generate_longest_path_from_zero_indegree_ops(in_degree, visited_ops, candidate_ops)
+ const auto path = propose_next_path(candidate_ops, in_degree);
+
+ // Append to sorted_partitions
+ std::vector<OpPack> path_op_pack{};
+ for(auto op : path)
+ {
+ path_op_pack.push_back(OpPack{ op, src_tensors(op), dst_tensors(op) });
+ }
+ sorted_partitions.push_back(path_op_pack);
+ // Remove whole path (Update in_degree, visited_ops, candidate_ops)
+ for(auto op : path)
+ {
+ for(const auto next_op_child : dst_ops(op))
+ {
+ if(in_degree[next_op_child] > 0)
+ {
+ in_degree[next_op_child]--;
+ }
+ if(in_degree[next_op_child] == 0 && !is_in(next_op_child, path)) // We do not want to put the proposed path back into candidates
+ {
+ candidate_ops.insert(next_op_child);
+ }
+ }
+ }
+ }
+ return sorted_partitions;
+ }
+
+ /** Strict equality comparison (all internal ids and order of insertion matter).
+ * In the future this may be replaced with a topological comparison, allowing equivalent graphs with different internal ids to be equal
+ *
+ *
+     * @param[in] g0 First graph to compare
+     * @param[in] g1 Second graph to compare
+ * @return true If the same
+ * @return false Otherwise
+ */
+ friend bool operator==(const DependencyGraph &g0, const DependencyGraph &g1)
+ {
+ // Do not compare id allocators
+ return std::make_tuple(
+ g0._adj_src_tensors, g0._adj_dst_tensors, g0._adj_src_ops, g0._adj_dst_ops)
+ == std::make_tuple(
+ g1._adj_src_tensors, g1._adj_dst_tensors, g1._adj_src_ops, g1._adj_dst_ops);
+ }
+ std::vector<OperatorId> src_ops_from_tensor(TensorId tensor) const
+ {
+ return _adj_src_ops.at(tensor);
+ }
+ std::vector<OperatorId> dst_ops_from_tensor(TensorId tensor) const
+ {
+ return _adj_dst_ops.at(tensor);
+ }
+ /** Get all tensors
+ *
+ * @return std::vector<TensorId>
+ */
+ std::vector<TensorId> all_tensors() const
+ {
+ std::vector<TensorId> tensors{};
+ std::transform(std::begin(_adj_src_ops), std::end(_adj_src_ops), std::back_inserter(tensors), [](const auto & it)
+ {
+ return it.first;
+ });
+ return tensors;
+ }
+ /** Get source tensors of the whole graph
+ *
+ * @return std::vector<TensorId>
+ */
+ std::vector<TensorId> global_src_tensors() const
+ {
+ std::vector<TensorId> tensors;
+ for(auto tensor_src_ops : _adj_src_ops)
+ {
+ if(tensor_src_ops.second.empty())
+ {
+ tensors.push_back(tensor_src_ops.first);
+ }
+ }
+ return tensors;
+ }
+ /** Get destination tensors of the whole graph
+ *
+ * @return std::vector<TensorId>
+ */
+ std::vector<TensorId> global_dst_tensors() const
+ {
+ std::vector<TensorId> tensors;
+ for(auto tensor_dst_ops : _adj_dst_ops)
+ {
+ if(tensor_dst_ops.second.empty())
+ {
+ tensors.push_back(tensor_dst_ops.first);
+ }
+ }
+ return tensors;
+ }
+ /** Get all root ops. Root ops can also be referred to as "src ops" of the whole graph
+ *
+ * @return std::vector<OperatorId>
+ */
+ std::vector<OperatorId> get_root_ops() const
+ {
+ std::vector<OperatorId> ops{};
+ const auto op_list = all_ops();
+
+ for(auto op : op_list)
+ {
+ if(src_ops(op).empty())
+ {
+ ops.emplace_back(op);
+ }
+ }
+ return ops;
+ }
+
+private:
+ void link_input(OperatorId op, TensorId in_tensor)
+ {
+ ARM_COMPUTE_ERROR_ON(!operator_exists(op));
+ if(!tensor_exists(in_tensor))
+ {
+ insert_new_tensor(in_tensor);
+ }
+ ARM_COMPUTE_ERROR_ON(are_connected(op, in_tensor)); // Prevent repetitive linking
+ _adj_src_tensors[op].push_back(in_tensor);
+ _adj_dst_ops[in_tensor].push_back(op);
+ }
+ void link_output(OperatorId op, TensorId out_tensor)
+ {
+ ARM_COMPUTE_ERROR_ON(!operator_exists(op));
+ if(!tensor_exists(out_tensor))
+ {
+ insert_new_tensor(out_tensor);
+ }
+ ARM_COMPUTE_ERROR_ON(are_connected(op, out_tensor)); // Prevent repetitive linking
+ _adj_dst_tensors[op].push_back(out_tensor);
+ _adj_src_ops[out_tensor].push_back(op);
+ }
+
+ std::vector<OperatorId> src_ops(OperatorId op) const
+ {
+ ARM_COMPUTE_ERROR_ON(!operator_exists(op));
+ std::vector<OperatorId> ops{};
+ for(TensorId src_tensor : src_tensors(op))
+ {
+ ops.insert(ops.end(), std::begin(_adj_src_ops.at(src_tensor)), std::end(_adj_src_ops.at(src_tensor)));
+ }
+ return ops;
+ }
+ std::vector<OperatorId> dst_ops(OperatorId op) const
+ {
+ ARM_COMPUTE_ERROR_ON(!operator_exists(op));
+ std::vector<OperatorId> ops{};
+ for(TensorId dst_tensor : _adj_dst_tensors.at(op))
+ {
+ ops.insert(ops.end(), std::begin(_adj_dst_ops.at(dst_tensor)), std::end(_adj_dst_ops.at(dst_tensor)));
+ }
+ return ops;
+ }
+
+ /** Get source tensors to an operator
+ *
+ * @param[in] op
+ * @return std::vector<TensorId>
+ */
+ std::vector<TensorId> src_tensors(OperatorId op) const
+ {
+ ARM_COMPUTE_ERROR_ON(!operator_exists(op));
+ return _adj_src_tensors.at(op);
+ }
+ /** Get destination tensors to an operator
+ *
+ * @param[in] op
+ * @return std::vector<TensorId>
+ */
+ std::vector<TensorId> dst_tensors(OperatorId op) const
+ {
+ ARM_COMPUTE_ERROR_ON(!operator_exists(op));
+ return _adj_dst_tensors.at(op);
+ }
+ /** Get all operators
+ *
+ * @return std::vector<OperatorId>
+ */
+ std::vector<OperatorId> all_ops() const
+ {
+ std::vector<OperatorId> ops{};
+ std::transform(std::begin(_adj_src_tensors), std::end(_adj_src_tensors), std::back_inserter(ops), [](const auto & it)
+ {
+ return it.first;
+ });
+ return ops;
+ }
+    /** Remove an operator from the graph.
+ *
+ * @param[in] op
+ */
+ void remove_operator(OperatorId op)
+ {
+ for(auto src_tensor : _adj_src_tensors.at(op))
+ {
+ auto &dst_ops = _adj_dst_ops.at(src_tensor);
+ dst_ops.erase(
+ std::remove(std::begin(dst_ops), std::end(dst_ops), op),
+ std::end(dst_ops));
+ }
+ for(auto dst_tensor : _adj_dst_tensors.at(op))
+ {
+ auto &src_ops = _adj_src_ops.at(dst_tensor);
+ src_ops.erase(
+ std::remove(std::begin(src_ops), std::end(src_ops), op),
+ std::end(src_ops));
+ }
+ // Remove any isolated tensors
+ // An isolated tensor is one where both its _adj_src_ops and _adj_dst_ops are empty
+ for(auto t : all_tensors())
+ {
+ if(_adj_src_ops.at(t).empty() && _adj_dst_ops.at(t).empty())
+ {
+ _adj_src_ops.erase(t);
+ _adj_dst_ops.erase(t);
+ }
+ }
+ _adj_src_tensors.erase(op);
+ _adj_dst_tensors.erase(op);
+ }
+ void insert_new_tensor(TensorId tensor)
+ {
+ _adj_src_ops[tensor] = {};
+ _adj_dst_ops[tensor] = {};
+ }
+ bool tensor_exists(TensorId tensor) const
+ {
+ return _adj_src_ops.find(tensor) != _adj_src_ops.end() && _adj_dst_ops.find(tensor) != _adj_dst_ops.end();
+ }
+ bool operator_exists(OperatorId op) const
+ {
+ return _adj_src_tensors.find(op) != _adj_src_tensors.end() && _adj_dst_tensors.find(op) != _adj_dst_tensors.end();
+ }
+ bool is_src_tensor_of(OperatorId op, TensorId tensor) const
+ {
+ if(!operator_exists(op) || !tensor_exists(tensor))
+ {
+ return false;
+ }
+ const auto op_inputs = src_tensors(op);
+ return std::find(op_inputs.begin(), op_inputs.end(), tensor) != op_inputs.end();
+ }
+ bool is_dst_tensor_of(OperatorId op, TensorId tensor) const
+ {
+ if(!operator_exists(op) || !tensor_exists(tensor))
+ {
+ return false;
+ }
+ const auto op_outputs = dst_tensors(op);
+ return std::find(op_outputs.begin(), op_outputs.end(), tensor) != op_outputs.end();
+ }
+ bool are_connected(OperatorId op, TensorId tensor) const
+ {
+ return is_src_tensor_of(op, tensor) || is_dst_tensor_of(op, tensor);
+ }
+    /** Check if @p op is a destination / leaf operator of the whole graph
+     *
+     * @param[in] op Operator to check
+     *
+     * @return true  If @p op is a leaf operator
+     * @return false Otherwise
+     */
+ bool is_dst_op(OperatorId op) const
+ {
+ return dst_ops(op).empty();
+ }
+ std::vector<OperatorId> get_dst_ops() const
+ {
+ std::vector<OperatorId> ops{};
+ const auto op_list = all_ops();
+
+ for(auto op : op_list)
+ {
+ if(is_dst_op(op))
+ {
+ ops.emplace_back(op);
+ }
+ }
+ return ops;
+ }
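+    /** Check if there is a directed path from @p src_tensor to @p dst_op, via a depth-first search over the mutually recursive tensor / operator adjacency lists */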
+ bool path_exists_from_tensor_to_op(TensorId src_tensor, OperatorId dst_op) const
+ {
+ if(!tensor_exists(src_tensor) || !operator_exists(dst_op))
+ {
+ return false;
+ }
+ for(auto child_op : dst_ops_from_tensor(src_tensor))
+ {
+ if(path_exists_from_op_to_op(child_op, dst_op))
+ {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ bool path_exists_from_op_to_op(OperatorId src_op, OperatorId dst_op) const
+ {
+ if(!operator_exists(src_op) || !operator_exists(dst_op))
+ {
+ return false;
+ }
+ if(src_op == dst_op)
+ {
+ return true;
+ }
+ if(is_in(src_op, get_dst_ops()))
+ {
+ return false;
+ }
+ for(auto child_tensor : dst_tensors(src_op))
+ {
+ if(path_exists_from_tensor_to_op(child_tensor, dst_op))
+ {
+ return true;
+ }
+ }
+ return false;
+ }
+
+private:
+ AdjList _adj_src_tensors{};
+ AdjList _adj_dst_tensors{};
+ AdjList _adj_src_ops{};
+ AdjList _adj_dst_ops{};
+};
+
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+#endif /* SRC_DYNAMIC_FUSION_SKETCH_UTILS_DEPENDENCYGRAPH */