Remove dynamic fusion prototype with tests and examples

Public headers of the new experimental dynamic fusion can be found in arm_compute/dynamic_fusion/
New examples on how to use the interface can be found in tests/validation/dynamic_fusion/gpu/Integration.cpp

Resolves COMPMID-5683

Change-Id: I7ccb902a227fb487562df15fc3c30118d1d95bbd
Signed-off-by: SiCong Li <sicong.li@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8671
Reviewed-by: Jakub Sujak <jakub.sujak@arm.com>
Reviewed-by: Gunes Bayir <gunes.bayir@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
author: SiCong Li <sicong.li@arm.com> 2022-11-09 15:57:48 +0000
committer: SiCong Li <sicong.li@arm.com> 2022-11-22 14:09:34 +0000
commit: 31df05a1870662a7288fbaeb6fbc7fc458bb5a73 (patch)
tree: e75a132b8b5fd21cbceec8d0aa88da893e9c4f43 /src/core/experimental/dynamic_fusion
parent: 73bb6b7ad80801e56633ad4ea12b0404b586a979 (diff)
download: ComputeLibrary-31df05a1870662a7288fbaeb6fbc7fc458bb5a73.tar.gz
24 files changed, 0 insertions, 5696 deletions
diff --git a/src/core/experimental/dynamic_fusion/ClKernelBuildingAPI.cpp b/src/core/experimental/dynamic_fusion/ClKernelBuildingAPI.cpp
deleted file mode 100644
index 9b6daae619..0000000000
--- a/src/core/experimental/dynamic_fusion/ClKernelBuildingAPI.cpp
+++ /dev/null
@@ -1,164 +0,0 @@
-/*
- * Copyright (c) 2022 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION
-
-#include "src/core/experimental/dynamic_fusion/ClKernelBuildingAPI.h"
-#include "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/Common.h"
-#include "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClKernelComponents.h"
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-ClKernelBlueprint::ClKernelBlueprint()
-    : _impl{ std::make_unique<ClKernelBlueprint::Implementation>() }
-{
-}
-
-ClKernelBlueprint::~ClKernelBlueprint() = default;
-
-ClKernelBlueprint::Implementation &ClKernelBlueprint::impl()
-{
-    return *_impl;
-}
-const ClKernelBlueprint::Implementation &ClKernelBlueprint::impl() const
-{
-    return *_impl;
-}
-
-Status add_tensor(ClKernelBlueprint &kernel_blueprint, ITensorInfo *tensor_info, ArgumentID &id, ArgumentID merge_point)
-{
-    id = kernel_blueprint.impl().add_kernel_tensor(tensor_info, merge_point);
-    return Status{};
-}
-
-Status add_kcomp_eltwise_op(ClKernelBlueprint &kernel_blueprint, const ClElementwiseKernelDescriptor &desc,
-                            ArgumentID src0_id, ArgumentID src1_id, ArgumentID &dst_id)
-{
-    kernel_blueprint.impl().add_component(
-        std::make_unique<ClElementwiseKernelComponent>(
-            &kernel_blueprint,
-            desc,
-            SharedVarLink{ src0_id, SharedVarIO::Input },
-            SharedVarLink{ src1_id, SharedVarIO::Input },
-            SharedVarLink{ dst_id, SharedVarIO::Output }));
-
-    return Status{};
-}
-
-Status add_kcomp_floor(ClKernelBlueprint &kernel_blueprint, const ClFloorKernelDescriptor &,
-                       ArgumentID src_id, ArgumentID &dst_id)
-{
-    kernel_blueprint.impl().add_component(
-        std::make_unique<ClFloorKernelComponent>(
-            &kernel_blueprint,
-            SharedVarLink{ src_id, SharedVarIO::Input },
-            SharedVarLink{ dst_id, SharedVarIO::Output }));
-
-    return Status{};
-}
-
-Status add_kcomp_activation(ClKernelBlueprint &, const ClActivationKernelDescriptor &, ArgumentID, ArgumentID &)
-{
-    return Status{};
-}
-
-Status add_kcomp_direct_conv2d(ClKernelBlueprint                    &kernel_blueprint,
-                               const ClDirectConv2dKernelDescriptor &direct_conv2d_desc,
-                               ArgumentID src_id, ArgumentID weight_id, ArgumentID bias_id, ArgumentID &dst_id)
-{
-    kernel_blueprint.impl().add_component(
-        std::make_unique<ClDirectConvolutionKernelComponent>(
-            &kernel_blueprint,
-            direct_conv2d_desc,
-            SharedVarLink{ src_id, SharedVarIO::Input },
-            SharedVarLink{ weight_id, SharedVarIO::Input },
-            SharedVarLink{ dst_id, SharedVarIO::Output },
-            SharedVarLink{ bias_id, SharedVarIO::Input }));
-
-    return Status{};
-}
-
-Status add_kcomp_store(ClKernelBlueprint &kernel_blueprint, const StoreType &store_type, ArgumentID src_tile, ArgumentID dst_tile)
-{
-    switch(store_type)
-    {
-        case StoreType::StoreBlockBoundaryAware:
-            kernel_blueprint.impl().add_component(
-                std::make_unique<ClStoreBlockBoundaryAwareKernelComponent>(
-                    &kernel_blueprint,
-                    SharedVarLink{ src_tile, SharedVarIO::Input },
-                    SharedVarLink{ dst_tile, SharedVarIO::Output }));
-            break;
-        case StoreType::TStoreIndirectWidthSelect:
-            kernel_blueprint.impl().add_component(
-                std::make_unique<ClStoreIndirectWidthSelectKernelComponent>(
-                    &kernel_blueprint,
-                    SharedVarLink{ src_tile, SharedVarIO::Input },
-                    SharedVarLink{ dst_tile, SharedVarIO::Output }));
-            break;
-        default:
-            ARM_COMPUTE_ERROR("Store mode not yet supported.");
-    }
-
-    return Status{};
-}
-
-Status update_merge_point(ClKernelBlueprint &bp, ArgumentID t_id, ArgumentID merge_point)
-{
-    return bp.impl().update_merge_point(t_id, merge_point);
-}
-
-Status set_tile_info(ClKernelBlueprint &bp, const TileDescriptor &tile_info)
-{
-    bp.impl().set_tile_info(tile_info);
-    return Status{};
-}
-Status build(ClKernelCode &code, const ClCodeBuilderContext &, ClKernelBlueprint &kernel_blueprint)
-{
-    kernel_blueprint.impl().finalize();
-    code.name = kernel_blueprint.impl().build_kernel_name();
-    code.code = kernel_blueprint.impl().build_code();
-
-    code.config_id     = kernel_blueprint.impl().build_config_id();
-    code.build_options = kernel_blueprint.impl().build_options();
-    code.window        = kernel_blueprint.impl().get_execution_window();
-    code.arguments     = kernel_blueprint.impl().get_arguments();
-
-    return Status{};
-}
-DependencyGraph get_dependency_graph(const ClKernelBlueprint &blueprint)
-{
-    return blueprint.impl().get_graph();
-}
-Status tune_static(ClExecutionDescriptor &, const ClKernelCode &)
-{
-    return Status{};
-}
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
-#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */
-\ No newline at end of file
diff --git a/src/core/experimental/dynamic_fusion/ClKernelBuildingAPI.h b/src/core/experimental/dynamic_fusion/ClKernelBuildingAPI.h
deleted file mode 100644
index 463fc5e7cf..0000000000
--- a/src/core/experimental/dynamic_fusion/ClKernelBuildingAPI.h
+++ /dev/null
@@ -1,122 +0,0 @@
-/*
- * Copyright (c) 2022 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION
-
-#ifndef ARM_COMPUTE_EXPERIMENTAL_CLKERNELBUILDINGAPI_H
-#define ARM_COMPUTE_EXPERIMENTAL_CLKERNELBUILDINGAPI_H
-
-#include "arm_compute/core/CL/CLCompileContext.h"
-#include "arm_compute/core/Window.h"
-#include "arm_compute/core/experimental/ClWorkload.h"
-#include "arm_compute/core/experimental/DependencyGraph.h"
-#include "src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelDescriptors.h"
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-using ArgumentID = DependencyGraph::Id;
-
-static constexpr ArgumentID g_arg_placeholder = DependencyGraph::empty_id();
-
-/** Intermediate representation of the final, complete kernel source. */
-class ClKernelBlueprint
-{
-public:
-    ClKernelBlueprint();
-    ~ClKernelBlueprint();
-
-private:
-    struct Implementation;
-    std::unique_ptr<Implementation> _impl;
-
-public:
-    Implementation       &impl();
-    const Implementation &impl() const;
-};
-
-///// Kernel Components /////
-/** Component: Eltwise Operator */
-Status add_kcomp_eltwise_op(ClKernelBlueprint &, const ClElementwiseKernelDescriptor &, ArgumentID src0_id,
-                            ArgumentID src1_id, ArgumentID &dst_id);
-
-/** Component: Floor */
-Status add_kcomp_floor(ClKernelBlueprint &, const ClFloorKernelDescriptor &, ArgumentID src_id,
-                       ArgumentID &dst_id);
-
-/** Component: Activation */
-Status add_kcomp_activation(ClKernelBlueprint &, const ClActivationKernelDescriptor &, ArgumentID src_id, ArgumentID &dst_id);
-
-/** Component: Direct Convolution **/
-Status add_kcomp_direct_conv2d(ClKernelBlueprint &, const ClDirectConv2dKernelDescriptor &,
-                               ArgumentID src_id, ArgumentID weight_id, ArgumentID bias_id, ArgumentID &dst_id);
-
-Status add_kcomp_store(ClKernelBlueprint &, const StoreType &store_type, ArgumentID src_id, ArgumentID dst_id);
-
-Status add_tensor(ClKernelBlueprint &, ITensorInfo *, ArgumentID &, ArgumentID merge_point = DependencyGraph::empty_id());
-
-///// Kernel Components /////
-
-///// Building /////
-
-/** Update existing merge tensor @p merge_point to point to @p t_id
- *
- * @param t_id
- * @param merge_point
- * @return Status
- */
-Status update_merge_point(ClKernelBlueprint &, ArgumentID t_id, ArgumentID merge_point);
-
-/** Get dependency graph
- *
- * @return DependencyGraph
- */
-DependencyGraph get_dependency_graph(const ClKernelBlueprint &blueprint);
-
-/** All information required for building the @ref ClKernelCode */
-struct ClCodeBuilderContext
-{
-    GpuInfo gpu_info{};
-};
-
-Status set_tile_info(ClKernelBlueprint &, const TileDescriptor &);
-
-/** Build final kernel source from KernelBlueprint */
-Status build(ClKernelCode &code, const ClCodeBuilderContext &, ClKernelBlueprint &);
-
-///// Building /////
-
-///// Tuning /////
-
-Status tune_static(ClExecutionDescriptor &, const ClKernelCode &);
-
-///// Tuning /////
-
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
-#endif //ARM_COMPUTE_EXPERIMENTAL_CLKERNELBUILDINGAPI_H
-#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */
-\ No newline at end of file
diff --git a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/Common.h b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/Common.h
deleted file mode 100644
index 04919acb83..0000000000
--- a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/Common.h
+++ /dev/null
@@ -1,930 +0,0 @@
-/*
- * Copyright (c) 2022 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION
-
-#ifndef ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_COMMON_H
-#define ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_COMMON_H
-
-#include "arm_compute/core/CL/CLCompileContext.h"
-#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/GPUTarget.h"
-#include "src/core/common/Macros.h"
-#include "support/Requires.h"
-#include "support/StringSupport.h"
-
-#include "src/core/experimental/dynamic_fusion/ClKernelBuildingAPI.h"
-
-#include <iostream>
-#include <queue>
-#include <stack>
-#include <string>
-#include <unordered_set>
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-/** We introduce the concept of *Shared Variables* in the context of kernel building.
- *  They are variables that can be accessed / shared among all the kernel components within a single kernel.
- *  For now we consider 2 groups of shared variables:
- *      Argument: The argument variables (parameters) of a kernel
- *      Automatic: The automatic variables declared inside a kernel
- *  All Shared Variables have the same kernel scope, and are thus visible to all kernel components
-*/
-
-enum class SharedVarIO
-{
-    Input,
-    Output
-};
-
-enum class SharedVarGroup
-{
-    Argument, // Parameters to a kernel function  == dst or src tensors of the whole blueprint graph
-    Automatic // Automatic variables declared within the kernel body == intermediate tensors of the whole blueprint graph
-};
-
-/** Specifies a shared variable link for a component.
- * It describes all the information that's available when a component is constructed / added:
- *  e.g. its linkage (via ArgumentID and io) and its group
- * This is not shared variable on its own, but is used for instantiating a SharedVar when building the code
- */
-struct SharedVarLink
-{
-    ArgumentID  arg_id{ g_arg_placeholder };
-    SharedVarIO io{ SharedVarIO::Input };
-    bool        is_empty() const
-    {
-        return arg_id == g_arg_placeholder;
-    }
-};
-
-/** A table of all the variables used in the kernel / blueprint
- * Because we limit the DependencyGraph in the blueprint to a Linear Sequence for now, we only allow ** a single global variable (the accumulator) **
- *
- * NOTE: the order they appear in the table is the order of their "declaration" in the component code, and is also their ID
- * NOTE: the variables all have the scope of the full kernel function
- */
-class SharedVarTable
-{
-public:
-    /** A fully realized SharedVarLink
-     */
-    struct SharedVar
-    {
-        ArgumentID            arg_id{ g_arg_placeholder };
-        SharedVarIO           io{ SharedVarIO::Input };
-        SharedVarGroup        group{ SharedVarGroup::Argument };
-        std::string           uniq_name{}; // Unique name, also the final variable name used in the built code
-        ClKernelArgDescriptor desc{};      // Automatic variables can and should still be described using this struct
-        bool                  is_empty() const
-        {
-            return arg_id == g_arg_placeholder;
-        }
-    };
-
-    class Arguments
-    {
-    public:
-        Arguments() = default;
-        void add_var(const SharedVar &var)
-        {
-            ARM_COMPUTE_ERROR_ON(var.group != SharedVarGroup::Argument);
-            _vars.push_back(var);
-        }
-        std::vector<SharedVar> get_all_vars() const
-        {
-            return _vars;
-        }
-        std::vector<SharedVar> get_src_vars() const
-        {
-            std::vector<SharedVar> src_vars;
-            std::copy_if(_vars.begin(), _vars.end(), std::back_inserter(src_vars), [](const SharedVar & var)
-            {
-                return var.io == SharedVarIO::Input;
-            });
-            return src_vars;
-        }
-        SharedVar get_dst_var() const
-        {
-            std::vector<SharedVar> dst_vars;
-            std::copy_if(_vars.begin(), _vars.end(), std::back_inserter(dst_vars), [](const SharedVar & var)
-            {
-                return var.io == SharedVarIO::Output;
-            });
-            ARM_COMPUTE_ERROR_ON(dst_vars.size() != 1);
-            return dst_vars.at(0);
-        }
-
-    private:
-        std::vector<SharedVar> _vars{};
-    };
-
-    /** Create a SharedVar for a corresponding SharedVarLink (contains ArgumentID). If one has already been created for the SharedVarLink, simply return it instead of creating a new one
-     *
-     * @note: The order of insertion is important. There is one precondition:
-     *        PRECOND: The components have been sorted topologically / is being traversed in topological order
-     *                 This ensures that all the consumer var links (Output, Automatic Links) can consume (return) the producer var links when they're referred
-     */
-    void add(SharedVarLink var_link, SharedVarGroup group, ClKernelArgDescriptor runtime_desc, const std::string &name = "unnamed")
-    {
-        ARM_COMPUTE_ERROR_ON_MSG(var_link.is_empty(), "Non-empty SharedVarLink expected");
-        if(!get(var_link).is_empty())
-        {
-            return;
-        }
-
-        auto              var_id = _num_var;
-        std::stringstream ss;
-        ss << name << "_" << var_id;
-        const auto uniq_name = ss.str();
-        SharedVar  var{ var_link.arg_id, var_link.io, group, uniq_name, runtime_desc };
-
-        if(group == SharedVarGroup::Argument)
-        {
-            _arguments.emplace(var_id, var);
-            _arg_id_map.emplace(var_link.arg_id, var_id);
-            _num_var++;
-        }
-        else if(group == SharedVarGroup::Automatic)
-        {
-            if(_global_vars.empty())
-            {
-                if(var_link.io == SharedVarIO::Output)
-                {
-                    _global_vars.emplace(var_id, var);
-                    _arg_id_map.emplace(var_link.arg_id, var_id);
-                    _num_var++;
-                }
-                else
-                {
-                    ARM_COMPUTE_ERROR("Component likely not traversed in topological order");
-                }
-            }
-            else
-            {
-                // Associate additional SharedVarLinks with the single global shared variable
-                const auto global_var_id     = _global_vars.begin()->first;
-                _arg_id_map[var_link.arg_id] = global_var_id;
-            }
-        }
-        else
-        {
-            ARM_COMPUTE_ERROR("Unrecognised SharedVarGroup");
-        }
-    }
-
-    /** Get the SharedVar associated with @p var_link
-     *
-     * @param var_link
-     * @return SharedVar
-     */
-    SharedVar get(const SharedVarLink &var_link) const
-    {
-        const SharedVar empty_var{};
-        if(_arg_id_map.find(var_link.arg_id) != _arg_id_map.end())
-        {
-            const auto var_id  = _arg_id_map.at(var_link.arg_id);
-            const auto arg_var = _arguments.find(var_id);
-            if(arg_var != _arguments.end())
-            {
-                return arg_var->second;
-            }
-            else
-            {
-                return _global_vars.at(var_id);
-            }
-        }
-        return empty_var;
-    }
-
-    /** @note The arguments are returned in the order they are added
-     */
-    Arguments get_kernel_arguments() const
-    {
-        Arguments args{};
-        for(const auto &a : _arguments)
-        {
-            args.add_var(a.second);
-        }
-        return args;
-    }
-
-private:
-    using VarID = int32_t;
-
-private:
-    std::map<VarID, SharedVar>  _global_vars{}; // Shared, global variable
-    std::map<VarID, SharedVar>  _arguments{};
-    std::map<ArgumentID, VarID> _arg_id_map{}; // Track ArgumentIDs that have already been added
-    VarID _num_var{ 0 };
-};
-
-enum class ComponentType
-{
-    Simple,
-    Complex,
-    Store
-};
-
-using ComponentID   = DependencyGraph::Id;
-using ComponentList = std::vector<ComponentID>;
-class IClKernelComponent
-{
-public:
-    using Link = SharedVarLink;
-    using Tag  = std::string;
-    struct TagVal
-    {
-        TagVal() = default;
-        TagVal(const SharedVarTable::SharedVar &var)
-            : value{ var.uniq_name }
-        {
-        }
-
-        template <typename T, ARM_COMPUTE_REQUIRES_TA(std::is_integral<T>::value)>
-        TagVal(T val)
-            : value{ support::cpp11::to_string(val) }
-        {
-        }
-
-        TagVal(const std::string &val)
-            : value{ val }
-        {
-        }
-
-        TagVal(const char *val)
-            : value{ std::string(val) }
-        {
-        }
-
-        TagVal(const DataType &data_type)
-            : value{ get_cl_type_from_data_type(data_type) }
-        {
-        }
-
-        std::string value{};
-    };
-    using TagLUT = std::unordered_map<Tag, TagVal>; // Used to instantiating a code template / replacing tags
-public:
-    IClKernelComponent(ClKernelBlueprint *blueprint)
-        : _blueprint(blueprint)
-    {
-    }
-
-    ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(IClKernelComponent);
-
-    virtual ~IClKernelComponent()                        = default;
-    virtual ComponentType     get_component_type() const = 0;
-    virtual std::vector<Link> get_links() const          = 0;
-    virtual std::string       name() const               = 0;
-
-    // @note: some tags can be unused since they could be used only for the macros, or only for the component code
-    static std::string replace_tags(const std::string &code_template, const TagLUT &tags)
-    {
-        std::string replaced_code    = "";
-        bool        scanning_pattern = false;
-        std::string pattern_found    = "";
-        for(size_t i = 0; i < code_template.size() - 1; ++i)
-        {
-            if(!scanning_pattern)
-            {
-                if(code_template[i] == '{' && code_template[i + 1] == '{')
-                {
-                    i += 1;
-                    scanning_pattern = true;
-                    pattern_found    = "";
-                }
-                else
-                {
-                    replaced_code += code_template[i];
-                }
-            }
-            else
-            {
-                if(code_template[i] == '}' && code_template[i + 1] == '}')
-                {
-                    i += 1;
-                    scanning_pattern = false;
-                    std::string err  = "Pattern " + pattern_found + " not found in tags";
-                    ARM_COMPUTE_ERROR_ON_MSG(tags.find(pattern_found) == tags.end(), err.c_str());
-                    replaced_code += tags.find(pattern_found)->second.value;
-                }
-                else
-                {
-                    pattern_found += code_template[i];
-                }
-            }
-        }
-
-        return replaced_code;
-    }
-    ComponentID id() const
-    {
-        return _id;
-    }
-    void set_id(ComponentID id)
-    {
-        _id = id;
-    }
-
-    virtual std::set<std::string> get_headers_list() const
-    {
-        return std::set<std::string> {};
-    }
-
-    virtual std::string get_additional_macros() const
-    {
-        return "";
-    }
-
-    virtual std::string get_component_code() const
-    {
-        return "";
-    }
-
-    virtual Window get_window() const
-    {
-        return Window{};
-    }
-
-    /** Get the tag look-up table used to instantiate the component code.
-     *
-     * @param vtable
-     * @return TagLUT
-     */
-    virtual TagLUT get_tag_lut(const SharedVarTable &vtable) const = 0;
-
-    /** Allocate all shared variables used by the component in the @p vtable
-     *
-     * @param vtable
-     */
-    virtual void allocate_shared_vars(SharedVarTable &vtable) const = 0;
-
-    virtual std::string get_dst_addr_calculation() const
-    {
-        return "";
-    }
-
-    /** Generate config id of the component
-     *
-     * @return std::string
-     */
-    virtual std::string generate_config_id() const
-    {
-        return "";
-    }
-
-    virtual CLBuildOptions generate_build_options() const
-    {
-        return CLBuildOptions{};
-    }
-
-protected:
-    ClKernelBlueprint *_blueprint;
-
-private:
-    ComponentID _id{};
-};
-
-using ComponentUniquePtr = std::unique_ptr<IClKernelComponent>;
-
-/** Intermediate representation of the final, complete kernel source.
- */
-struct ClKernelBlueprint::Implementation
-{
-public:
-    Implementation()  = default;
-    ~Implementation() = default;
-
-public:
-    Status update_merge_point(ArgumentID t_id, ArgumentID merge_point)
-    {
-        return _graph.update_merge_point(t_id, merge_point);
-    }
-
-    ArgumentID add_kernel_tensor(ITensorInfo *tensor_info, ArgumentID merge_point = DependencyGraph::empty_id())
-    {
-        const auto id = _graph.add_tensor(merge_point);
-        if(_kernel_tensors.find(id) == _kernel_tensors.end())
-        {
-            _kernel_tensors.insert(std::make_pair(id, tensor_info));
-        }
-        return id;
-    }
-
-    void set_tile_info(const TileDescriptor &tile_info)
-    {
-        _tile_info = tile_info;
-    }
-
-    SharedVarGroup group(ArgumentID arg_id) const
-    {
-        if(arg_id == g_arg_placeholder)
-        {
-            // In case of placeholder, don't care what we return;
-            return SharedVarGroup::Argument;
-        }
-        return _shared_var_group_lut.at(arg_id);
-    }
-
-    void validate_arg_ids(std::initializer_list<ArgumentID> args) const
-    {
-        for(const auto arg_id : args)
-        {
-            ARM_COMPUTE_UNUSED(arg_id);
-            ARM_COMPUTE_ERROR_ON_MSG(_kernel_tensors.find(arg_id) == _kernel_tensors.end() && arg_id != g_arg_placeholder,
-                                     "Trying to use an argument that hasn't been added to the blueprint");
-        }
-    }
-
-    void add_component(ComponentUniquePtr component)
-    {
-        if(component->get_component_type() == ComponentType::Complex)
-        {
-            ++_num_complex_components;
-            ARM_COMPUTE_ERROR_ON_MSG(_num_complex_components > 1, "Only one complex component per blueprint is supported.");
-        }
-
-        // Get an unique ID for the component that's being added
-        std::vector<ArgumentID> src_tensors;
-        std::vector<ArgumentID> dst_tensors;
-        for(const auto &link : component->get_links())
-        {
-            if(link.is_empty())
-            {
-                continue;
-            }
-            if(link.io == SharedVarIO::Input)
-            {
-                src_tensors.push_back(link.arg_id);
-            }
-            else
-            {
-                dst_tensors.push_back(link.arg_id);
-            }
-        }
-        const ComponentID component_id = _graph.add_operator(src_tensors, dst_tensors).second;
-        component->set_id(component_id);
-
-        // Add this component to the component graph. Don't connect it to anything yet
-        _component_graph.emplace(component_id, ComponentList{});
-
-        // For every { arg_id, arg_io } passed along with this component...
-        for(const auto &link : component->get_links())
-        {
-            const ArgumentID &arg_id = link.arg_id;
-            const SharedVarIO &arg_io = link.io;
-
-            // Add the arg_id to the map describing the input/output relationship between an argument and the components that use it, if it doesn't yet exist there
-            if(_outgoing_components.find(arg_id) == _outgoing_components.end())
-            {
-                _outgoing_components.emplace(arg_id, ComponentList{});
-                _incoming_components.emplace(arg_id, ComponentList{});
-            }
-
-            // If it's an input argument, connect any other component that has it as output with this component
-            // Additionally, set this component as one that treats this argument as "Input" (append to index 0)
-            // This is used so that we keep track of whether two components use the same argument, one as input and one as output
-            if(arg_io == SharedVarIO::Input)
-            {
-                for(const auto &prev_component : _incoming_components[arg_id])
-                {
-                    _component_graph[prev_component].push_back(component_id);
-                }
-
-                _outgoing_components[arg_id].push_back(component_id);
-            }
-            // If it's an output argument, connect this component with any other component that has it as input
-            // Additionally, set this component as one that treats this argument as "Output" (append to index 1)
-            else
-            {
-                if(component->get_component_type() == ComponentType::Store)
-                {
-                    ARM_COMPUTE_ERROR_ON_MSG(_dst_id >= 0, "Trying to add more than one dst argument to the graph");
-                    _dst_id = arg_id;
-                }
-
-                for(const auto &subseq_component : _outgoing_components[arg_id])
-                {
-                    _component_graph[component_id].push_back(subseq_component);
-                }
-
-                _incoming_components[arg_id].push_back(component_id);
-            }
-        }
-
-        ARM_COMPUTE_ERROR_ON_MSG(_graph.get_root_ops().size() != 1, "Trying to add more than one root to the graph");
-
-        // Finally, add this component to the dictionary of components
-        _components.insert(std::make_pair(component_id, std::move(component)));
-    }
-
-    std::string build_kernel_name() const
-    {
-        std::string name = "";
-
-        traverse([&](std::stack<ComponentID> stack)
-        {
-            name += _components.find(stack.top())->second->name() + (stack.size() > 2 ? "___" : "");
-        });
-
-        return name;
-    }
-
-    std::string build_code()
-    {
-        ARM_COMPUTE_ERROR_ON_MSG(_graph_root == -1, "No root found in the component graph");
-
-        // These data structures will hold the data from all the components in the blueprint
-        std::set<std::string>    headers_list{};
-        std::set<std::string>    additional_macros{};
-        std::vector<std::string> component_codes{}; // vector because order matters
-
-        // Step 1: Allocate all kernel argument shared variables before generating the component code
-        auto stack = topological_sort();
-        while(!stack.empty())
-        {
-            auto  curr_component_id = stack.top();
-            auto &curr_component    = _components.find(curr_component_id)->second;
-
-            curr_component->allocate_shared_vars(_vtable);
-
-            stack.pop();
-        }
-        // Step 2: Generate component codes
-        stack = topological_sort();
-        while(!stack.empty())
-        {
-            auto  curr_component_id = stack.top();
-            auto &curr_component    = _components.find(curr_component_id)->second;
-
-            auto       curr_headers_list      = curr_component->get_headers_list();
-            auto       curr_additional_macros = curr_component->get_additional_macros();
-            auto       curr_component_code    = curr_component->get_component_code();
-            const auto var_lut                = curr_component->get_tag_lut(_vtable); // Ideally can be merged with get_component_code once we have finer-grained code generation technique
-            component_codes.push_back(IClKernelComponent::replace_tags(curr_component_code, var_lut));
-
-            headers_list.insert(curr_headers_list.begin(), curr_headers_list.end());
-            if(!curr_additional_macros.empty()) // Some components might not have any
-            {
-                additional_macros.insert(IClKernelComponent::replace_tags(curr_additional_macros, var_lut));
-            }
-
-            stack.pop();
-        }
-
-        // Step 3: Assemble the data gathered by traversing the graph into the string "code"
-        std::string code = "";
-
-        for(auto &header : headers_list)
-        {
-#if defined(EMBEDDED_KERNELS)
-            code += CLKernelLibrary::get().get_program(header).first;
-#else  // defined(EMBEDDED_KERNELS)
-            code += "#include \"" + header + "\"\n";
-#endif // defined(EMBEDDED_KERNELS)
-        }
-
-        for(auto &macros : additional_macros)
-        {
-            code += macros;
-        }
-
-        code += generate_kernel_signature(_vtable.get_kernel_arguments());
-
-        code += "\n{\n\n";
-
-        code += "    //------------------ START KERNEL_BUILDER_COORDINATE ---------------------\n\n";
-        code += generate_global_section();
-        code += "    //------------------ END KERNEL_BUILDER_COORDINATE ---------------------\n";
-
-        for(auto &component_code : component_codes)
-        {
-            code += component_code;
-        }
-
-        code += "}\n";
-
-        return code;
-    }
-
-    /** Generate config id of the entire kernel
-     *
-     * Format: kernel_name--comp0_config_id--comp1_config_id--...
-     *
-     * @return std::string
-     */
-    std::string build_config_id() const
-    {
-        std::string config_id = build_kernel_name();
-        traverse([&](std::stack<ComponentID> stack)
-        {
-            config_id += "--" + _components.find(stack.top())->second->generate_config_id() + "--";
-        });
-
-        return config_id;
-    }
-
-    CLBuildOptions build_options() const
-    {
-        CLBuildOptions build_opts{};
-
-        traverse([&](std::stack<ComponentID> stack)
-        {
-            build_opts.add_options(_components.find(stack.top())->second->generate_build_options().options());
-        });
-
-        return build_opts;
-    }
-
-    TileDescriptor get_tile_info() const
-    {
-        return _tile_info;
-    }
-
-    // Get the global execution window, i.e. that of the root component
-    Window get_execution_window() const
-    {
-        ARM_COMPUTE_ERROR_ON_MSG(_graph_root == -1, "No root found in the component graph");
-        ARM_COMPUTE_ERROR_ON_MSG(_dst_id == -1, "Destination Tensor Id should be ready before calling get_execution_window()");
-
-        return _components.find(_graph_root)->second->get_window();
-    }
-
-    ArgumentID get_dst_id() const
-    {
-        return _dst_id;
-    }
-
-    ClKernelArgList get_arguments() const
-    {
-        ClKernelArgList arg_list{};
-        for(const auto &arg_var : _vtable.get_kernel_arguments().get_all_vars())
-        {
-            arg_list[arg_var.desc.arg_id] = arg_var.desc;
-        }
-        return arg_list;
-    }
-
-    /** Get the arguments as shared vars from the vtable
-     *
-     * @return SharedVarTable::Arguments
-     */
-    SharedVarTable::Arguments get_argument_shared_vars() const
-    {
-        return _vtable.get_kernel_arguments();
-    }
-
-    const ITensorInfo *get_kernel_argument_info(const ArgumentID id) const
-    {
-        auto it = _kernel_tensors.find(id);
-        if(it != _kernel_tensors.end())
-        {
-            return it->second;
-        }
-        return nullptr;
-    }
-
-    ITensorInfo *get_kernel_argument_info(const ArgumentID id)
-    {
-        auto it = _kernel_tensors.find(id);
-        if(it != _kernel_tensors.end())
-        {
-            return it->second;
-        }
-        return nullptr;
-    }
-    /** Finalize graph construction. Graph is expected to not mutate after being finalized
-     */
-    void finalize()
-    {
-        cache_root_component();
-        assign_shared_var_group();
-    }
-
-    DependencyGraph get_graph() const
-    {
-        return _graph;
-    }
-
-private:
-    void cache_root_component()
-    {
-        const auto roots = _graph.get_root_ops();
-        ARM_COMPUTE_ERROR_ON_MSG(roots.size() != 1, "Trying to add more than one root to the graph");
-        _graph_root = roots.at(0);
-    }
-    /** Assign the group for each shared var. Can only be performed at the end of the graph construction, before building
-     */
-    void assign_shared_var_group()
-    {
-        for(const auto &tensor : _kernel_tensors)
-        {
-            const auto tensor_id = tensor.first;
-            if(_graph.is_src_tensor(tensor_id) || _graph.is_dst_tensor(tensor_id))
-            {
-                _shared_var_group_lut[tensor_id] = SharedVarGroup::Argument;
-            }
-            else
-            {
-                _shared_var_group_lut[tensor_id] = SharedVarGroup::Automatic;
-            }
-        }
-    }
-
-    void topological_sort_utility(ComponentID component_id, std::unordered_set<ComponentID> &visited, std::stack<ComponentID> &stack) const
-    {
-        visited.insert(component_id);
-
-        for(auto connected_component : _component_graph.find(component_id)->second)
-        {
-            if(visited.find(connected_component) == visited.end())
-            {
-                topological_sort_utility(connected_component, visited, stack);
-            }
-        }
-
-        stack.push(component_id);
-    }
-
-    std::stack<ComponentID> topological_sort() const
-    {
-        std::stack<ComponentID>         stack{};
-        std::unordered_set<ComponentID> visited{};
-
-        topological_sort_utility(_graph_root, visited, stack);
-
-        return stack;
-    }
-
-    void traverse(const std::function<void(std::stack<ComponentID>)> &func) const
-    {
-        std::stack<ComponentID> stack = topological_sort();
-
-        while(!stack.empty())
-        {
-            func(stack);
-            stack.pop();
-        }
-    }
-
-    std::string generate_argument_declaration(const SharedVarTable::SharedVar &var) const
-    {
-        ARM_COMPUTE_ERROR_ON_MSG(var.group != SharedVarGroup::Argument, "An argument declaration can only be generated from a kernel argument");
-        std::string code;
-        switch(var.desc.tensor_arg_type)
-        {
-            case ClKernelTensorArgType::Vector:
-            {
-                code += "\n    VECTOR_DECLARATION(" + var.uniq_name + ")";
-                break;
-            }
-            case ClKernelTensorArgType::Image:
-            {
-                code += "\n    IMAGE_DECLARATION(" + var.uniq_name + ")";
-                break;
-            }
-            case ClKernelTensorArgType::Image_3D:
-            {
-                code += "\n    IMAGE_DECLARATION(" + var.uniq_name + "),";
-                code += "\n    uint " + var.uniq_name + "_stride_z";
-                break;
-            }
-            case ClKernelTensorArgType::Image_3D_Export_To_ClImage2D:
-            {
-                code += "\n    __read_only image2d_t " + var.uniq_name + "_img,";
-                code += "\n    uint " + var.uniq_name + "_stride_z";
-                break;
-            }
-            case ClKernelTensorArgType::Tensor_4D_t_Buffer:
-            {
-                code += "\n    TENSOR4D_T(" + var.uniq_name + ", BUFFER)";
-                break;
-            }
-            case ClKernelTensorArgType::Tensor_4D_t_Image:
-            {
-                code += "\n    TENSOR4D_T(" + var.uniq_name + ", IMAGE)";
-                break;
-            }
-            default:
-            {
-                ARM_COMPUTE_ERROR("Unsupported declaration generation for ClKernelTensorArgType");
-            }
-        }
-        return code;
-    }
-
-    std::string generate_kernel_signature(const SharedVarTable::Arguments &argument_list) const
-    {
-        std::string code = "\n__kernel void " + build_kernel_name() + "(";
-
-        for(const auto &arg : argument_list.get_all_vars())
-        {
-            code += generate_argument_declaration(arg) + ",";
-        }
-
-        code[code.length() - 1] = ')';
-
-        return code;
-    }
-
-    std::string generate_global_section() const
-    {
-        auto       dst_info   = get_kernel_argument_info(_dst_id);
-        auto       dst_w      = dst_info->dimension(0);
-        const auto tile_w     = std::max(1, get_execution_window().x().step());
-        const auto tile_h     = std::max(1, get_execution_window().y().step());
-        auto       leftover_w = dst_w % tile_w;
-
-        std::string code = "";
-        code += std::string("    int cout = GET_SPATIAL_IDX(0, ") + std::to_string(tile_w) + ", " + std::to_string(leftover_w) + ");\n";
-        code += std::string("    int mout = GET_SPATIAL_IDX(1, ") + std::to_string(tile_h) + ", " + "0);\n";
-        code += std::string("    int bout = GET_SPATIAL_IDX(2, 1, 0);\n\n");
-
-        switch(_tile_info.clipping)
-        {
-            case ClippingStrategy::TOP_LEFT:
-                code += "    const bool g_cond_x = (cout == 0);\n";
-                code += "    const bool g_cond_y = (mout == 0);\n";
-                break;
-            case ClippingStrategy::TOP_RIGHT:
-                code += "    const bool g_cond_x = ((cout + 1) * " + std::to_string(tile_w) + " >= " + std::to_string(_tile_info.boundaries.x()) + ");\n";
-                code += "    const bool g_cond_y = (mout == 0);\n";
-                break;
-            case ClippingStrategy::BOTTOM_LEFT:
-                code += "    const bool g_cond_x = (cout == 0);\n";
-                code += "    const bool g_cond_y = ((mout + 1) * " + std::to_string(tile_h) + " >= " + std::to_string(_tile_info.boundaries.y()) + ");\n";
-                break;
-            case ClippingStrategy::BOTTOM_RIGHT:
-                code += "    const bool g_cond_x = ((cout + 1) * " + std::to_string(tile_w) + " >= " + std::to_string(_tile_info.boundaries.x()) + ");\n";
-                code += "    const bool g_cond_y = ((mout + 1) * " + std::to_string(tile_h) + " >= " + std::to_string(_tile_info.boundaries.y()) + ");\n";
-                break;
-            default:
-                ARM_COMPUTE_ERROR("Unsupported clipping strategy");
-        }
-
-        return code;
-    }
-
-    TileDescriptor _tile_info{};
-
-    int32_t _num_complex_components{};
-
-    ArgumentID _dst_id{ -1 }; // Initially set to -1, which means the graph has no dst yet, since node IDs are positive numbers
-
-    DependencyGraph _graph{};
-
-    // Tensors, components and IDs with corresponding ptrs (except intermediate)
-    std::unordered_map<ComponentID, ComponentUniquePtr> _components{};
-    std::unordered_map<ArgumentID, ITensorInfo *>       _kernel_tensors{};
-    // Argument group lookup. Can be replaced by extending the ArgumentID type to include group info
-    std::unordered_map<ArgumentID, SharedVarGroup> _shared_var_group_lut{};
-
-    // Tracks all variables (e.g.: kernel arguments, kernel "global variables")
-    SharedVarTable _vtable{};
-
-    // Component directed graph (represented by an adjecency list of Component IDs)
-    // This is used to understand the ordering and bindings between components when generating the kernel
-    // It's initially set to -1 which means the graph has no root yet, since node IDs are positive numbers
-    ComponentID _graph_root{ -1 };
-    std::unordered_map<ComponentID, ComponentList> _component_graph{};
-
-    // Additional data structures used to define the relationships between components and arguments
-    // For each argument, it contains the list of components that consider it as an incoming or an outgoing argument
-    // E.g. tensor0  -> component0 -> tensor1
-    // _outgoing_components[tensor0] == {component0} (component0 is the outgoing component of tensor0. Component0 treats tensor0 as an input tensor)
-    // _incoming_components[tensor1] == {component0} (component0 is the incoming component of tensor1. Component1 treats tensor1 as an output tensor)
-    std::unordered_map<ArgumentID, ComponentList> _outgoing_components{};
-    std::unordered_map<ArgumentID, ComponentList> _incoming_components{};
-};
-
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
-#endif //ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_COMMON_H
-#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */
diff --git a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/Utils.h b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/Utils.h
deleted file mode 100644
index 1b10050559..0000000000
--- a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/Utils.h
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- * Copyright (c) 2022 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION
-
-#ifndef ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_UTILS
-#define ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_UTILS
-
-#include "src/core/experimental/dynamic_fusion/ClKernelBuildingAPI.h"
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-inline ::std::ostream &operator<<(::std::ostream &os, const CLBuildOptions::StringSet &build_opts)
-{
-    for(const auto &opt : build_opts)
-    {
-        os << opt << ",";
-    }
-    return os;
-}
-inline ::std::ostream &operator<<(::std::ostream &os, const CLBuildOptions &cl_build_opts)
-{
-    os << cl_build_opts.options();
-    return os;
-}
-
-inline std::string to_string(const CLBuildOptions &cl_build_opts)
-{
-    std::stringstream str;
-    str << cl_build_opts;
-    return str.str();
-}
-inline ::std::ostream &operator<<(::std::ostream &os, const ClKernelCode &code)
-{
-    os << "name: " << code.name << std::endl;
-    os << "code: " << code.code << std::endl;
-    os << "build_opts: " << code.build_options << std::endl;
-    return os;
-}
-inline std::string to_string(const ClKernelCode &code)
-{
-    std::stringstream str;
-    str << code;
-    return str.str();
-}
-
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
-
-#endif //ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_UTILS
-#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */
-\ No newline at end of file
diff --git a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClDirectConvolutionKernelComponent.cpp b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClDirectConvolutionKernelComponent.cpp
deleted file mode 100644
index 811cd79811..0000000000
--- a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClDirectConvolutionKernelComponent.cpp
+++ /dev/null
@@ -1,409 +0,0 @@
-/*
- * Copyright (c) 2022 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION
-
-#include "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClDirectConvolutionKernelComponent.h"
-
-#include "arm_compute/core/utils/misc/ShapeCalculator.h"
-#include "src/core/CL/ICLKernel.h"
-#include "src/core/helpers/AutoConfiguration.h"
-#include "src/core/helpers/WindowHelpers.h"
-#include "src/gpu/cl/kernels/gemm/ClGemmHelpers.h"
-
-#include "arm_compute/runtime/CL/CLScheduler.h"
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-ComponentType ClDirectConvolutionKernelComponent::get_component_type() const
-{
-    return ComponentType::Complex;
-}
-
-std::set<std::string> ClDirectConvolutionKernelComponent::get_headers_list() const
-{
-    return std::set<std::string> { "helpers.h", "tile_helpers.h" };
-}
-
-Window ClDirectConvolutionKernelComponent::get_window() const
-{
-    const auto src_info    = _blueprint->impl().get_kernel_argument_info(_src.arg_id);
-    const auto weight_info = _blueprint->impl().get_kernel_argument_info(_weight.arg_id);
-    auto       dst_info    = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id());
-
-    // Get dst shape
-    PadStrideInfo pad_stride_info
-    {
-        static_cast<unsigned int>(_desc.conv2d.stride.x()),
-        static_cast<unsigned int>(_desc.conv2d.stride.y()),
-        static_cast<unsigned int>(_desc.conv2d.pad.left),
-        static_cast<unsigned int>(_desc.conv2d.pad.right),
-        static_cast<unsigned int>(_desc.conv2d.pad.top),
-        static_cast<unsigned int>(_desc.conv2d.pad.bottom),
-        DimensionRoundingType::FLOOR /*default rounding type*/
-    };
-    TensorShape output_shape = misc::shape_calculator::compute_deep_convolution_shape(*src_info, *weight_info, pad_stride_info);
-
-    // Output auto initialization if not yet initialized
-    auto_init_if_empty(*dst_info, output_shape,
-                       1,
-                       src_info->data_type(),
-                       src_info->quantization_info());
-
-    const unsigned int vec_size = std::min(static_cast<unsigned int>(dst_info->tensor_shape()[0]), 4u);
-    const unsigned int num_rows = (dst_info->tensor_shape()[0] > 16) ? ((src_info->data_type() == DataType::F32) ? 2U : 4U) : 1U;
-    // const unsigned int num_rows = 1;
-    // const unsigned int vec_size = tile_info.tile_dims.x();
-    // const unsigned int num_rows = tile_info.tile_dims.y();
-
-    // Create and configure kernel window
-    Window win = calculate_max_window(output_shape, Steps(vec_size, num_rows));
-
-    const size_t dim_y_collapsed = ceil_to_multiple(output_shape[1] * output_shape[2], num_rows);
-    win.set(Window::DimY, Window::Dimension(0, dim_y_collapsed, num_rows));
-    win.set(Window::DimZ, Window::Dimension(0, output_shape.total_size_upper(3), 1));
-
-    return win;
-}
-
-std::string ClDirectConvolutionKernelComponent::get_additional_macros() const
-{
-    return R"_()_"; // no macros
-}
-
-std::string ClDirectConvolutionKernelComponent::get_component_code() const
-{
-    const auto src_info  = _blueprint->impl().get_kernel_argument_info(_src.arg_id);
-    const auto bias_info = _blueprint->impl().get_kernel_argument_info(_bias.arg_id);
-
-    ARM_COMPUTE_ERROR_ON_MSG(src_info->data_layout() != DataLayout::NHWC, "Only NHWC data layout is supported by this component.");
-
-    const auto channel_idx   = get_data_layout_dimension_index(src_info->data_layout(), DataLayoutDimension::CHANNEL);
-    const auto k0            = adjust_vec_size(is_data_type_quantized(src_info->data_type()) ? 16u : 8u, src_info->dimension(channel_idx));
-    const bool leftover_loop = (src_info->dimension(channel_idx) % k0) != 0;
-
-    std::string code = R"_(
-    //------------------ START KERNEL {{meta_kernel_id}} ---------------------
-    // IN_0(src)            {{src}}
-    // IN_1(wei)            {{weight}}
-    )_";
-    if(bias_info != nullptr)
-    {
-        code += R"_(
-    // IN_1(bia)            {{bias}}
-    )_";
-    }
-    code += R"_(
-    // OUT(dst, accum)      {{dst}}
-
-    // Initialize the accumulators
-    TILE({{ACC_DATA_TYPE}}, M0, N0, {{dst}});
-    {
-        // All the tensor dimensions are passed at compile time.
-        // In case of dynamic tensor support, the following dimensions should be passed as function argument.
-    #define _IWEI_WIDTH {{WEI_WIDTH}}
-    #define _IWEI_HEIGHT {{WEI_HEIGHT}}
-    #define _ISRC_WIDTH {{src}}_w
-    #define _ISRC_HEIGHT {{src}}_h
-    #define _ISRC_CHANNELS {{src}}_c
-    #define _IDST_WIDTH {{arg_dst}}_w
-    #define _IDST_HEIGHT {{arg_dst}}_h
-    #define _IDST_CHANNELS {{arg_dst}}_c
-    #define _IY_MULTIPLIER (_IWEI_WIDTH * _IWEI_HEIGHT)
-
-        // .v    = access the whole vector (OpenCL vector)
-        // .s[x] = access the vector element at position x (scalar access)
-        TILE(int, M0, 1, xi);
-        TILE(int, M0, 1, yi);
-
-        // Convert the linear index to coordinate
-        LOOP_UNROLLING(int, i, 0, 1, M0,
-        {
-            xi[i].v = ((mout + i) % _IDST_WIDTH) * {{STRIDE_X}};
-            yi[i].v = ((mout + i) / _IDST_WIDTH) * {{STRIDE_Y}};
-            xi[i].v -= {{PAD_LEFT}};
-            yi[i].v -= {{PAD_TOP}};
-        })
-
-        LOOP_UNROLLING(int, i, 0, 1, M0,
-        {
-            {{dst}}[i].v = 0;
-        })
-
-        for(int i = 0; i < (_IWEI_WIDTH * _IWEI_HEIGHT); ++i)
-        {
-            int ck = 0;
-            int xk = i % _IWEI_WIDTH;
-            int yk = i / _IWEI_HEIGHT;
-
-            int k = 0;
-            for(; k <= (_ISRC_CHANNELS - K0); k += K0)
-            {
-                TILE({{SRC_DATA_TYPE}}, M0, K0, a);
-                TILE({{WEI_DATA_TYPE}}, N0, K0, b);
-
-                LOOP_UNROLLING(int, i, 0, 1, M0,
-                {
-                    a[i].v = {{ZERO_VALUE}};
-                })
-
-                // Load tile from the src tensor
-                T_LOAD_NHWC_INDIRECT({{SRC_DATA_TYPE}}, M0, K0, {{SRC_TENSOR_TYPE}}, {{src}}, bout, yk, xk, ck, _ISRC_WIDTH, _ISRC_HEIGHT, {{src}}_stride_y, xi, yi, a);
-
-                // Load tile from the weights tensor
-                T_LOAD({{WEI_DATA_TYPE}}, N0, K0, {{WEI_TENSOR_TYPE}}, {{weight}}, ck, cout * _IY_MULTIPLIER + i, _IY_MULTIPLIER, {{weight}}_stride_y, b);
-
-                // Compute the matrix multiplication between two tiles
-                T_MMUL({{SRC_DATA_TYPE}}, {{WEI_DATA_TYPE}}, {{ACC_DATA_TYPE}}, M0, N0, K0, NT, T, a, b, {{dst}});
-
-                ck += K0;
-            }
-
-            // We voluntarily use SRC_CHANNELS rather than _DSRC_CHANNELS
-            // This #if directive should be removed in case of dynamic tensor support
-    )_";
-
-    if(leftover_loop)
-    {
-        code += R"_(
-            // Left-over accumulations
-            for(; k < _ISRC_CHANNELS; ++k)
-            {
-                TILE({{SRC_DATA_TYPE}}, M0, 1, a);
-                TILE({{WEI_DATA_TYPE}}, N0, 1, b);
-
-                LOOP_UNROLLING(int, i, 0, 1, M0,
-                {
-                    a[i].v = {{ZERO_VALUE}};
-                })
-
-                // Load tile from the src tensor
-                T_LOAD_NHWC_INDIRECT({{SRC_DATA_TYPE}}, M0, 1, {{SRC_TENSOR_TYPE}}, {{src}}, bout, yk, xk, ck, _ISRC_WIDTH, _ISRC_HEIGHT, {{src}}_stride_y, xi, yi, a);
-
-                // Load tile from the weights tensor
-                // The T_LOAD for the left-over elements can only use BUFFER because we load one element per iteration
-                T_LOAD({{WEI_DATA_TYPE}}, N0, 1, BUFFER, {{weight}}, ck, cout * _IY_MULTIPLIER + i, _IY_MULTIPLIER, {{weight}}_stride_y, b);
-
-                // Compute the matrix multiplication between two tiles
-                T_MMUL({{SRC_DATA_TYPE}}, {{WEI_DATA_TYPE}}, {{ACC_DATA_TYPE}}, M0, N0, 1, NT, T, a, b, {{dst}});
-
-                ++ck;
-            }
-        )_";
-    }
-
-    code += R"_(
-    #undef _I_WEI_WIDTH
-    #undef _I_WEI_HEIGHT
-    #undef _ISRC_WIDTH
-    #undef _ISRC_HEIGHT
-    #undef _ISRC_CHANNELS
-    #undef _IDST_WIDTH
-    #undef _IDST_HEIGHT
-    #undef _IDST_CHANNELS
-    #undef _IY_MULTIPLIER
-
-        }
-    )_";
-
-    if(bias_info != nullptr)
-    {
-        code += R"_(
-            TILE({{BIA_DATA_TYPE}}, 1, N0, bias0);
-
-            T_LOAD({{BIA_DATA_TYPE}}, 1, N0, BUFFER, {{bias}}, cout, 0, 1, 0, bias0);
-
-            // c = c + bias[broadcasted]
-            T_ELTWISE_BROADCAST_ADD_X({{ACC_DATA_TYPE}}, M0, N0, {{dst}}, bias0, {{dst}});
-        )_";
-    }
-
-    code += R"_(
-    }
-//------------------ END KERNEL {{meta_kernel_id}} ---------------------
-    )_";
-    return code.c_str();
-}
-
-bool export_to_cl_image_support(const ITensorInfo *tensor, GPUTarget gpu_target, DataLayout data_layout)
-{
-    if(tensor->tensor_shape()[0] % 4 || (data_layout != DataLayout::NHWC))
-    {
-        return false;
-    }
-
-    // If not floating point
-    if(!is_data_type_float(tensor->data_type()))
-    {
-        return false;
-    }
-
-    if(gpu_target == GPUTarget::G71 || get_arch_from_target(gpu_target) == GPUTarget::MIDGARD)
-    {
-        return false;
-    }
-
-    // Check if the cl_khr_image2d_from_buffer extension is supported on the target platform
-    if(!image2d_from_buffer_supported(CLKernelLibrary::get().get_device()))
-    {
-        return false;
-    }
-
-    // Check cl image pitch alignment
-    if(get_cl_image_pitch_alignment(CLKernelLibrary::get().get_device()) == 0)
-    {
-        return false;
-    }
-
-    const size_t image_w     = tensor->tensor_shape()[0] / 4;
-    const size_t image_h     = tensor->tensor_shape()[1] * tensor->tensor_shape()[2] * tensor->tensor_shape()[3];
-    const size_t max_image_w = CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_IMAGE2D_MAX_WIDTH>();
-    const size_t max_image_h = CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_IMAGE2D_MAX_HEIGHT>();
-
-    if(image_w > max_image_w || image_h > max_image_h)
-    {
-        return false;
-    }
-
-    return true;
-}
-
-CLBuildOptions ClDirectConvolutionKernelComponent::generate_build_options() const
-{
-    const auto src_info    = _blueprint->impl().get_kernel_argument_info(_src.arg_id);
-    auto       weight_info = _blueprint->impl().get_kernel_argument_info(_weight.arg_id);
-    const auto dst_info    = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id());
-    // const auto tile_info  = _blueprint->impl().get_tile_info();
-
-    const unsigned int channel_idx = get_data_layout_dimension_index(src_info->data_layout(), DataLayoutDimension::CHANNEL);
-    const DataType     data_type   = src_info->data_type();
-    const GPUTarget    gpu_target  = CLScheduler::get().target();
-
-    const unsigned int n0                 = _blueprint->impl().get_execution_window().x().step();
-    const unsigned int m0                 = _blueprint->impl().get_execution_window().y().step();
-    const unsigned int k0                 = adjust_vec_size(is_data_type_quantized(data_type) ? 16u : 8u, src_info->dimension(channel_idx));
-    const unsigned int partial_store_n0   = dst_info->dimension(0) % n0;
-    const bool         export_to_cl_image = export_to_cl_image_support(weight_info, gpu_target, src_info->data_layout());
-
-    // Update the padding for the weights tensor if we can export to cl_image
-    if(export_to_cl_image)
-    {
-        arm_compute::opencl::kernels::gemm::update_padding_for_cl_image(weight_info);
-    }
-
-    CLBuildOptions build_opts{};
-    build_opts.add_option("-cl-fast-relaxed-math");
-    build_opts.add_option("-DIS_TILED");
-    build_opts.add_option("-DN0=" + support::cpp11::to_string(n0));
-    build_opts.add_option("-DM0=" + support::cpp11::to_string(m0));
-    build_opts.add_option("-DK0=" + support::cpp11::to_string(k0));
-    build_opts.add_option("-DPARTIAL_N0=" + support::cpp11::to_string(partial_store_n0));
-
-    return build_opts;
-}
-
-void ClDirectConvolutionKernelComponent::allocate_shared_vars(SharedVarTable &vtable) const
-{
-    const auto src_info    = _blueprint->impl().get_kernel_argument_info(_src.arg_id);
-    const auto weight_info = _blueprint->impl().get_kernel_argument_info(_weight.arg_id);
-
-    vtable.add(_src, _blueprint->impl().group(_src.arg_id), ClKernelArgDescriptor(_src.arg_id, ClKernelTensorArgType::Tensor_4D_t_Buffer), "src");
-
-    const GPUTarget             gpu_target         = CLScheduler::get().target();
-    const bool                  export_to_cl_image = export_to_cl_image_support(weight_info, gpu_target, src_info->data_layout());
-    const ClKernelTensorArgType weight_type        = export_to_cl_image ? ClKernelTensorArgType::Tensor_4D_t_Image : ClKernelTensorArgType::Tensor_4D_t_Buffer;
-    vtable.add(_weight, _blueprint->impl().group(_weight.arg_id), ClKernelArgDescriptor(_weight.arg_id, weight_type), "weight");
-
-    if(!_bias.is_empty()) // optional bias
-    {
-        vtable.add(_bias, _blueprint->impl().group(_bias.arg_id), ClKernelArgDescriptor(_bias.arg_id, ClKernelTensorArgType::Vector), "bias");
-    }
-    vtable.add(_dst, _blueprint->impl().group(_dst.arg_id), ClKernelArgDescriptor(_dst.arg_id, ClKernelTensorArgType::Tensor_4D_t_Buffer), "dst");
-}
-
-ClDirectConvolutionKernelComponent::TagLUT ClDirectConvolutionKernelComponent::get_tag_lut(const SharedVarTable &vtable) const
-{
-    TagLUT lut{};
-
-    const auto src_info    = _blueprint->impl().get_kernel_argument_info(_src.arg_id);
-    const auto weight_info = _blueprint->impl().get_kernel_argument_info(_weight.arg_id);
-    const auto bias_info   = _blueprint->impl().get_kernel_argument_info(_bias.arg_id);
-
-    // Arguments and global shared variables
-    lut["src"]    = vtable.get(_src);
-    lut["weight"] = vtable.get(_weight);
-
-    if(!_bias.is_empty()) // optional bias
-    {
-        lut["bias"]          = vtable.get(_bias);
-        lut["BIA_DATA_TYPE"] = get_cl_type_from_data_type(bias_info->data_type());
-    }
-    lut["dst"] = vtable.get(_dst);
-
-    const auto dst_argument = _blueprint->impl().get_argument_shared_vars().get_dst_var();
-    lut["arg_dst"]          = dst_argument.uniq_name;
-
-    // Local build options
-    lut["meta_kernel_id"] = id();
-    lut["ACC_DATA_TYPE"]  = src_info->data_type();
-    lut["SRC_DATA_TYPE"]  = src_info->data_type();
-    lut["WEI_DATA_TYPE"]  = weight_info->data_type();
-
-    lut["SRC_TENSOR_TYPE"] = "BUFFER";
-    switch(vtable.get(_weight).desc.tensor_arg_type)
-    {
-        case ClKernelTensorArgType::Image_Export_To_ClImage2D:
-        case ClKernelTensorArgType::Image_3D_Export_To_ClImage2D:
-        case ClKernelTensorArgType::Tensor_4D_t_Image:
-        {
-            lut["WEI_TENSOR_TYPE"] = "IMAGE";
-            break;
-        }
-        default:
-        {
-            lut["WEI_TENSOR_TYPE"] = "BUFFER";
-            break;
-        }
-    }
-    const auto width_idx  = get_data_layout_dimension_index(src_info->data_layout(), DataLayoutDimension::WIDTH);
-    const auto height_idx = get_data_layout_dimension_index(src_info->data_layout(), DataLayoutDimension::HEIGHT);
-    lut["WEI_WIDTH"]      = weight_info->dimension(width_idx);
-    lut["WEI_HEIGHT"]     = weight_info->dimension(height_idx);
-
-    lut["STRIDE_X"] = _desc.conv2d.stride.x();
-    lut["STRIDE_Y"] = _desc.conv2d.stride.y();
-
-    lut["PAD_LEFT"] = _desc.conv2d.pad.left;
-    lut["PAD_TOP"]  = _desc.conv2d.pad.top;
-
-    lut["ZERO_VALUE"] = 0;
-
-    return lut;
-}
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
-#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */
-\ No newline at end of file
diff --git a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClDirectConvolutionKernelComponent.h b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClDirectConvolutionKernelComponent.h
deleted file mode 100644
index 5babdbab51..0000000000
--- a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClDirectConvolutionKernelComponent.h
+++ /dev/null
@@ -1,81 +0,0 @@
-/*
- * Copyright (c) 2022 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION
-
-#ifndef ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_COMPONENTS_CLDIRECTCONVOLUTIONKERNELCOMPONENT_H
-#define ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_COMPONENTS_CLDIRECTCONVOLUTIONKERNELCOMPONENT_H
-
-#include "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/Common.h"
-
-#include "utils/TypePrinter.h"
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-class ClDirectConvolutionKernelComponent : public IClKernelComponent
-{
-public:
-    ClDirectConvolutionKernelComponent(ClKernelBlueprint *blueprint, const ClDirectConv2dKernelDescriptor &desc,
-                                       const Link &src, const Link &weight, const Link &dst, const Link &bias = Link{})
-        : IClKernelComponent(blueprint), _desc{ desc }, _src{ src }, _weight{ weight }, _bias{ bias }, _dst{ dst }
-    {
-    }
-
-    ComponentType         get_component_type() const override;
-    std::set<std::string> get_headers_list() const override;
-    std::string           get_additional_macros() const override;
-    std::string           get_component_code() const override;
-    Window                get_window() const override;
-    ClKernelArgList       get_args();
-    CLBuildOptions        generate_build_options() const override;
-
-    virtual std::vector<Link> get_links() const override
-    {
-        return { _src, _weight, _bias, _dst };
-    }
-
-    virtual TagLUT get_tag_lut(const SharedVarTable &vtable) const override;
-    virtual void allocate_shared_vars(SharedVarTable &vtable) const override;
-
-    virtual std::string name() const override
-    {
-        return "direct_convolution_" + to_string(_blueprint->impl().get_kernel_argument_info(_src.arg_id)->data_layout()) + "_" + std::to_string(id());
-    }
-
-private:
-    ClDirectConv2dKernelDescriptor _desc{};
-    Link                           _src{};
-    Link                           _weight{};
-    Link                           _bias{};
-    Link                           _dst{};
-};
-
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
-#endif // ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_COMPONENTS_CLDIRECTCONVOLUTIONKERNELCOMPONENT_H
-#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */
-\ No newline at end of file
diff --git a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseKernelComponent.cpp b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseKernelComponent.cpp
deleted file mode 100644
index e2eba68a63..0000000000
--- a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseKernelComponent.cpp
+++ /dev/null
@@ -1,266 +0,0 @@
-/*
- * Copyright (c) 2022 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION
-
-#include "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseKernelComponent.h"
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/Validate.h"
-#include "src/core/helpers/AutoConfiguration.h"
-#include "src/core/helpers/WindowHelpers.h"
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-ComponentType ClElementwiseKernelComponent::get_component_type() const
-{
-    return ComponentType::Simple;
-}
-
-std::set<std::string> ClElementwiseKernelComponent::get_headers_list() const
-{
-    return std::set<std::string> { "common/experimental/gemm_fused_post_ops/fp_mixed_precision_helpers.h", "tile_helpers.h" };
-}
-
-Window ClElementwiseKernelComponent::get_window() const
-{
-    const ITensorInfo *lhs_info = _blueprint->impl().get_kernel_argument_info(_lhs.arg_id);
-    const ITensorInfo *rhs_info = _blueprint->impl().get_kernel_argument_info(_rhs.arg_id);
-    ITensorInfo       *dst_info = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id());
-
-    ARM_COMPUTE_ERROR_ON_NULLPTR(lhs_info, rhs_info, dst_info);
-
-    const std::pair<TensorShape, ValidRegion> broadcast_pair = ITensorInfo::broadcast_shape_and_valid_region(*lhs_info, *rhs_info);
-    const TensorShape &out_shape = broadcast_pair.first;
-
-    auto_init_if_empty(*dst_info, out_shape, 1, lhs_info->data_type());
-
-    TensorShape output_shape = dst_info->tensor_shape();
-    // Collapse Dim 1 (W) and Dim 2 (H) together, leave Dim 0 (C) and upper dimensions unchanged
-    // This is in line with the collapsing convention used by Conv2d
-    output_shape.collapse(2U, 1U);
-    const unsigned int vector_size_byte_opencl           = 16;
-    const unsigned int num_elems_processed_per_iteration = adjust_vec_size(vector_size_byte_opencl / dst_info->element_size(), dst_info->dimension(0));
-    Window             win                               = calculate_max_window(output_shape, Steps(num_elems_processed_per_iteration));
-
-    return win;
-}
-
-std::string ClElementwiseKernelComponent::get_component_code() const
-{
-    std::string code;
-    const bool  is_root = _blueprint->impl().group(_lhs.arg_id) == SharedVarGroup::Argument && _blueprint->impl().group(_rhs.arg_id) == SharedVarGroup::Argument;
-
-    if(is_root)
-    {
-        return R"_(
-    //------------------ START KERNEL {{meta_kernel_id}} ELTWISE_OP ---------------------
-    // IN_0(LHS)            {{lhs}}
-    // IN_1(RHS)            {{rhs}}
-    // OUT(dst, accum)      {{dst}}
-
-    // dst = lhs + rhs (mix-precision, broadcast, boundary aware)
-    TILE({{DATA_TYPE}}, M0, N0, {{dst}});
-    {
-        TILE({{DATA_TYPE}}, M0, N0, lhs_tile);
-        TILE({{DATA_TYPE}}, M0, N0, rhs_tile);
-
-        // Since mout maps to dimensions 1 (y) and dimension 2 (z) of the input tensor because of the collapsed window, bout maps to dimension 3 (w)
-        {{lhs}}_offset_first_element_in_bytes += bout * {{lhs}}_stride_w;
-        {{rhs}}_offset_first_element_in_bytes += bout * {{rhs}}_stride_w;
-
-        T_LOAD({{DATA_TYPE}}, M0, N0, BUFFER, {{lhs}}, cout, mout, 1, {{lhs}}_stride_y, lhs_tile);
-        T_LOAD({{DATA_TYPE}}, {{rhs_m0}}, {{rhs_n0}}, BUFFER, {{rhs}}, {{rhs_start_x}}, {{rhs_start_y}}, 1, {{rhs}}_stride_y, rhs_tile);
-
-#if defined(IS_BROADCAST)
-        T_ELTWISE_BROADCAST_{{ELTWISE_OP}}_X({{DATA_TYPE}}, M0, N0, lhs_tile, rhs_tile, {{dst}});
-#else // !defined(IS_BROADCAST)
-        T_ELTWISE_{{ELTWISE_OP}}({{DATA_TYPE}}, M0, N0, lhs_tile, rhs_tile, {{dst}});
-#endif // defined(IS_BROADCAST)
-
-    }
-    //------------------ END KERNEL {{meta_kernel_id}} ELTWISE_OP ---------------------
-)_";
-    }
-    else
-    {
-        return R"_(
-    //------------------ START KERNEL {{meta_kernel_id}} ELTWISE_OP ---------------------
-    // IN_0/Out(Accumulator)   {{acc}}
-    // IN_1(Addend)        {{addend}}
-
-    // acc = addend + acc (mix-precision, broadcast, boundary aware)
-    {
-        TILE({{DATA_TYPE}}, M0, N0, addend_tile);
-
-        T_LOAD({{DATA_TYPE}}, {{rhs_m0}}, {{rhs_n0}}, BUFFER, {{addend}}, {{rhs_start_x}}, {{rhs_start_y}}, 1, {{addend}}_stride_y, addend_tile);
-
-#if defined(IS_BROADCAST)
-        T_ELTWISE_BROADCAST_{{ELTWISE_OP}}_X({{DATA_TYPE}}, M0, N0, {{acc}}, addend_tile, {{acc}});
-#else // !defined(IS_BROADCAST)
-        T_ELTWISE_{{ELTWISE_OP}}({{DATA_TYPE}}, M0, N0, {{acc}}, addend_tile, {{acc}});
-#endif // defined(IS_BROADCAST)
-    }
-    //------------------ END KERNEL {{meta_kernel_id}} ELTWISE_OP ---------------------
-)_";
-    }
-}
-
-CLBuildOptions ClElementwiseKernelComponent::generate_build_options() const
-{
-    const auto t_rhs_info = _blueprint->impl().get_kernel_argument_info(_rhs.arg_id);
-    const auto t_dst_info = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id());
-
-    CLBuildOptions     build_opts{};
-    const auto         n0               = _blueprint->impl().get_execution_window().x().step();
-    const auto         m0               = _blueprint->impl().get_execution_window().y().step();
-    const unsigned int partial_store_n0 = t_dst_info->dimension(0) % n0;
-    const bool         is_broadcast     = t_rhs_info->tensor_shape() != t_dst_info->tensor_shape();
-
-    build_opts.add_option("-DM0=" + support::cpp11::to_string(m0));
-    build_opts.add_option("-DN0=" + support::cpp11::to_string(n0));
-    build_opts.add_option("-DPARTIAL_N0=" + support::cpp11::to_string(partial_store_n0));
-    build_opts.add_option_if(is_broadcast, "-DIS_BROADCAST");
-
-    return build_opts;
-}
-
-std::string ClElementwiseKernelComponent::generate_config_id() const
-{
-    auto        t_dst_info = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id());
-    std::string config_id{};
-    config_id += lower_string(string_from_data_type(t_dst_info->data_type()));
-    config_id += "_";
-    config_id += support::cpp11::to_string(t_dst_info->dimension(0));
-    config_id += "_";
-    config_id += support::cpp11::to_string(t_dst_info->dimension(1));
-    config_id += "_";
-    config_id += lower_string(string_from_data_layout(t_dst_info->data_layout()));
-    return config_id;
-}
-
-void ClElementwiseKernelComponent::allocate_shared_vars(SharedVarTable &vtable) const
-{
-    const bool is_root = _blueprint->impl().group(_lhs.arg_id) == SharedVarGroup::Argument && _blueprint->impl().group(_rhs.arg_id) == SharedVarGroup::Argument;
-    vtable.add(_lhs, _blueprint->impl().group(_lhs.arg_id), ClKernelArgDescriptor(_lhs.arg_id, ClKernelTensorArgType::Tensor_4D_t_Buffer), "lhs");
-    vtable.add(_rhs, _blueprint->impl().group(_rhs.arg_id), ClKernelArgDescriptor(_rhs.arg_id, ClKernelTensorArgType::Tensor_4D_t_Buffer), "rhs");
-    if(is_root)
-    {
-        vtable.add(_dst, _blueprint->impl().group(_dst.arg_id), ClKernelArgDescriptor(_dst.arg_id, ClKernelTensorArgType::Tensor_4D_t_Buffer), "dst");
-    }
-}
-
-ClElementwiseKernelComponent::TagLUT ClElementwiseKernelComponent::get_tag_lut(const SharedVarTable &vtable) const
-{
-    TagLUT       lut{};
-    const auto   t_dst_info    = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id());
-    ITensorInfo *t_addend_info = nullptr;
-    // Arguments and global shared variables
-    const bool is_root = _blueprint->impl().group(_lhs.arg_id) == SharedVarGroup::Argument && _blueprint->impl().group(_rhs.arg_id) == SharedVarGroup::Argument;
-    if(is_root)
-    {
-        lut["lhs"]    = vtable.get(_lhs);
-        lut["rhs"]    = vtable.get(_rhs);
-        lut["dst"]    = vtable.get(_dst);
-        t_addend_info = _blueprint->impl().get_kernel_argument_info(_rhs.arg_id);
-    }
-    else
-    {
-        // Determine which link is the accumulator
-        Link accumulator;
-        Link addend;
-        if(_blueprint->impl().group(_lhs.arg_id) == SharedVarGroup::Automatic)
-        {
-            accumulator = _lhs;
-            addend      = _rhs;
-        }
-        else if(_blueprint->impl().group(_rhs.arg_id) == SharedVarGroup::Automatic)
-        {
-            accumulator = _rhs;
-            addend      = _lhs;
-        }
-        else
-        {
-            ARM_COMPUTE_ERROR("Invalid elementwise component linking");
-        }
-        lut["acc"]    = vtable.get(accumulator);
-        lut["addend"] = vtable.get(addend);
-        t_addend_info = _blueprint->impl().get_kernel_argument_info(addend.arg_id);
-    }
-    // Local build options
-    lut["meta_kernel_id"] = id();
-    lut["DATA_TYPE"]      = get_cl_type_from_data_type(t_dst_info->data_type());
-
-    switch(_desc.eltwise.op)
-    {
-        case ArithmeticOperation::DIV:
-            lut["ELTWISE_OP"] = "DIV";
-            break;
-        case ArithmeticOperation::ADD:
-            lut["ELTWISE_OP"] = "ADD";
-            break;
-        default:
-            ARM_COMPUTE_ERROR("Arithmetic Operation not supported");
-    }
-
-    // Set broadcast parameters
-    // PRE: All tensors are broadcast-compatible
-    const bool is_broadcast = t_addend_info->tensor_shape() != t_dst_info->tensor_shape();
-    if(is_broadcast)
-    {
-        // Note that n0 maps to input tensor dimension 0, m0 maps to input dimensions 1 and 2 because of our collapse strategy
-        if(t_addend_info->dimension(0) == 1U && t_addend_info->dimension(1) == 1U && t_addend_info->dimension(2) == 1U) // Broadcast in X, Y, Z: collapsed rhs win [M0xN0] = [1x1]
-        {
-            lut["rhs_m0"]      = "1";
-            lut["rhs_n0"]      = "1";
-            lut["rhs_start_y"] = "0";
-            lut["rhs_start_x"] = "0";
-        }
-        else if(t_addend_info->dimension(1) == 1U && t_addend_info->dimension(2) == 1U) // Broadcast in Y and Z: collapsed rhs win [M0xN0] = [1xN]
-        {
-            lut["rhs_m0"]      = "1";
-            lut["rhs_n0"]      = "N0";
-            lut["rhs_start_y"] = "0";
-            lut["rhs_start_x"] = "cout";
-        }
-        else
-        {
-            ARM_COMPUTE_ERROR("Only support rhs broadcasting in all X, Y, Z dimensions, or just in Y and Z dimensions");
-        }
-    }
-    else
-    {
-        lut["rhs_m0"]      = "M0";
-        lut["rhs_n0"]      = "N0";
-        lut["rhs_start_y"] = "mout";
-        lut["rhs_start_x"] = "cout";
-    }
-    return lut;
-}
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
-#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */
-\ No newline at end of file
diff --git a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseKernelComponent.h b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseKernelComponent.h
deleted file mode 100644
index f8377457d3..0000000000
--- a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseKernelComponent.h
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * Copyright (c) 2022 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION
-
-#ifndef ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_COMPONENTS_CLELEMENTWISEADDKERNELCOMPONENT_H
-#define ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_COMPONENTS_CLELEMENTWISEADDKERNELCOMPONENT_H
-
-#include "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/Common.h"
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-class ClElementwiseKernelComponent : public IClKernelComponent
-{
-public:
-    /** Construct a new Cl Elementwise Kernel Component object
-     *
-     * @param[in]  blueprint Blueprint to which this component is added
-     * @param[in]  desc      Component descriptor
-     * @param[in]  lhs       Link to LHS tensor
-     * @param[in]  rhs       Link to RHS tensor
-     * @param[out] dst       Link to DST tensor
-     *
-     * Support Level
-     * Data Type:       F16, F32
-     * Tensor Shape:    Any shape of arbitrary dimension >= 1 and <= 4
-     * Value Range:     All
-     * Broadcasting:    Only RHS tensor can be broadcasted into LHS. Only support broadcasting in dimension 1 and dimension 2 or all dimension 0, 1 and 2
-     */
-    ClElementwiseKernelComponent(ClKernelBlueprint *blueprint, const ClElementwiseKernelDescriptor &desc, const Link &lhs, const Link &rhs, const Link &dst)
-        : IClKernelComponent(blueprint), _desc{ desc }, _lhs{ lhs }, _rhs{ rhs }, _dst{ dst }
-    {
-    }
-
-    ComponentType         get_component_type() const override;
-    std::set<std::string> get_headers_list() const override;
-    std::string           get_component_code() const override;
-    Window                get_window() const override;
-    CLBuildOptions        generate_build_options() const override;
-    std::string           generate_config_id() const override;
-
-    virtual std::vector<Link> get_links() const override
-    {
-        return { _lhs, _rhs, _dst };
-    }
-
-    virtual TagLUT get_tag_lut(const SharedVarTable &vtable) const override;
-    virtual void allocate_shared_vars(SharedVarTable &vtable) const override;
-
-    virtual std::string name() const override
-    {
-        return "eltwise_add_" + std::to_string(id());
-    }
-
-private:
-    ClElementwiseKernelDescriptor _desc{};
-    Link                          _lhs{};
-    Link                          _rhs{};
-    Link                          _dst{};
-};
-
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
-#endif // ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_COMPONENTS_CLELEMENTWISEADDKERNELCOMPONENT_H
-#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */
-\ No newline at end of file
diff --git a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClFloorKernelComponent.cpp b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClFloorKernelComponent.cpp
deleted file mode 100644
index 0a20a8f600..0000000000
--- a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClFloorKernelComponent.cpp
+++ /dev/null
@@ -1,153 +0,0 @@
-/*
- * Copyright (c) 2022 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION
-#include "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClFloorKernelComponent.h"
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/Validate.h"
-#include "src/core/helpers/AutoConfiguration.h"
-#include "src/core/helpers/WindowHelpers.h"
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-ComponentType ClFloorKernelComponent::get_component_type() const
-{
-    return ComponentType::Simple;
-}
-std::set<std::string> ClFloorKernelComponent::get_headers_list() const
-{
-    return std::set<std::string> { "common/experimental/gemm_fused_post_ops/fp_mixed_precision_helpers.h", "tile_helpers.h" };
-}
-Window ClFloorKernelComponent::get_window() const
-{
-    const ITensorInfo *src_info = _blueprint->impl().get_kernel_argument_info(_src.arg_id);
-    ITensorInfo       *dst_info = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id());
-
-    ARM_COMPUTE_ERROR_ON_NULLPTR(src_info, dst_info);
-    auto_init_if_empty(*dst_info, src_info->tensor_shape(), 1, src_info->data_type());
-
-    TensorShape output_shape = dst_info->tensor_shape();
-    // Collapse Dim 1 (W) and Dim 2 (H) together, leave Dim 0 (C) and upper dimensions unchanged
-    // This is in line with the collapsing convention used by Conv2d
-    output_shape.collapse(2U, 1U);
-    const unsigned int vector_size_byte_opencl           = 16;
-    const unsigned int num_elems_processed_per_iteration = adjust_vec_size(vector_size_byte_opencl / dst_info->element_size(), dst_info->dimension(0));
-    Window             win                               = calculate_max_window(output_shape, Steps(num_elems_processed_per_iteration));
-
-    return win;
-}
-std::string ClFloorKernelComponent::get_component_code() const
-{
-    bool is_root = _blueprint->impl().group(_src.arg_id) == SharedVarGroup::Argument;
-    if(is_root)
-    {
-        return R"_(
-    //------------------ START KERNEL {{meta_kernel_id}} FLOOR ---------------------
-    // IN_0(src)            {{src}}
-    // OUT(dst, accum)      {{dst}}
-    TILE({{DATA_TYPE}}, M0, N0, {{dst}});
-    {
-        TILE({{DATA_TYPE}}, M0, N0, src_tile);
-
-        // Since mout maps to dimensions 1 (y) and dimension 2 (z) of the input tensor because of the collapsed window, bout maps to dimension 3 (w)
-        {{src}}_offset_first_element_in_bytes += bout * {{src}}_stride_w;
-        T_LOAD({{DATA_TYPE}}, M0, N0, BUFFER, {{src}}, cout, mout, 1, {{src}}_stride_y, src_tile);
-
-        T_FLOOR({{DATA_TYPE}}, M0, N0, src_tile, {{dst}});
-    }
-    //------------------ END KERNEL {{meta_kernel_id}} FLOOR ---------------------
-)_";
-    }
-    else
-    {
-        return R"_(
-    //------------------ START KERNEL {{meta_kernel_id}} FLOOR ---------------------
-    // IN_0/Out(Accumulator)        {{acc}}
-    // output = floor(input)
-    {
-        T_FLOOR({{DATA_TYPE}}, M0, N0, {{acc}}, {{acc}});
-    }
-    //------------------ END KERNEL {{meta_kernel_id}} FLOOR ---------------------
-)_";
-    }
-}
-CLBuildOptions ClFloorKernelComponent::generate_build_options() const
-{
-    CLBuildOptions     build_opts{};
-    const auto         n0               = _blueprint->impl().get_execution_window().x().step();
-    const auto         m0               = _blueprint->impl().get_execution_window().y().step();
-    const auto         dst_info         = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id());
-    const unsigned int partial_store_n0 = dst_info->dimension(0) % n0;
-    build_opts.add_option("-DM0=" + support::cpp11::to_string(m0));
-    build_opts.add_option("-DN0=" + support::cpp11::to_string(n0));
-    build_opts.add_option("-DPARTIAL_N0=" + support::cpp11::to_string(partial_store_n0));
-    return build_opts;
-}
-std::string ClFloorKernelComponent::generate_config_id() const
-{
-    auto        t_dst_info = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id());
-    std::string config_id{};
-    config_id += lower_string(string_from_data_type(t_dst_info->data_type()));
-    config_id += "_";
-    config_id += support::cpp11::to_string(t_dst_info->dimension(0));
-    config_id += "_";
-    config_id += support::cpp11::to_string(t_dst_info->dimension(1));
-    config_id += "_";
-    config_id += lower_string(string_from_data_layout(t_dst_info->data_layout()));
-    return config_id;
-}
-void ClFloorKernelComponent::allocate_shared_vars(SharedVarTable &vtable) const
-{
-    vtable.add(_src, _blueprint->impl().group(_src.arg_id), ClKernelArgDescriptor(_src.arg_id, ClKernelTensorArgType::Tensor_4D_t_Buffer), "src");
-    vtable.add(_dst, _blueprint->impl().group(_dst.arg_id), ClKernelArgDescriptor(_dst.arg_id, ClKernelTensorArgType::Tensor_4D_t_Buffer), "dst");
-}
-ClFloorKernelComponent::TagLUT ClFloorKernelComponent::get_tag_lut(const SharedVarTable &vtable) const
-{
-    TagLUT     lut{};
-    const auto t_dst_info = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id());
-    // Arguments and global shared variables
-    const bool is_root = _blueprint->impl().group(_src.arg_id) == SharedVarGroup::Argument;
-
-    if(is_root)
-    {
-        lut["src"] = vtable.get(_src);
-        lut["dst"] = vtable.get(_dst);
-    }
-    else
-    {
-        lut["acc"] = vtable.get(_src);
-    }
-
-    lut["meta_kernel_id"] = id();
-    lut["DATA_TYPE"]      = get_cl_type_from_data_type(t_dst_info->data_type());
-    return lut;
-}
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
-#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */
-\ No newline at end of file
diff --git a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClFloorKernelComponent.h b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClFloorKernelComponent.h
deleted file mode 100644
index e791b36382..0000000000
--- a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClFloorKernelComponent.h
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Copyright (c) 2022 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION
-
-#ifndef ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_COMPONENTS_CLFLOORKERNELCOMPONENT_H
-#define ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_COMPONENTS_CLFLOORKERNELCOMPONENT_H
-
-#include "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/Common.h"
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-class ClFloorKernelComponent : public IClKernelComponent
-{
-public:
-    /** Construct a new Cl Floor Kernel Component object
-     *
-     * @param blueprint  Blueprint to which this component is added
-     * @param src        Link to SRC tensor
-     * @param dst        Link to DST tensor
-     *
-     * Support Level
-     * Data Type:       F16, F32
-     * Tensor Shape:    Any shape of arbitrary dimension >= 1 and <= 4
-     * Value Range:     All
-     */
-    ClFloorKernelComponent(ClKernelBlueprint *blueprint, const Link &src, const Link &dst)
-        : IClKernelComponent(blueprint), _src{ src }, _dst{ dst }
-    {
-    }
-
-    ComponentType         get_component_type() const override;
-    std::set<std::string> get_headers_list() const override;
-    std::string           get_component_code() const override;
-    Window                get_window() const override;
-    CLBuildOptions        generate_build_options() const override;
-    std::string           generate_config_id() const override;
-
-    virtual std::vector<Link> get_links() const override
-    {
-        return { _src, _dst };
-    }
-
-    virtual TagLUT get_tag_lut(const SharedVarTable &vtable) const override;
-    virtual void allocate_shared_vars(SharedVarTable &vtable) const override;
-
-    virtual std::string name() const override
-    {
-        return "floor_" + std::to_string(id());
-    }
-
-private:
-    Link _src{};
-    Link _dst{};
-};
-
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
-#endif // ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_COMPONENTS_CLFLOORKERNELCOMPONENT_H
-#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */
diff --git a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClKernelComponents.h b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClKernelComponents.h
deleted file mode 100644
index 3f99dd5553..0000000000
--- a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClKernelComponents.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * Copyright (c) 2022 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION
-
-#ifndef ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_CLKERNELCOMPONENTS_H
-#define ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_CLKERNELCOMPONENTS_H
-
-#include "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClDirectConvolutionKernelComponent.h"
-#include "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseKernelComponent.h"
-#include "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClFloorKernelComponent.h"
-#include "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClStoreKernelComponents.h"
-
-#endif //ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_CLKERNELCOMPONENTS_H
-#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */
-\ No newline at end of file
diff --git a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClStoreKernelComponents.cpp b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClStoreKernelComponents.cpp
deleted file mode 100644
index 7c805d5368..0000000000
--- a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClStoreKernelComponents.cpp
+++ /dev/null
@@ -1,171 +0,0 @@
-/*
- * Copyright (c) 2022 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION
-
-#include "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClStoreKernelComponents.h"
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-ComponentType ClStoreBlockBoundaryAwareKernelComponent::get_component_type() const
-{
-    return ComponentType::Store;
-}
-
-std::string ClStoreBlockBoundaryAwareKernelComponent::get_component_code() const
-{
-    return R"_(
-    //------------------ START KERNEL {{meta_kernel_id}} STORE ---------------------
-
-    __global uchar *dst_addr = {{dst}}_ptr + {{dst}}_offset_first_element_in_bytes + (g_x * (uint)N0 * sizeof(DATA_TYPE)) + (COMPUTE_M0_START_ROW(g_y, M0, PARTIAL_STORE_M0) * {{dst}}_stride_y);
-
-#if defined(REINTERPRET_OUTPUT_AS_3D)
-    // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
-    // multiply dst_stride_z by DEPTH_GEMM3D
-    dst_addr += g_z * {{dst}}_stride_z * DEPTH_GEMM3D;
-
-#else // defined(REINTERPRET_OUTPUT_AS_3D)
-
-    // Add offset for batched GEMM
-    dst_addr += g_z * {{dst}}_stride_z;
-
-#endif // defined(REINTERPRET_OUTPUT_AS_3D)
-
-    STORE_BLOCK_BOUNDARY_AWARE(M0, N0, DATA_TYPE, {{src}}, dst_addr, {{dst}}_stride_y, g_zout, PARTIAL_STORE_M0, PARTIAL_STORE_N0, g_cond_y, g_cond_x);
-
-    //------------------ END KERNEL {{meta_kernel_id}} STORE ---------------------
-
-)_";
-}
-
-CLBuildOptions ClStoreBlockBoundaryAwareKernelComponent::generate_build_options() const
-{
-    auto t_dst_info = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id());
-    // auto tile_info  = _blueprint->impl().get_tile_info();
-
-    CLBuildOptions build_opts{};
-
-    const auto n0         = _blueprint->impl().get_execution_window().x().step();
-    const auto m0         = _blueprint->impl().get_execution_window().y().step();
-    const auto partial_m0 = t_dst_info->dimension(0) % m0;
-    const auto partial_n0 = t_dst_info->dimension(1) % n0;
-
-    build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(t_dst_info->data_type()));
-    build_opts.add_option("-DM0=" + support::cpp11::to_string(m0));
-    build_opts.add_option("-DN0=" + support::cpp11::to_string(n0));
-    build_opts.add_option("-DPARTIAL_STORE_M0=" + support::cpp11::to_string(partial_m0));
-    build_opts.add_option("-DPARTIAL_STORE_N0=" + support::cpp11::to_string(partial_n0));
-
-    return build_opts;
-}
-
-void ClStoreBlockBoundaryAwareKernelComponent::allocate_shared_vars(SharedVarTable &vtable) const
-{
-    vtable.add(_src, _blueprint->impl().group(_src.arg_id), ClKernelArgDescriptor(_src.arg_id, ClKernelTensorArgType::Image_3D), "src");
-    vtable.add(_dst, _blueprint->impl().group(_dst.arg_id), ClKernelArgDescriptor(_dst.arg_id, ClKernelTensorArgType::Image_3D), "dst");
-}
-
-ClStoreBlockBoundaryAwareKernelComponent::TagLUT ClStoreBlockBoundaryAwareKernelComponent::get_tag_lut(const SharedVarTable &vtable) const
-{
-    return {
-        { "meta_kernel_id", id() },
-        { "src", vtable.get(_src) },
-        { "dst", vtable.get(_dst) },
-    };
-}
-
-ComponentType ClStoreIndirectWidthSelectKernelComponent::get_component_type() const
-{
-    return ComponentType::Store;
-}
-
-std::string ClStoreIndirectWidthSelectKernelComponent::get_component_code() const
-{
-    return R"_(
-    //------------------ START KERNEL {{meta_kernel_id}} STORE ---------------------
-    {
-    // This also follows NHWC layout
-    // cout maps to global_id(0) maps to Channel
-    // mout maps to global_id(1) maps to Height and Weight (Collapsed Window)
-    // bout maps to global_id(3) maps to N / Batch
-    #define _IDST_WIDTH {{dst}}_w
-    #define _IDST_HEIGHT {{dst}}_h
-        TILE(uint, M0, 1, dst_indirect_y);
-
-        // Calculate the destination indirect Y
-        LOOP_UNROLLING(int, i, 0, 1, M0,
-        {
-            dst_indirect_y[i].v = (uint)min(mout + i, (int)(_IDST_WIDTH * _IDST_HEIGHT) - 1);
-            dst_indirect_y[i].v += bout * (int)(_IDST_WIDTH * _IDST_HEIGHT);
-        })
-
-        bool x_cond = PARTIAL_N0 != 0 && get_global_id(0) == 0;
-
-        T_STORE_INDIRECT_WIDTH_SELECT({{DST_DATA_TYPE}}, M0, N0, PARTIAL_N0, {{DST_TENSOR_TYPE}}, {{dst}}, cout, {{dst}}_stride_y, x_cond, {{src}}, dst_indirect_y);
-
-    #undef _IDST_WIDTH
-    #undef _IDST_HEIGHT
-        //------------------ END KERNEL {{meta_kernel_id}} STORE ---------------------
-    }
-
-)_";
-}
-
-CLBuildOptions ClStoreIndirectWidthSelectKernelComponent::generate_build_options() const
-{
-    CLBuildOptions build_opts{};
-
-    return build_opts;
-}
-
-void ClStoreIndirectWidthSelectKernelComponent::allocate_shared_vars(SharedVarTable &vtable) const
-{
-    vtable.add(_src, _blueprint->impl().group(_src.arg_id), ClKernelArgDescriptor(_src.arg_id, ClKernelTensorArgType::Tensor_4D_t_Buffer), "src");
-    vtable.add(_dst, _blueprint->impl().group(_dst.arg_id), ClKernelArgDescriptor(_dst.arg_id, ClKernelTensorArgType::Tensor_4D_t_Buffer), "dst");
-}
-
-ClStoreIndirectWidthSelectKernelComponent::TagLUT ClStoreIndirectWidthSelectKernelComponent::get_tag_lut(const SharedVarTable &vtable) const
-{
-    TagLUT lut{};
-
-    // Arguments and global shared variables
-    lut["src"] = vtable.get(_src);
-    lut["dst"] = vtable.get(_dst);
-
-    // Local build options
-    lut["meta_kernel_id"]  = id();
-    lut["DST_TENSOR_TYPE"] = "BUFFER";
-    const auto dst_info    = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id());
-    lut["DST_DATA_TYPE"]   = dst_info->data_type();
-
-    return lut;
-}
-
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
-#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */
-\ No newline at end of file
diff --git a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClStoreKernelComponents.h b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClStoreKernelComponents.h
deleted file mode 100644
index e0b188dc8d..0000000000
--- a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClStoreKernelComponents.h
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
- * Copyright (c) 2022 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION
-
-#ifndef ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_COMPONENTS_CLSTOREKERNELCOMPONENTS_H
-#define ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_COMPONENTS_CLSTOREKERNELCOMPONENTS_H
-
-#include "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/Common.h"
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-class ClStoreBlockBoundaryAwareKernelComponent : public IClKernelComponent
-{
-public:
-    ClStoreBlockBoundaryAwareKernelComponent(ClKernelBlueprint *blueprint, const Link &src, const Link &dst)
-        : IClKernelComponent(blueprint), _src{ src }, _dst{ dst }
-    {
-    }
-    ComponentType  get_component_type() const override;
-    std::string    get_component_code() const override;
-    CLBuildOptions generate_build_options() const override;
-    TagLUT get_tag_lut(const SharedVarTable &vtable) const override;
-    void allocate_shared_vars(SharedVarTable &vtable) const override;
-
-    virtual std::vector<Link> get_links() const override
-    {
-        return { _src, _dst };
-    }
-
-    virtual std::string name() const override
-    {
-        return "";
-    }
-
-private:
-    Link _src{};
-    Link _dst{};
-};
-
-class ClStoreIndirectWidthSelectKernelComponent : public IClKernelComponent
-{
-public:
-    ClStoreIndirectWidthSelectKernelComponent(ClKernelBlueprint *blueprint, const Link &src, const Link &dst)
-        : IClKernelComponent(blueprint), _src{ src }, _dst{ dst }
-    {
-    }
-    ComponentType  get_component_type() const override;
-    std::string    get_component_code() const override;
-    CLBuildOptions generate_build_options() const override;
-    virtual TagLUT get_tag_lut(const SharedVarTable &vtable) const override;
-    void allocate_shared_vars(SharedVarTable &vtable) const override;
-
-    virtual std::vector<Link> get_links() const override
-    {
-        return { _src, _dst };
-    }
-
-    virtual std::string name() const override
-    {
-        return "";
-    }
-
-private:
-    Link _src{};
-    Link _dst{};
-};
-
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
-#endif // ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_COMPONENTS_CLSTOREKERNELCOMPONENTS_H
-#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */
-\ No newline at end of file
diff --git a/src/core/experimental/dynamic_fusion/OperatorGraph.cpp b/src/core/experimental/dynamic_fusion/OperatorGraph.cpp
deleted file mode 100644
index bd88afdb47..0000000000
--- a/src/core/experimental/dynamic_fusion/OperatorGraph.cpp
+++ /dev/null
@@ -1,264 +0,0 @@
-/*
- * Copyright (c) 2022 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION
-#include "arm_compute/core/experimental/OperatorGraph.h"
-#include "arm_compute/core/utils/misc/ShapeCalculator.h"
-#include "src/core/experimental/dynamic_fusion/WorkloadImpl/OperatorGraphImpl.h"
-#include "src/core/helpers/AutoConfiguration.h"
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-namespace
-{
-void check_dependency_graph_op_success(OperatorGraph &graph, const Status &status)
-{
-    if(!bool(status))
-    {
-        graph.impl()->status = Status{ status.error_code(), "Cycles or loops are not allowed" };
-    }
-}
-
-// Check if there are more than one roots in the graph
-void check_multiple_roots(OperatorGraph &graph)
-{
-    if(graph.impl()->graph.get_root_ops().size() > 1)
-    {
-        graph.impl()->status = Status{ ErrorCode::RUNTIME_ERROR, "Multiple roots are not allowed" };
-    }
-}
-
-void check_execution_shape(OperatorGraph &graph, const ITensorInfo &dst_info)
-{
-    const auto roots = graph.impl()->graph.get_root_ops();
-    for(auto root : roots)
-    {
-        // We assume exactly 1 dst tensor for all operators
-        const auto root_info = graph.impl()->tensors[graph.impl()->graph.dst_tensors(root)[0]]->get_tensor_info();
-        for(unsigned int dim = 0; dim < root_info->num_dimensions(); ++dim)
-        {
-            if(root_info->dimension(dim) != dst_info.dimension(dim))
-            {
-                graph.impl()->status = Status{ ErrorCode::RUNTIME_ERROR, "Cannot change execution space" };
-                return;
-            }
-        }
-    }
-}
-} // namespace
-
-OpTensor::OpTensor(Id id)
-    : _id{ id }
-{
-}
-
-OpTensor::Id OpTensor::id() const
-{
-    return _id;
-}
-
-bool operator<(const OpTensor &t0, const OpTensor &t1)
-{
-    return t0.id() < t1.id();
-}
-
-Operator::Operator(Id id)
-    : _id{ id }
-{
-}
-
-Operator::Id Operator::id() const
-{
-    return _id;
-}
-
-bool operator<(const Operator &op0, const Operator &op1)
-{
-    return op0.id() < op1.id();
-}
-
-OperatorGraph::OperatorGraph()
-    : _impl{ std::make_unique<Implementation>() }
-{
-}
-
-OperatorGraph::~OperatorGraph() = default;
-
-OperatorGraph::Implementation *OperatorGraph::impl()
-{
-    return _impl.get();
-}
-
-const OperatorGraph::Implementation *OperatorGraph::impl() const
-{
-    return _impl.get();
-}
-
-Status validate(const OperatorGraph &graph)
-{
-    return graph.impl()->status;
-}
-
-OpTensor add_tensor(OperatorGraph &graph, ITensorInfo &info)
-{
-    auto     id = graph.impl()->graph.add_tensor();
-    OpTensor op_tensor(id);
-    graph.impl()->add_tensor(id, &info);
-    return op_tensor;
-}
-
-Operator add_op_conv2d(OperatorGraph &graph, const Conv2dDescriptor &desc, OpTensor input, OpTensor weights, OpTensor bias, OpTensor dst)
-{
-    // Check if map is empty as a complex operator can only be root
-    if(!graph.impl()->graph.get_root_ops().empty())
-    {
-        graph.impl()->status = Status{ ErrorCode::RUNTIME_ERROR, "Cannot add multiple complex operators" };
-        return Operator{};
-    }
-
-    std::pair<Status, DependencyGraph::Id> status_id;
-
-    if(bias.id() == -1)
-    {
-        status_id = graph.impl()->graph.add_operator({ input.id(), weights.id() }, { dst.id() });
-    }
-    else
-    {
-        status_id = graph.impl()->graph.add_operator({ input.id(), weights.id(), bias.id() }, { dst.id() });
-    }
-
-    check_dependency_graph_op_success(graph, status_id.first);
-
-    Operator op_node(status_id.second);
-
-    // Infer TensorInfo
-    OpTensorContent *dst_tensor = graph.impl()->tensors[dst.id()].get();
-    if(dst_tensor->get_tensor_info()->total_size() == 0)
-    {
-        auto src   = graph.impl()->tensors[input.id()]->get_tensor_info();
-        auto wts   = graph.impl()->tensors[weights.id()]->get_tensor_info();
-        auto shape = misc::shape_calculator::compute_deep_convolution_shape(src->tensor_shape(), src->data_layout(), wts->tensor_shape(), PadStrideInfo(desc.stride.x(), desc.stride.y(), desc.pad.left,
-                                                                            desc.pad.right,
-                                                                            desc.pad.top, desc.pad.bottom, DimensionRoundingType::FLOOR)); // use the default DimensionRoundingType
-
-        auto_init_if_empty(*(dst_tensor->get_tensor_info()), src->clone()->set_tensor_shape(shape));
-    }
-
-    // Check execution space
-    auto dst_info = dst_tensor->get_tensor_info();
-    check_execution_shape(graph, *dst_info);
-
-    ITensorDescPack<OpTensorContent> tensors;
-    tensors.add_const_tensor(ACL_SRC_0, graph.impl()->tensors[input.id()].get());
-    tensors.add_const_tensor(ACL_SRC_1, graph.impl()->tensors[weights.id()].get());
-    if(bias.id() != -1)
-    {
-        tensors.add_const_tensor(ACL_SRC_2, graph.impl()->tensors[bias.id()].get());
-    }
-    tensors.add_const_tensor(ACL_DST_0, graph.impl()->tensors[dst.id()].get());
-
-    graph.impl()->add_node<Conv2dContent>(status_id.second, desc, tensors);
-    check_multiple_roots(graph);
-
-    return op_node;
-}
-
-Operator add_op_conv2d(OperatorGraph &graph, const Conv2dDescriptor &desc, OpTensor input, OpTensor weights, OpTensor dst)
-{
-    return add_op_conv2d(graph, desc, input, weights, OpTensor(-1), dst);
-}
-
-void force_conv2d_method(OperatorGraph &graph, Operator conv2d, ConvolutionMethod method)
-{
-    auto node = utils::cast::polymorphic_downcast<Conv2dContent *>(graph.impl()->operators[conv2d.id()].get());
-    node->set_method(method);
-}
-
-Operator add_op_elementwise_op(OperatorGraph &graph, const ElementwiseDescriptor &desc, OpTensor lhs, OpTensor rhs, OpTensor dst)
-{
-    auto id = graph.impl()->graph.add_operator({ rhs.id(), lhs.id() }, { dst.id() });
-    check_dependency_graph_op_success(graph, id.first);
-
-    Operator op_node(id.second);
-
-    // Infer TensorInfo
-    auto             node_lhs = graph.impl()->tensors[lhs.id()]->get_tensor_info();
-    auto             node_rhs = graph.impl()->tensors[rhs.id()]->get_tensor_info();
-    OpTensorContent *node_dst = graph.impl()->tensors[dst.id()].get();
-
-    if(node_dst->get_tensor_info()->total_size() == 0)
-    {
-        const std::pair<TensorShape, ValidRegion> broadcast_pair = ITensorInfo::broadcast_shape_and_valid_region(*node_rhs, *node_lhs);
-        auto_init_if_empty(*(node_dst->get_tensor_info()), node_lhs->clone()->set_tensor_shape(broadcast_pair.first));
-    }
-
-    // Check execution space
-    auto dst_info = node_dst->get_tensor_info();
-    check_execution_shape(graph, *dst_info);
-
-    ITensorDescPack<OpTensorContent> tensors;
-    tensors.add_const_tensor(ACL_SRC_0, graph.impl()->tensors[lhs.id()].get());
-    tensors.add_const_tensor(ACL_SRC_1, graph.impl()->tensors[rhs.id()].get());
-    tensors.add_const_tensor(ACL_DST_0, graph.impl()->tensors[dst.id()].get());
-    graph.impl()->add_node<ElementwiseContent>(id.second, desc, tensors);
-    check_multiple_roots(graph);
-
-    return op_node;
-}
-
-Operator add_op_floor(OperatorGraph &graph, const FloorDescriptor &desc, OpTensor src, OpTensor dst)
-{
-    auto id = graph.impl()->graph.add_operator({ src.id() }, { dst.id() });
-    check_dependency_graph_op_success(graph, id.first);
-
-    Operator op_node(id.second);
-
-    // Infer TensorInfo
-    auto             node_src = graph.impl()->tensors[src.id()]->get_tensor_info();
-    OpTensorContent *node_dst = graph.impl()->tensors[dst.id()].get();
-
-    if(node_dst->get_tensor_info()->total_size() == 0)
-    {
-        auto_init_if_empty(*(node_dst->get_tensor_info()), *node_src);
-    }
-
-    // Check execution space
-    auto dst_info = node_dst->get_tensor_info();
-    check_execution_shape(graph, *dst_info);
-
-    ITensorDescPack<OpTensorContent> tensors;
-    tensors.add_const_tensor(ACL_SRC_0, graph.impl()->tensors[src.id()].get());
-    tensors.add_const_tensor(ACL_DST_0, graph.impl()->tensors[dst.id()].get());
-    graph.impl()->add_node<FloorContent>(id.second, desc, tensors);
-    check_multiple_roots(graph);
-
-    return op_node;
-}
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
-#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */
-\ No newline at end of file
diff --git a/src/core/experimental/dynamic_fusion/WorkloadImpl/ClFusedKernelGraph.cpp b/src/core/experimental/dynamic_fusion/WorkloadImpl/ClFusedKernelGraph.cpp
deleted file mode 100644
index 4e57d66a1c..0000000000
--- a/src/core/experimental/dynamic_fusion/WorkloadImpl/ClFusedKernelGraph.cpp
+++ /dev/null
@@ -1,232 +0,0 @@
-/*
- * Copyright (c) 2022 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION
-#include "src/core/experimental/dynamic_fusion/WorkloadImpl/ClFusedKernelGraph.h"
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-namespace
-{
-std::vector<std::pair<ClKernelFusionGroup *, ClKernelFusionGroup *>> get_combinations(const std::vector<ClKernelFusionGroup *> &sorted_fgs)
-{
-    ARM_COMPUTE_ERROR_ON(sorted_fgs.size() <= 1);
-    std::vector<std::pair<ClKernelFusionGroup *, ClKernelFusionGroup *>> combo;
-    for(size_t i = 0; i < sorted_fgs.size() - 1; ++i)
-    {
-        for(size_t j = i + 1; j < sorted_fgs.size(); ++j)
-        {
-            combo.push_back(std::make_pair(sorted_fgs.at(i), sorted_fgs.at(j)));
-        }
-    }
-    return combo;
-}
-} // namespace
-std::vector<const ClKernel *> traverse(const ClKernelFusionGroup &group)
-{
-    std::vector<const ClKernel *> kernels;
-    const auto                    sorted = group.graph.topological_sort();
-    for(const auto &pack : sorted.second)
-    {
-        kernels.push_back(group.fused_kernels.at(pack.op));
-    }
-    return kernels;
-}
-
-std::vector<const ClKernelFusionGroup *> traverse(const ClFusedKernelGraph &graph)
-{
-    std::vector<const ClKernelFusionGroup *> kernels;
-    const auto                               sorted = graph.fg_dependency.topological_sort();
-    for(const auto &pack : sorted.second)
-    {
-        kernels.push_back(graph.fusion_groups.at(pack.op).get());
-    }
-    return kernels;
-}
-
-std::vector<ClKernelFusionGroup *> traverse(ClFusedKernelGraph &graph)
-{
-    std::vector<ClKernelFusionGroup *> kernels;
-    const auto                         sorted = graph.fg_dependency.topological_sort();
-    for(const auto &pack : sorted.second)
-    {
-        kernels.push_back(graph.fusion_groups.at(pack.op).get());
-    }
-    return kernels;
-}
-
-std::pair<Status, ClFusedKernelGraph> init_fusion_graph(const ClKernelGraph &kernel_graph)
-{
-    ClFusedKernelGraph fused_kernel_graph{};
-    fused_kernel_graph.original_graph = &kernel_graph; // Create a copy of the original kernel graph
-    fused_kernel_graph.fg_dependency  = DependencyGraph();
-    // Initialize all fusion groups
-    for(const auto &kernel : traverse(kernel_graph))
-    {
-        fused_kernel_graph.add_fusion_group({ kernel });
-    }
-    return { Status{}, fused_kernel_graph };
-}
-
-Status fuse(ClFusedKernelGraph &fused_kernel_graph)
-{
-    // A naive fusion algorithm that's guaranteed to find optimal pattern if there are no branches
-    // If there are branches, the algorithm cannot guanrantee optimality as it doesn't perform any searches
-
-    bool fusion_found = false;
-    do
-    {
-        fusion_found          = false;
-        const auto sorted_fgs = traverse(fused_kernel_graph);
-        if(sorted_fgs.size() <= 1)
-        {
-            // Only one or zero fusion group, thus no need to perform fusion
-            return Status{};
-        }
-        auto fgs_combo = get_combinations(sorted_fgs);
-        for(auto fgs : fgs_combo)
-        {
-            auto       fg0 = fgs.first;
-            auto       fg1 = fgs.second;
-            const auto st  = fused_kernel_graph.can_fuse(*fg0, *fg1);
-            if(bool(st))
-            {
-                const auto st = fused_kernel_graph.fuse(*fg0, *fg1);
-                if(!bool(st))
-                {
-                    return st;
-                }
-                fusion_found = true;
-                break;
-            }
-        }
-    }
-    while(fusion_found);
-    return Status{};
-}
-Status generate_store(ClKernelBlueprint &bp, const ClFusedKernelGraph &fused_kernel_graph, const ClKernelFusionGroup &fg)
-{
-    Status st{};
-    for(const auto &dst_t_id : fused_kernel_graph.fg_dependency.dst_tensors(fg.id))
-    {
-        const auto dst_t = fused_kernel_graph.original_graph->get_tensor(dst_t_id);
-
-        /// NOTE: dst tensor must have already been added to the blueprint at this point
-        ArgumentID dst_id;
-        st = add_tensor(bp, dst_t->desc, dst_id, dst_t->id);
-        if(!bool(st))
-        {
-            return st;
-        }
-        /// NOTE: the extra dst tensor is needed as the store kcomp requires 2 tensors. But this is irrelevant to the fused kernel graph
-        /// since both tensors share the exact same info and kernel arg descriptor
-        ArgumentID dst_dst_id;
-        st = add_tensor(bp, dst_t->desc, dst_dst_id);
-        if(!bool(st))
-        {
-            return st;
-        }
-        /// NOTE: Update the merge point map to link dst_dst_id with dst_t->id instead.
-        /// This is required because the get_arguments() returned by the blueprint returns the dst tensor added by the store component
-        st = update_merge_point(bp, dst_dst_id, dst_t->id);
-        if(!bool(st))
-        {
-            return st;
-        }
-        st = add_kcomp_store(bp, fg.get_root_kernel()->config().store_type, dst_id, dst_dst_id);
-        if(!bool(st))
-        {
-            return st;
-        }
-    }
-    return st;
-}
-
-Status generate(ClWorkload &workload, const ClWorkloadContext &ctx, const ClFusedKernelGraph &fused_kernel_graph)
-{
-    workload.context = ctx;
-    for(const auto &fg : traverse(fused_kernel_graph))
-    {
-        ClKernelBlueprint bp{};
-        for(const auto &kernel : traverse(*fg))
-        {
-            const auto st = kernel->generate(bp);
-            if(!bool(st))
-            {
-                return st;
-            }
-        }
-        auto st = set_tile_info(bp, fg->get_root_kernel()->config().tile_desc);
-        if(!bool(st))
-        {
-            return st;
-        }
-        st = generate_store(bp, fused_kernel_graph, *fg);
-        if(!bool(st))
-        {
-            return st;
-        }
-
-        ClKernelCode code{};
-        st = build(code, ClCodeBuilderContext{ ctx.gpu_info }, bp);
-        if(!bool(st))
-        {
-            return st;
-        }
-        const auto bp_graph = get_dependency_graph(bp);
-
-        // Get tensor info
-        std::vector<Id> workload_src_tensors{};
-        for(const auto &src_t_id : fused_kernel_graph.fg_dependency.src_tensors(fg->id))
-        {
-            const auto src_t = fused_kernel_graph.original_graph->get_tensor(src_t_id);
-            // Get corresponding kernel arg descriptor
-            const auto arg_desc    = code.arguments.at(bp_graph.get_merge_points().at(src_t->id));
-            const auto kernel_t_id = workload.add_workload_tensor(src_t->desc, src_t->memory_type, src_t->memory_info, arg_desc, src_t->id);
-            workload_src_tensors.push_back(kernel_t_id);
-        }
-        std::vector<Id> workload_dst_tensors{};
-        for(const auto &dst_t_id : fused_kernel_graph.fg_dependency.dst_tensors(fg->id))
-        {
-            const auto dst_t = fused_kernel_graph.original_graph->get_tensor(dst_t_id);
-            // Get corresponding kernel arg descriptor
-            const auto arg_desc    = code.arguments.at(bp_graph.get_merge_points().at(dst_t->id));
-            const auto kernel_t_id = workload.add_workload_tensor(dst_t->desc, dst_t->memory_type, dst_t->memory_info, arg_desc, dst_t->id);
-            workload_dst_tensors.push_back(kernel_t_id);
-        }
-
-        workload.add_unit_workload(fg->get_root_kernel()->config().stage, code, workload_src_tensors, workload_dst_tensors);
-    }
-
-    return Status{};
-}
-
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
-#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */
-\ No newline at end of file
diff --git a/src/core/experimental/dynamic_fusion/WorkloadImpl/ClFusedKernelGraph.h b/src/core/experimental/dynamic_fusion/WorkloadImpl/ClFusedKernelGraph.h
deleted file mode 100644
index 2051f1b62f..0000000000
--- a/src/core/experimental/dynamic_fusion/WorkloadImpl/ClFusedKernelGraph.h
+++ /dev/null
@@ -1,452 +0,0 @@
-/*
- * Copyright (c) 2022 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION
-#ifndef ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_CLFUSEDKERNELGRAPH_H
-#define ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_CLFUSEDKERNELGRAPH_H
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/experimental/DependencyGraph.h"
-#include "src/core/experimental/dynamic_fusion/ClKernelBuildingAPI.h"
-#include "src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelGraph.h"
-#include "support/DeepCopy.h"
-
-#include <vector>
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-struct ClKernelFusionGroup;
-
-/** A const view of a subgraph of the @ref ClKernelGraph to be fused together
- *
- */
-struct ClKernelFusionGroup
-{
-public:
-    using Id = DependencyGraph::Id;
-
-    ClKernelFusionGroup() = default;
-    ClKernelFusionGroup(Id id)
-        : id{ id }, graph{}, fused_kernels{}, tensors{}
-    {
-    }
-    ~ClKernelFusionGroup() = default;
-
-    void set_id(Id i)
-    {
-        id = i;
-    }
-
-    Id add_fused_kernel(const ClKernel *kernel)
-    {
-        /// PRE: Acyclicity ensured by DependencyGraph
-        /// PRE: Connectedness ensured by DependencyGraph
-        /// PRE: Single-rootedness ensured by User
-        std::vector<Id> src_tensors;
-        for(const auto t : kernel->tensors().get_const_src_tensors())
-        {
-            auto id = graph.add_tensor(t->id);
-            if(tensors.find(id) == tensors.end())
-            {
-                tensors[id] = t;
-            }
-            src_tensors.push_back(id);
-        }
-        std::vector<Id> dst_tensors;
-        for(const auto t : kernel->tensors().get_const_dst_tensors())
-        {
-            auto id = graph.add_tensor(t->id);
-            if(tensors.find(id) == tensors.end())
-            {
-                tensors[id] = t;
-            }
-            dst_tensors.push_back(id);
-        }
-        auto id                  = graph.add_operator(src_tensors, dst_tensors);
-        fused_kernels[id.second] = kernel;
-        return id.second;
-    }
-
-    const ClKernel *get_root_kernel() const
-    {
-        auto root_kernels = graph.get_root_ops();
-        ARM_COMPUTE_ERROR_ON(root_kernels.size() != 1);
-        return fused_kernels.at(root_kernels.at(0));
-    }
-
-    std::vector<const ClKernelTensor *> get_src_tensors() const
-    {
-        std::vector<const ClKernelTensor *> src_tensors;
-        for(auto tensor_id : graph.src_tensors())
-        {
-            src_tensors.push_back(tensors.at(tensor_id));
-        }
-        return src_tensors;
-    }
-
-    std::vector<const ClKernelTensor *> get_dst_tensors() const
-    {
-        std::vector<const ClKernelTensor *> dst_tensors;
-        for(auto tensor_id : graph.dst_tensors())
-        {
-            dst_tensors.push_back(tensors.at(tensor_id));
-        }
-        return dst_tensors;
-    }
-
-    friend bool operator==(const ClKernelFusionGroup &fg0, const ClKernelFusionGroup &fg1)
-    {
-        return fg0.id == fg1.id && fg0.graph == fg1.graph && fg0.fused_kernels == fg1.fused_kernels && fg0.tensors == fg1.tensors;
-    }
-
-    Id              id{};
-    DependencyGraph graph{}; // A subgraph of the original ClKernelGraph
-    std::map<Id, const ClKernel *>       fused_kernels{};
-    std::map<Id, const ClKernelTensor *> tensors{};
-};
-
-std::vector<const ClKernel *> traverse(const ClKernelFusionGroup &group);
-
-struct ClFusedKernelGraph
-{
-public:
-    using Id = DependencyGraph::Id;
-
-    using KernelFusionGroupMap = std::map<Id, utils::memory::deep_unique_ptr<ClKernelFusionGroup>>;
-
-    ClFusedKernelGraph()                                = default;
-    ~ClFusedKernelGraph()                               = default;
-    ClFusedKernelGraph(const ClFusedKernelGraph &graph) = default;
-    ClFusedKernelGraph &operator=(const ClFusedKernelGraph &graph) = default;
-    ClFusedKernelGraph(ClFusedKernelGraph &&graph)                 = default;
-    ClFusedKernelGraph &operator=(ClFusedKernelGraph &&graph) = default;
-
-    friend bool operator==(const ClFusedKernelGraph &graph0, const ClFusedKernelGraph &graph1)
-    {
-        /// NOTE: fg_dependency may change based on the order of fusion, and thus is omitted in the comparison.
-        ///       The fusion groups can already guarantee the equivalence of fusion
-        ///       In the future we may want to enforce a stronger equivalence by implementing topological comparison between @ref DependencyGraph s
-        return graph0.original_graph == graph1.original_graph && graph0.fusion_groups == graph1.fusion_groups;
-    }
-
-    Id add_fusion_group(const std::vector<const ClKernel *> &fused_kernels)
-    {
-        auto fg = utils::memory::make_deep_unique<ClKernelFusionGroup, ClKernelFusionGroup>();
-        for(const auto k : fused_kernels)
-        {
-            fg->add_fused_kernel(k);
-        }
-        const auto      src_tensors = fg->get_src_tensors();
-        const auto      dst_tensors = fg->get_dst_tensors();
-        std::vector<Id> inputs{};
-        std::transform(std::begin(src_tensors), std::end(src_tensors), std::back_inserter(inputs), [this](auto kernel)
-        {
-            return fg_dependency.add_tensor(kernel->id);
-        });
-        std::vector<Id> outputs{};
-        std::transform(std::begin(dst_tensors), std::end(dst_tensors), std::back_inserter(outputs), [this](auto kernel)
-        {
-            return fg_dependency.add_tensor(kernel->id);
-        });
-        const auto id = fg_dependency.add_operator(inputs, outputs);
-        fg->set_id(id.second);
-        fusion_groups[id.second] = std::move(fg);
-        return id.second;
-    }
-
-    Status fuse(ClKernelFusionGroup &fg0, ClKernelFusionGroup &fg1)
-    {
-        /// PRE: Already checked by can_fuse, and thus all the INVs and ASSUMPTIONS still hold
-        ClKernelFusionGroup *fg_src{};
-        ClKernelFusionGroup *fg_dst{};
-        // Find fg_src (parent / root) and fg_dst (child / non-root)
-        if(is_in(fg1.id, fg_dependency.dst_ops(fg0.id)))
-        {
-            fg_src = &fg0;
-            fg_dst = &fg1;
-        }
-        else if(is_in(fg0.id, fg_dependency.dst_ops(fg1.id)))
-        {
-            fg_src = &fg1;
-            fg_dst = &fg0;
-        }
-        else
-        {
-            return Status{ ErrorCode::RUNTIME_ERROR, "Invalid fusion: Not directly connected fusion groups cannot be fused together" };
-        }
-
-        for(const auto &t : fg_dependency.src_tensors(fg_dst->id))
-        {
-            if(!is_in(t, fg_dependency.dst_tensors(fg_src->id)))
-            {
-                // Link any incoming tensors of fg_dst, that ARE NOT in between fg_src and fg_dst, to fg_src
-
-                // Before:
-                // fg_src
-                // |
-                // ..          t1
-                // |           |
-                // -> fg_dst <-
-                //
-                // After:
-                // fg_src <---t1
-                //
-                const auto st = link_src_tensors(fg_src->id, { t });
-                if(!bool(st))
-                {
-                    return st;
-                }
-            }
-            else
-            {
-                const auto dst_fgs = fg_dependency.dst_ops_from_tensor(t);
-                if(dst_fgs.size() == 1U && dst_fgs.at(0) == fg_dst->id)
-                {
-                    // Remove any incoming tensors of fg_dst, that ARE in between fg_src and fg_dst
-                    // AND that are not connected to any other outgoing fgs (Note that they cannot connect to any other incoming fgs as all tensors can have at most 1 incoming fg (ASSUMPTION 3))
-
-                    // Before:
-                    // fg_src
-                    // |
-                    // t0
-                    // |
-                    // -> fg_dst
-                    //
-                    // After:
-                    // fg_src
-                    //
-                    const auto st = remove_fg_tensor(t);
-                    if(!bool(st))
-                    {
-                        return st;
-                    }
-                }
-                else
-                {
-                    // If the tensors ARE in between fg_src and fg_dst
-                    // BUT have any other outgoing fgs than fg_dst, then we leave it as a dst tensor to the fused fg_src
-
-                    // Before:
-                    // fg_src
-                    // |
-                    // t0
-                    // |
-                    // |-----------
-                    // |          |
-                    // -> fg_dst  -> fg_other
-                    //
-                    // After:
-                    // fg_src
-                    // |
-                    // t0
-                    // |
-                    // -> fg_other
-                    //
-
-                    // Note that this may seem like a case we shouldn't fuse. But all it actually means is that t0 is an
-                    // intermediate tensor between the fused fg_src and fg_dst that we additionally STORE to memory,
-                    // so that any unfused fgs (fg_other in this case) can read it.
-                    // In other words, we can STORE not only the tensors at the "end" of a fusion group,
-                    // but also any other tensors that are not source tensors. All tensors that are STORED (exported)
-                    // can be termed "dst tensors" of a fusion group
-                    void();
-                }
-            }
-        }
-
-        for(const auto &t : fg_dependency.dst_tensors(fg_dst->id))
-        {
-            // Link any outgoing tensors of fg_dst to fg_src
-
-            // Before:
-            // fg_src
-            // |
-            // ..
-            // |
-            // -> fg_dst
-            //    |
-            //    |--------
-            //    |       |
-            //    |-> t0  |-> t1
-            //
-            // After:
-            // fg_src
-            // |
-            // |--------
-            // |       |
-            // |-> t0  |-> t1
-            //
-            const auto st = link_dst_tensors(fg_src->id, { t });
-            if(!bool(st))
-            {
-                return st;
-            }
-        }
-
-        // Merge fg_dst's graph into fg_src's graph
-        for(const auto kernel : traverse(*fg_dst))
-        {
-            fg_src->add_fused_kernel(kernel);
-        }
-
-        const auto st = remove_fg(fg_dst->id);
-        return st;
-    }
-    Status can_fuse(const ClKernelFusionGroup &fg0, const ClKernelFusionGroup &fg1) const
-    {
-        /// ASSUMPTION0: All tensors have 0 or 1 incoming kernel
-        /// ASSUMPTION1: All kernels have exactly 1 dst tensor (Temporary, can be lifted once we start supporting multi-dst kernels)
-        ///              Note that this does not apply to fusion groups
-        /// ASSUMPTION2: Simple kernels' tile infos can be overridden by (shared with) that of the root kernel
-        /// ASSUMPTION3: Extension of ASSUMPTION0: All tensors have 0 or 1 incoming fusion group
-        /// INV0: All Fusion groups have a single root
-        /// INV1: All Fusion groups have no cycles or loops within themselves <- guaranteed by the underlying ClKernelGraph having no cycles or loops; enforced by DependencyGraph
-        /// INV2: The ClKernelFusionGroup itself has no cycles or loops <- enforced by DependencyGraph
-        /// INV3: All non-roots are Simple kernels
-        /// INV4: All non roots' dst tensors have the same shape as that of the root kernel
-        /// INV5: All kernels within a fusion group have the same UnitWorkloadStage
-        const ClKernelFusionGroup *fg_src {};
-        const ClKernelFusionGroup *fg_dst{};
-
-        // Check 0: Ensure fg0 and fg1 are "directly connected": one of them is a direct parent of the other
-        // This guarantees INV0
-        // This also finds fg_src (parent / root) and fg_dst (child / non-root)
-        if(is_in(fg1.id, fg_dependency.dst_ops(fg0.id)))
-        {
-            fg_src = &fg0;
-            fg_dst = &fg1;
-        }
-        else if(is_in(fg0.id, fg_dependency.dst_ops(fg1.id)))
-        {
-            fg_src = &fg1;
-            fg_dst = &fg0;
-        }
-        else
-        {
-            return Status{ ErrorCode::RUNTIME_ERROR, "Invalid fusion: Not directly connected fusion groups cannot be fused together" };
-        }
-
-        // Find unconnected tensors between fg_src and fg_dst
-        std::vector<Id> unconnected_tensors{};
-        for(const auto &t : fg_dependency.dst_tensors(fg_src->id))
-        {
-            if(!is_in(t, fg_dependency.src_tensors(fg_dst->id)))
-            {
-                unconnected_tensors.push_back(t);
-            }
-        }
-
-        // Check 1: Any unconnected tensor cannot be an ancestor of fg_dst
-        // This guarantees INV2: That is, the fused graph does not have any cycles or loops between different fusion groups
-        for(const auto &t : unconnected_tensors)
-        {
-            if(fg_dependency.path_exists_from_tensor_to_op(t, fg_dst->id))
-            {
-                return Status{ ErrorCode::RUNTIME_ERROR, "Invalid fusion: the fusion would result in cycles or loops" };
-            }
-        }
-
-        // Check 2: All non-root fgs are simple. Ensure INV3
-        if(fg_dst->get_root_kernel()->complexity() != Complexity::Simple)
-        {
-            return Status{ ErrorCode::RUNTIME_ERROR, "Invalid fusion: only root kernel can be a complex kernel" };
-        }
-
-        // Check 3: All non roots' dst tensors have the same shape as that of the root kernel. Ensure INV4
-        const auto root_kernel_dst_tensors = fg_dependency.dst_tensors(fg_src->id);
-        ARM_COMPUTE_ERROR_ON(root_kernel_dst_tensors.size() != 1); // (ASSUMPTION 1: All kernels have exactly 1 dst tensor)
-        const auto root_kernel_dst_tensor_info = original_graph->get_tensor(root_kernel_dst_tensors[0])->desc;
-
-        for(const auto &t : fg_dependency.dst_tensors(fg_dst->id))
-        {
-            const auto t_info = original_graph->get_tensor(t)->desc;
-            if(detail::have_different_dimensions(root_kernel_dst_tensor_info->tensor_shape(), t_info->tensor_shape(), 0))
-            {
-                return Status{ ErrorCode::RUNTIME_ERROR, "Invalid fusion: all non roots' dst tensors should have the same shape as that of the root kernel" };
-            }
-        }
-
-        // Check 4: All kernels within a fg have the same UnitWorkloadStage. Ensure INV5
-        if(!(fg_src->get_root_kernel()->config().stage == fg_dst->get_root_kernel()->config().stage))
-        {
-            return Status{ ErrorCode::RUNTIME_ERROR, "Invalid fusion: all kernels within a fusion group should have the same UnitWorkloadStage" };
-        }
-
-        return Status{};
-    }
-
-    const ClKernelGraph *original_graph{};
-    DependencyGraph      fg_dependency{};
-    KernelFusionGroupMap fusion_groups{};
-    // Note: no need to store tensor pointers in the ClFusedKernelGraph, as they are stored inside the individual fusion groups.
-
-private:
-    Status link_src_tensors(Id fg, const std::vector<Id> &src_tensors)
-    {
-        for(auto t : src_tensors)
-        {
-            fg_dependency.link_input(fg, t);
-        }
-        return Status{};
-    }
-    Status link_dst_tensors(Id fg, const std::vector<Id> &dst_tensors)
-    {
-        for(auto t : dst_tensors)
-        {
-            fg_dependency.link_output(fg, t);
-        }
-        return Status{};
-    }
-    Status remove_fg(Id fg)
-    {
-        fg_dependency.remove_operator(fg);
-        fusion_groups.erase(fg);
-        return Status{};
-    }
-    Status remove_fg_tensor(Id tensor)
-    {
-        fg_dependency.remove_tensor(tensor);
-        return Status{};
-    }
-};
-
-std::vector<const ClKernelFusionGroup *> traverse(const ClFusedKernelGraph &graph);
-std::vector<ClKernelFusionGroup *> traverse(ClFusedKernelGraph &graph);
-
-std::pair<Status, ClFusedKernelGraph> init_fusion_graph(const ClKernelGraph &kernel_graph);
-
-Status fuse(ClFusedKernelGraph &fused_kernel_graph);
-
-Status generate_store(ClKernelBlueprint &bp, const ClFusedKernelGraph &fused_kernel_graph, const ClKernelFusionGroup &fg);
-
-Status generate(ClWorkload &workload, const ClWorkloadContext &ctx, const ClFusedKernelGraph &fused_kernel_graph);
-
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
-#endif //ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_CLFUSEDKERNELGRAPH_H
-#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */
-\ No newline at end of file
diff --git a/src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelDescriptors.h b/src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelDescriptors.h
deleted file mode 100644
index f10e97e3e9..0000000000
--- a/src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelDescriptors.h
+++ /dev/null
@@ -1,121 +0,0 @@
-/*
- * Copyright (c) 2022 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION
-#ifndef ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_CLKERNELDESCRIPTORS_H
-#define ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_CLKERNELDESCRIPTORS_H
-
-#include "arm_compute/core/experimental/OperatorGraph.h"
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-struct ClDirectConv2dKernelDescriptor
-{
-    friend bool operator==(const ClDirectConv2dKernelDescriptor &desc0, const ClDirectConv2dKernelDescriptor &desc1)
-    {
-        return desc0.conv2d == desc1.conv2d;
-    }
-    Conv2dDescriptor conv2d{};
-};
-
-struct ClElementwiseKernelDescriptor
-{
-    friend bool operator==(const ClElementwiseKernelDescriptor &desc0, const ClElementwiseKernelDescriptor &desc1)
-    {
-        return desc0.eltwise == desc1.eltwise;
-    }
-    ElementwiseDescriptor eltwise{};
-};
-
-struct ClFloorKernelDescriptor
-{
-    friend bool operator==(const ClFloorKernelDescriptor &desc0, const ClFloorKernelDescriptor &desc1)
-    {
-        return desc0.floor == desc1.floor;
-    }
-    FloorDescriptor floor{};
-};
-
-struct ClActivationKernelDescriptor
-{
-    friend bool operator==(const ClActivationKernelDescriptor &, const ClActivationKernelDescriptor &)
-    {
-        return true;
-    }
-};
-
-enum class ClippingStrategy
-{
-    TOP_LEFT,
-    TOP_RIGHT,
-    BOTTOM_LEFT,
-    BOTTOM_RIGHT,
-};
-/** Component: Store */
-struct TileDescriptor
-{
-    Size2D           tile_dims{};
-    Size2D           boundaries{};
-    ClippingStrategy clipping{ ClippingStrategy::TOP_LEFT };
-
-    TileDescriptor()
-    {
-    }
-
-    TileDescriptor(Size2D dims, const Size2D &bound, const ClippingStrategy &clip)
-        : tile_dims(dims), boundaries(bound), clipping(clip)
-    {
-    }
-
-    bool empty() const
-    {
-        return (tile_dims.area() == 0) || (boundaries.area() == 0);
-    }
-    friend bool operator==(const TileDescriptor &tile0, const TileDescriptor &tile1)
-    {
-        return tile0.tile_dims == tile1.tile_dims && tile0.boundaries == tile1.boundaries && tile0.clipping == tile1.clipping;
-    }
-};
-enum class StoreType
-{
-    VStore,
-    VStorePartial,
-    StoreRow,
-    ConvertStoreRow,
-    StoreBlock,
-    ConvertStoreBlock,
-    StoreRowPartial,
-    StoreBlockPartial,
-    StoreBlockBoundaryAware,
-    StoreVectorSelect,
-    TStoreIndirectWidthSelect
-};
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
-#endif //ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_CLKERNELDESCRIPTORS_H
-#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */
-\ No newline at end of file
diff --git a/src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelGraph.cpp b/src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelGraph.cpp
deleted file mode 100644
index cab51a2ce6..0000000000
--- a/src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelGraph.cpp
+++ /dev/null
@@ -1,271 +0,0 @@
-/*
- * Copyright (c) 2022 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION
-#include "arm_compute/core/utils/misc/ShapeCalculator.h"
-
-#include "src/core/CL/CLValidate.h"
-#include "src/core/experimental/dynamic_fusion/ClKernelBuildingAPI.h"
-#include "src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelGraph.h"
-
-#include "support/Cast.h"
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-Status ClDirectConv2dKernel::generate(ClKernelBlueprint &bp) const
-{
-    const auto input  = _tensors.get_const_tensor(TensorType::ACL_SRC_0);
-    const auto weight = _tensors.get_const_tensor(TensorType::ACL_SRC_1);
-    const auto bias   = _tensors.get_const_tensor(TensorType::ACL_SRC_2);
-    const auto dst    = _tensors.get_const_tensor(TensorType::ACL_DST_0);
-    ARM_COMPUTE_ERROR_ON_NULLPTR(input, weight, dst);
-    ArgumentID input_id;
-    add_tensor(bp, input->desc, input_id, input->id);
-    ArgumentID weight_id;
-    add_tensor(bp, weight->desc, weight_id, weight->id);
-    ArgumentID bias_id = g_arg_placeholder;
-    if(bias != nullptr)
-    {
-        add_tensor(bp, bias->desc, bias_id, bias->id);
-    }
-    ArgumentID dst_id;
-    add_tensor(bp, dst->desc, dst_id, dst->id);
-
-    add_kcomp_direct_conv2d(bp, desc, input_id, weight_id, bias_id, dst_id);
-    return Status{};
-}
-Status ClDirectConv2dKernel::validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const ClDirectConv2dKernelDescriptor &conv2d_desc)
-{
-    // 1. Check validity
-    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, weights, dst);
-    // Matching data type
-    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, weights);
-    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, dst);
-    if(biases != nullptr)
-    {
-        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, biases);
-    }
-
-    // Matching data layout
-    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(src, weights);
-    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(src, dst);
-    if(biases != nullptr)
-    {
-        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(src, biases);
-    }
-
-    // All tensor infos are initialized
-    ARM_COMPUTE_RETURN_ERROR_ON(src->tensor_shape().total_size() == 0);
-    ARM_COMPUTE_RETURN_ERROR_ON(weights->tensor_shape().total_size() == 0);
-    ARM_COMPUTE_RETURN_ERROR_ON(dst->tensor_shape().total_size() == 0);
-    if(biases != nullptr)
-    {
-        ARM_COMPUTE_RETURN_ERROR_ON(biases->tensor_shape().total_size() == 0);
-    }
-    // Device requirements are met
-    ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(src);
-    // weights shape is correct
-    const DataLayout data_layout = src->data_layout();
-    const int        channel_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);
-    ARM_COMPUTE_RETURN_ERROR_ON_MSG(weights->dimension(channel_idx) != src->dimension(channel_idx), "Weights feature map dimension should match the respective src's one");
-    ARM_COMPUTE_RETURN_ERROR_ON_MSG(weights->num_dimensions() > 4, "Weights can be at most 4 dimensional");
-
-    // dst shape is correct
-    PadStrideInfo legacy_pad_stride(conv2d_desc.conv2d.stride.x(), conv2d_desc.conv2d.stride.y(), conv2d_desc.conv2d.pad.left, conv2d_desc.conv2d.pad.right, conv2d_desc.conv2d.pad.top,
-                                    conv2d_desc.conv2d.pad.bottom, DimensionRoundingType{});
-    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(dst->tensor_shape(),
-                                                       misc::shape_calculator::compute_deep_convolution_shape(*src, *weights, legacy_pad_stride));
-
-    // biases shape is correct
-    if(biases != nullptr)
-    {
-        ARM_COMPUTE_RETURN_ERROR_ON_MSG(biases->dimension(0) != weights->dimension(3),
-                                        "Biases size and number of dst feature maps should match");
-        ARM_COMPUTE_RETURN_ERROR_ON_MSG(biases->num_dimensions() > 1,
-                                        "Biases should be one dimensional");
-    }
-
-    // 2. Check support level
-    // Data type
-    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::F16, DataType::F32);
-    // Data layout
-    ARM_COMPUTE_RETURN_ERROR_ON_DATA_LAYOUT_NOT_IN(src, DataLayout::NHWC);
-
-    return Status{};
-}
-
-bool ClDirectConv2dKernel::operator==(const ClKernel &other) const
-{
-    const auto converted = *utils::cast::polymorphic_downcast<const ClDirectConv2dKernel *>(&other);
-    return config() == other.config() && tensors() == other.tensors() && desc == converted.desc;
-}
-
-Status ClElementwiseKernel::generate(ClKernelBlueprint &bp) const
-{
-    const auto lhs = _tensors.get_const_tensor(TensorType::ACL_SRC_0);
-    const auto rhs = _tensors.get_const_tensor(TensorType::ACL_SRC_1);
-    const auto dst = _tensors.get_const_tensor(TensorType::ACL_DST_0);
-    ARM_COMPUTE_ERROR_ON_NULLPTR(lhs, rhs, dst);
-    ArgumentID lhs_id;
-    add_tensor(bp, lhs->desc, lhs_id, lhs->id);
-    ArgumentID rhs_id;
-    add_tensor(bp, rhs->desc, rhs_id, rhs->id);
-    ArgumentID dst_id;
-    add_tensor(bp, dst->desc, dst_id, dst->id);
-
-    add_kcomp_eltwise_op(bp, desc, lhs_id, rhs_id, dst_id);
-    return Status{};
-}
-
-Status ClElementwiseKernel::validate(const ITensorInfo *lhs, const ITensorInfo *rhs, const ITensorInfo *dst)
-{
-    // 1. Check validity
-    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(lhs, rhs, dst);
-
-    // Matching data type
-    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(lhs, rhs);
-    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(lhs, dst);
-
-    // Matching data layout
-    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(lhs, rhs);
-    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(lhs, dst);
-
-    // All tensor infos are initialized
-    ARM_COMPUTE_RETURN_ERROR_ON(lhs->tensor_shape().total_size() == 0);
-    ARM_COMPUTE_RETURN_ERROR_ON(rhs->tensor_shape().total_size() == 0);
-    ARM_COMPUTE_RETURN_ERROR_ON(dst->tensor_shape().total_size() == 0);
-
-    // Device requirements are met
-    ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(lhs);
-
-    const bool in_place      = (lhs == dst) || (rhs == dst);
-    const bool src0_in_place = in_place && (lhs == dst);
-
-    // dst shape is correct
-    const TensorShape out_shape = TensorShape::broadcast_shape(lhs->tensor_shape(), rhs->tensor_shape());
-    ARM_COMPUTE_RETURN_ERROR_ON_MSG(out_shape.total_size() == 0, "Inputs are not broadcast compatible");
-    ARM_COMPUTE_RETURN_ERROR_ON_MSG(detail::have_different_dimensions(out_shape, dst->tensor_shape(), 0), "Wrong shape for dst");
-    if(in_place)
-    {
-        ARM_COMPUTE_RETURN_ERROR_ON_MSG(detail::have_different_dimensions(out_shape, src0_in_place ? lhs->tensor_shape() : rhs->tensor_shape(), 0),
-                                        "Wrong shape for dst, cannot do in_place calculation");
-    }
-
-    // 2. Check support level
-
-    // Data type
-    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lhs, 1, DataType::F32, DataType::F16);
-
-    // Data layout
-    ARM_COMPUTE_RETURN_ERROR_ON_DATA_LAYOUT_NOT_IN(lhs, DataLayout::NHWC);
-
-    return Status{};
-}
-
-bool ClElementwiseKernel::operator==(const ClKernel &other) const
-{
-    const auto converted = *utils::cast::polymorphic_downcast<const ClElementwiseKernel *>(&other);
-    return config() == other.config() && tensors() == other.tensors() && desc == converted.desc;
-}
-
-Status ClFloorKernel::generate(ClKernelBlueprint &bp) const
-{
-    const auto src = _tensors.get_const_tensor(TensorType::ACL_SRC_0);
-    const auto dst = _tensors.get_const_tensor(TensorType::ACL_DST_0);
-    ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst);
-    ArgumentID src_id;
-    add_tensor(bp, src->desc, src_id, src->id);
-    ArgumentID dst_id;
-    add_tensor(bp, dst->desc, dst_id, dst->id);
-
-    add_kcomp_floor(bp, desc, src_id, dst_id);
-    return Status{};
-}
-
-Status ClFloorKernel::validate(const ITensorInfo *src, const ITensorInfo *dst)
-{
-    // 1. Check validity
-    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst);
-
-    // Matching data type
-    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, dst);
-
-    // Matching data layout
-    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(src, dst);
-
-    // All tensor infos are initialized
-    ARM_COMPUTE_RETURN_ERROR_ON(src->tensor_shape().total_size() == 0);
-    ARM_COMPUTE_RETURN_ERROR_ON(dst->tensor_shape().total_size() == 0);
-
-    // Device requirements are met
-    ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(src);
-
-    // dst shape is correct
-    ARM_COMPUTE_RETURN_ERROR_ON_MSG(detail::have_different_dimensions(src->tensor_shape(), dst->tensor_shape(), 0), "Wrong shape for dst");
-
-    // 2. Check support level
-
-    // Data type
-    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::F32, DataType::F16);
-
-    // Data layout
-    ARM_COMPUTE_RETURN_ERROR_ON_DATA_LAYOUT_NOT_IN(src, DataLayout::NHWC);
-
-    return Status{};
-}
-
-bool ClFloorKernel::operator==(const ClKernel &other) const
-{
-    const auto converted = *utils::cast::polymorphic_downcast<const ClFloorKernel *>(&other);
-    return config() == other.config() && tensors() == other.tensors() && desc == converted.desc;
-}
-
-std::vector<const ClKernel *> traverse(const ClKernelGraph &graph)
-{
-    std::vector<const ClKernel *> kernels;
-    const auto                    sorted = graph.graph.topological_sort();
-    for(const auto &pack : sorted.second)
-    {
-        kernels.push_back(graph.kernels.at(pack.op).get());
-    }
-    return kernels;
-}
-
-std::vector<ClKernel *> traverse(ClKernelGraph &graph)
-{
-    std::vector<ClKernel *> kernels;
-    const auto              sorted = graph.graph.topological_sort();
-    for(const auto &pack : sorted.second)
-    {
-        kernels.push_back(graph.kernels.at(pack.op).get());
-    }
-    return kernels;
-}
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
-#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */
-\ No newline at end of file
diff --git a/src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelGraph.h b/src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelGraph.h
deleted file mode 100644
index c3580cfaca..0000000000
--- a/src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelGraph.h
+++ /dev/null
@@ -1,259 +0,0 @@
-/*
- * Copyright (c) 2022 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION
-#ifndef ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_CLKERNELGRAPH_H
-#define ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_CLKERNELGRAPH_H
-
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/experimental/ClWorkload.h"
-#include "arm_compute/core/experimental/DependencyGraph.h"
-#include "src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelDescriptors.h"
-#include "src/core/experimental/dynamic_fusion/WorkloadImpl/ITensorDescPack.h"
-#include "support/DeepCopy.h"
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-struct ClKernelGraph;
-class ClKernelBlueprint;
-
-enum class Complexity
-{
-    Simple,
-    Complex
-};
-
-/** Configurations for ClKernel
- *
- */
-struct ClKernelConfig
-{
-    UnitWorkloadStage stage{};
-    TileDescriptor    tile_desc{};
-    StoreType         store_type{};
-    friend bool operator==(const ClKernelConfig &config0, const ClKernelConfig &config1)
-    {
-        return config0.stage == config1.stage && config0.tile_desc == config1.tile_desc && config0.store_type == config1.store_type;
-    }
-};
-
-struct ClKernelTensor
-{
-public:
-    using Id         = DependencyGraph::Id;
-    ClKernelTensor() = default;
-    ClKernelTensor(Id id, ITensorInfo *desc, MemoryType memory_type, const AuxMemoryInfo &memory_info)
-        : id{ id }, desc{ desc }, memory_type{ memory_type }, memory_info{ memory_info }
-    {
-    }
-    bool operator==(const ClKernelTensor &other) const
-    {
-        return desc == other.desc;
-    }
-
-    Id            id{};
-    ITensorInfo *desc{};
-    MemoryType    memory_type{};
-    AuxMemoryInfo memory_info{};
-};
-
-struct ClKernel
-{
-public:
-    using Id                         = DependencyGraph::Id;
-    ClKernel()                       = default;
-    virtual ~ClKernel()              = default;
-    ClKernel(const ClKernel &kernel) = default;
-    ClKernel &operator=(const ClKernel &kernel) = default;
-    ClKernel(ClKernel &&kernel)                 = default;
-    ClKernel &operator=(ClKernel &&kernel) = default;
-    ClKernel(const ClKernelGraph *graph, Id id, const ClKernelConfig &config, const ITensorDescPack<ClKernelTensor> &tensors)
-        : _graph{ graph }, _id{ id }, _config{ config }, _tensors{ tensors }
-    {
-    }
-    virtual bool operator==(const ClKernel &other) const = 0;
-    virtual Complexity complexity() const                = 0;
-    virtual Status generate(ClKernelBlueprint &bp) const = 0;
-    Id id() const
-    {
-        return _id;
-    }
-    ITensorDescPack<ClKernelTensor> tensors() const
-    {
-        return _tensors;
-    }
-    ClKernelConfig config() const
-    {
-        return _config;
-    }
-
-protected:
-    const ClKernelGraph            *_graph {};
-    Id                              _id{};
-    ClKernelConfig                  _config{};
-    ITensorDescPack<ClKernelTensor> _tensors{};
-};
-
-struct ClDirectConv2dKernel : public ClKernel
-{
-public:
-    Complexity complexity() const override
-    {
-        return Complexity::Complex;
-    }
-    ClDirectConv2dKernel()           = default;
-    ~ClDirectConv2dKernel() override = default;
-    ClDirectConv2dKernel(const ClKernelGraph *graph, Id id, const ClKernelConfig config, const ClDirectConv2dKernelDescriptor &desc, const ITensorDescPack<ClKernelTensor> tensors)
-        : ClKernel{ graph, id, config, tensors }, desc{ desc }
-    {
-    }
-    static Status validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const ClDirectConv2dKernelDescriptor &conv2d_desc);
-    bool operator==(const ClKernel &other) const override;
-    Status generate(ClKernelBlueprint &bp) const override;
-
-    ClDirectConv2dKernelDescriptor desc{};
-};
-
-struct ClElementwiseKernel : public ClKernel
-{
-public:
-    Complexity complexity() const override
-    {
-        return Complexity::Simple;
-    }
-    ClElementwiseKernel()           = default;
-    ~ClElementwiseKernel() override = default;
-    ClElementwiseKernel(const ClKernelGraph *graph, Id id, const ClKernelConfig &config, const ClElementwiseKernelDescriptor &desc, const ITensorDescPack<ClKernelTensor> tensors)
-        : ClKernel{ graph, id, config, tensors }, desc{ desc }
-    {
-    }
-    static Status validate(const ITensorInfo *lhs, const ITensorInfo *rhs, const ITensorInfo *dst);
-    bool operator==(const ClKernel &other) const override;
-    Status generate(ClKernelBlueprint &bp) const override;
-
-    ClElementwiseKernelDescriptor desc{};
-};
-
-struct ClFloorKernel : public ClKernel
-{
-public:
-    Complexity complexity() const override
-    {
-        return Complexity::Simple;
-    }
-    ClFloorKernel()           = default;
-    ~ClFloorKernel() override = default;
-    ClFloorKernel(const ClKernelGraph *graph, Id id, const ClKernelConfig &config, const ClFloorKernelDescriptor &desc, const ITensorDescPack<ClKernelTensor> tensors)
-        : ClKernel{ graph, id, config, tensors }, desc{ desc }
-    {
-    }
-    static Status validate(const ITensorInfo *src, const ITensorInfo *dst);
-    bool operator==(const ClKernel &other) const override;
-    Status generate(ClKernelBlueprint &bp) const override;
-
-    ClFloorKernelDescriptor desc{};
-};
-
-struct ClKernelGraph
-{
-public:
-    using Id              = DependencyGraph::Id;
-    using KernelMap       = std::map<Id, utils::memory::deep_unique_ptr<ClKernel>>;
-    using KernelTensorMap = std::map<Id, utils::memory::deep_unique_ptr<ClKernelTensor>>;
-
-    ClKernelGraph()  = default;
-    ~ClKernelGraph() = default;
-
-    friend bool operator==(const ClKernelGraph &graph0, const ClKernelGraph &graph1)
-    {
-        return graph0.graph == graph1.graph && graph0.kernels == graph1.kernels && graph0.tensors == graph1.tensors;
-    }
-
-    Status add_kernel_tensor(ITensorInfo *desc, MemoryType memory_type, const AuxMemoryInfo &memory_info, Id &tensor_id, Id merge_point = DependencyGraph::empty_id())
-    {
-        tensor_id = graph.add_tensor(merge_point);
-        if(tensors.find(tensor_id) == tensors.end())
-        {
-            tensors[tensor_id] = utils::memory::make_deep_unique<ClKernelTensor, ClKernelTensor>(tensor_id, desc, memory_type, memory_info);
-        }
-        return Status{};
-    }
-
-    template <typename ContentT, typename KernelDescT>
-    Status add_kernel(const ClKernelConfig &config, const KernelDescT &desc, const ITensorDescPack<ClKernelTensor> &tensors, Id &kernel_id)
-    {
-        const auto      src_tensors = tensors.get_const_src_tensors();
-        const auto      dst_tensors = tensors.get_const_dst_tensors();
-        std::vector<Id> src_tensor_ids{};
-        std::vector<Id> dst_tensor_ids{};
-        for(const auto &t : src_tensors)
-        {
-            src_tensor_ids.push_back(t->id);
-        }
-        for(const auto &t : dst_tensors)
-        {
-            dst_tensor_ids.push_back(t->id);
-        }
-        kernel_id          = graph.add_operator(src_tensor_ids, dst_tensor_ids).second;
-        auto k             = utils::memory::make_deep_unique<ClKernel, ContentT>(this, kernel_id, config, desc, tensors);
-        kernels[kernel_id] = std::move(k);
-        return Status{};
-    }
-
-    ClKernel *get_kernel(Id id)
-    {
-        return kernels.at(id).get();
-    }
-    const ClKernel *get_kernel(Id id) const
-    {
-        return kernels.at(id).get();
-    }
-
-    ClKernelTensor *get_tensor(Id id)
-    {
-        return tensors.at(id).get();
-    }
-    const ClKernelTensor *get_tensor(Id id) const
-    {
-        return tensors.at(id).get();
-    }
-
-    DependencyGraph graph{};
-    KernelMap       kernels{};
-    KernelTensorMap tensors{};
-};
-using Id = DependencyGraph::Id;
-
-std::vector<const ClKernel *> traverse(const ClKernelGraph &graph);
-std::vector<ClKernel *> traverse(ClKernelGraph &graph);
-
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
-#endif //ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_CLKERNELGRAPH_H
-#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */
-\ No newline at end of file
diff --git a/src/core/experimental/dynamic_fusion/WorkloadImpl/ClWorkload.cpp b/src/core/experimental/dynamic_fusion/WorkloadImpl/ClWorkload.cpp
deleted file mode 100644
index dcada4f64b..0000000000
--- a/src/core/experimental/dynamic_fusion/WorkloadImpl/ClWorkload.cpp
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * Copyright (c) 2022 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION
-#include "arm_compute/core/experimental/ClWorkload.h"
-#include "src/core/experimental/dynamic_fusion/WorkloadImpl/ClFusedKernelGraph.h"
-#include "src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelGraph.h"
-#include "src/core/experimental/dynamic_fusion/WorkloadImpl/OperatorGraphImpl.h"
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-Status build(ClWorkload &workload, const OperatorGraph &op_graph, const ClWorkloadContext &ctx)
-{
-    workload.context = ctx;
-    ClKernelGraph kernel_graph;
-    workload.status = validate(op_graph);
-    ARM_COMPUTE_RETURN_ON_ERROR(workload.status);
-    workload.status = translate(kernel_graph, *op_graph.impl());
-    ARM_COMPUTE_RETURN_ON_ERROR(workload.status);
-    ClFusedKernelGraph fused_k_graph;
-    std::tie(workload.status, fused_k_graph) = init_fusion_graph(kernel_graph);
-    ARM_COMPUTE_RETURN_ON_ERROR(workload.status);
-    workload.status = fuse(fused_k_graph);
-    ARM_COMPUTE_RETURN_ON_ERROR(workload.status);
-    workload.status = generate(workload, ctx, fused_k_graph);
-    ARM_COMPUTE_RETURN_ON_ERROR(workload.status);
-
-    // Get operator tensor id to workload tensor id map
-    const auto op_tensor_to_kernel_tensor       = fused_k_graph.original_graph->graph.get_merge_points();
-    const auto kernel_tensor_to_workload_tensor = workload.graph.get_merge_points();
-    for(const auto op_t : op_graph.impl()->graph.src_tensors())
-    {
-        const auto kernel_t                   = op_tensor_to_kernel_tensor.at(op_t);
-        const auto workload_t                 = kernel_tensor_to_workload_tensor.at(kernel_t);
-        workload.op_tensor_id_lut[workload_t] = op_t;
-    }
-    for(const auto op_t : op_graph.impl()->graph.dst_tensors())
-    {
-        const auto kernel_t                   = op_tensor_to_kernel_tensor.at(op_t);
-        const auto workload_t                 = kernel_tensor_to_workload_tensor.at(kernel_t);
-        workload.op_tensor_id_lut[workload_t] = op_t;
-    }
-    return workload.status;
-}
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
-#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */
-\ No newline at end of file
diff --git a/src/core/experimental/dynamic_fusion/WorkloadImpl/DependencyGraph.cpp b/src/core/experimental/dynamic_fusion/WorkloadImpl/DependencyGraph.cpp
deleted file mode 100644
index 7350255ebe..0000000000
--- a/src/core/experimental/dynamic_fusion/WorkloadImpl/DependencyGraph.cpp
+++ /dev/null
@@ -1,430 +0,0 @@
-/*
- * Copyright (c) 2022 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION
-#include "arm_compute/core/experimental/DependencyGraph.h"
-
-#include <algorithm>
-#include <deque>
-#include <set>
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-DependencyGraph::DependencyGraph(const AdjList &adj_src_tensors, const AdjList &adj_dst_tensors, const AdjList &adj_src_ops, const AdjList &adj_dst_ops, std::map<Id, Id> merge_points)
-    : _adj_src_tensors{ adj_src_tensors }, _adj_dst_tensors{ adj_dst_tensors }, _adj_src_ops{ adj_src_ops }, _adj_dst_ops{ adj_dst_ops }, _merge_to_internal{ merge_points }, _operator_id{}, _tensor_id{}
-{
-}
-DependencyGraph::DependencyGraph(const std::vector<Id> &imported_tensors)
-    : _adj_src_tensors{}, _adj_dst_tensors{}, _adj_src_ops{}, _adj_dst_ops{}, _merge_to_internal{}, _operator_id{}, _tensor_id{}
-{
-    for(auto t : imported_tensors)
-    {
-        _adj_src_ops[t] = {};
-        _adj_dst_ops[t] = {};
-    }
-}
-
-Status DependencyGraph::update_merge_point(Id t_id, Id merge_point)
-{
-    if(_merge_to_internal.find(merge_point) == _merge_to_internal.end())
-    {
-        return Status{ ErrorCode::RUNTIME_ERROR, "Merge point does not exist" };
-    }
-    _merge_to_internal[merge_point] = t_id;
-    return Status{};
-}
-
-DependencyGraph::Id DependencyGraph::add_tensor(Id merge_tensor)
-{
-    Id new_tensor{ empty_id() };
-    if(merge_tensor != empty_id())
-    {
-        if(_merge_to_internal.find(merge_tensor) != _merge_to_internal.end())
-        {
-            new_tensor = _merge_to_internal[merge_tensor];
-        }
-        else
-        {
-            new_tensor                       = insert_new_tensor();
-            _merge_to_internal[merge_tensor] = new_tensor;
-        }
-    }
-    else
-    {
-        new_tensor = insert_new_tensor();
-    }
-    return new_tensor;
-}
-
-void DependencyGraph::remove_tensor(Id tensor)
-{
-    for(auto src_op : _adj_src_ops.at(tensor))
-    {
-        auto &dst_tensors = _adj_dst_tensors.at(src_op);
-        dst_tensors.erase(
-            std::remove(std::begin(dst_tensors), std::end(dst_tensors), tensor),
-            std::end(dst_tensors));
-    }
-    for(auto dst_op : _adj_dst_ops.at(tensor))
-    {
-        auto &src_tensors = _adj_src_tensors.at(dst_op);
-        src_tensors.erase(
-            std::remove(std::begin(src_tensors), std::end(src_tensors), tensor),
-            std::end(src_tensors));
-    }
-    _adj_src_ops.erase(tensor);
-    _adj_dst_ops.erase(tensor);
-}
-
-std::pair<Status, DependencyGraph::Id> DependencyGraph::add_operator(const std::vector<Id> &inputs, const std::vector<Id> &outputs)
-{
-    Id new_op = insert_new_op();
-    for(Id tensor : inputs)
-    {
-        link_input(new_op, tensor);
-    }
-    for(Id tensor : outputs)
-    {
-        link_output(new_op, tensor);
-    }
-
-    // Use topological sort in order to detect possible loops / cycles.
-    // NOTE: This is unscalable. We'll need to have a better way of detecting loops or relax this invariant during operation, and add a validate method instead
-    return std::pair<Status, DependencyGraph::Id>(topological_sort().first, new_op);
-}
-
-void DependencyGraph::remove_operator(Id op)
-{
-    for(auto src_tensor : _adj_src_tensors.at(op))
-    {
-        auto &dst_ops = _adj_dst_ops.at(src_tensor);
-        dst_ops.erase(
-            std::remove(std::begin(dst_ops), std::end(dst_ops), op),
-            std::end(dst_ops));
-    }
-    for(auto dst_tensor : _adj_dst_tensors.at(op))
-    {
-        auto &src_ops = _adj_src_ops.at(dst_tensor);
-        src_ops.erase(
-            std::remove(std::begin(src_ops), std::end(src_ops), op),
-            std::end(src_ops));
-    }
-    _adj_src_tensors.erase(op);
-    _adj_dst_tensors.erase(op);
-}
-
-std::map<DependencyGraph::Id, DependencyGraph::Id> DependencyGraph::get_merge_points() const
-{
-    return _merge_to_internal;
-}
-
-std::vector<DependencyGraph::Id> DependencyGraph::get_root_ops() const
-{
-    std::vector<Id> ops{};
-    const auto      op_list = all_ops();
-
-    for(auto op : op_list)
-    {
-        if(src_ops(op).empty())
-        {
-            ops.emplace_back(op);
-        }
-    }
-    return ops;
-}
-
-std::vector<DependencyGraph::Id> DependencyGraph::get_dst_ops() const
-{
-    std::vector<Id> ops{};
-    const auto      op_list = all_ops();
-
-    for(auto op : op_list)
-    {
-        if(dst_ops(op).empty())
-        {
-            ops.emplace_back(op);
-        }
-    }
-    return ops;
-}
-
-std::vector<DependencyGraph::Id> DependencyGraph::src_tensors(Id op) const
-{
-    ARM_COMPUTE_ERROR_ON(!operator_exists(op));
-    return _adj_src_tensors.at(op);
-}
-
-std::vector<DependencyGraph::Id> DependencyGraph::dst_tensors(Id op) const
-{
-    ARM_COMPUTE_ERROR_ON(!operator_exists(op));
-    return _adj_dst_tensors.at(op);
-}
-
-std::vector<DependencyGraph::Id> DependencyGraph::src_tensors() const
-{
-    std::vector<Id> tensors;
-    for(auto tensor_src_ops : _adj_src_ops)
-    {
-        if(tensor_src_ops.second.empty())
-            tensors.push_back(tensor_src_ops.first);
-    }
-    return tensors;
-}
-
-std::vector<DependencyGraph::Id> DependencyGraph::dst_tensors() const
-{
-    std::vector<Id> tensors;
-    for(auto tensor_dst_ops : _adj_dst_ops)
-    {
-        if(tensor_dst_ops.second.empty())
-            tensors.push_back(tensor_dst_ops.first);
-    }
-    return tensors;
-}
-
-std::vector<DependencyGraph::Id> DependencyGraph::src_ops_from_tensor(Id tensor) const
-{
-    return _adj_src_ops.at(tensor);
-}
-std::vector<DependencyGraph::Id> DependencyGraph::dst_ops_from_tensor(Id tensor) const
-{
-    return _adj_dst_ops.at(tensor);
-}
-
-std::vector<DependencyGraph::Id> DependencyGraph::all_ops() const
-{
-    std::vector<Id> ops{};
-    std::transform(std::begin(_adj_src_tensors), std::end(_adj_src_tensors), std::back_inserter(ops), [](const auto & it)
-    {
-        return it.first;
-    });
-    return ops;
-}
-
-bool DependencyGraph::path_exists_from_tensor_to_op(Id src_tensor, Id dst_op) const
-{
-    for(auto child_op : dst_ops_from_tensor(src_tensor))
-    {
-        if(path_exists_from_op_to_op(child_op, dst_op))
-        {
-            return true;
-        }
-    }
-    return false;
-}
-
-bool DependencyGraph::path_exists_from_op_to_op(Id src_op, Id dst_op) const
-{
-    if(src_op == dst_op)
-    {
-        return true;
-    }
-    if(is_in(src_op, get_dst_ops()))
-    {
-        return false;
-    }
-    for(auto child_tensor : dst_tensors(src_op))
-    {
-        if(path_exists_from_tensor_to_op(child_tensor, dst_op))
-        {
-            return true;
-        }
-    }
-    return false;
-}
-
-std::vector<DependencyGraph::Id> DependencyGraph::all_tensors() const
-{
-    std::vector<Id> tensors{};
-    std::transform(std::begin(_adj_src_ops), std::end(_adj_src_ops), std::back_inserter(tensors), [](const auto & it)
-    {
-        return it.first;
-    });
-    return tensors;
-}
-
-unsigned int DependencyGraph::number_of_ops() const
-{
-    return _adj_src_tensors.size();
-}
-
-unsigned int DependencyGraph::number_of_tensors() const
-{
-    return _adj_src_ops.size();
-}
-
-DependencyGraph::Id DependencyGraph::insert_new_tensor()
-{
-    Id new_tensor            = _tensor_id.alloc();
-    _adj_src_ops[new_tensor] = {};
-    _adj_dst_ops[new_tensor] = {};
-    return new_tensor;
-}
-DependencyGraph::Id DependencyGraph::insert_new_op()
-{
-    Id new_op                = _operator_id.alloc();
-    _adj_src_tensors[new_op] = {};
-    _adj_dst_tensors[new_op] = {};
-    return new_op;
-}
-void DependencyGraph::link_input(Id op, Id in_tensor)
-{
-    ARM_COMPUTE_ERROR_ON(!operator_exists(op));
-    ARM_COMPUTE_ERROR_ON(!tensor_exists(in_tensor));
-    ARM_COMPUTE_ERROR_ON(are_connected(op, in_tensor));
-    _adj_src_tensors[op].push_back(in_tensor);
-    _adj_dst_ops[in_tensor].push_back(op);
-}
-void DependencyGraph::link_output(Id op, Id out_tensor)
-{
-    ARM_COMPUTE_ERROR_ON(!operator_exists(op));
-    ARM_COMPUTE_ERROR_ON(!tensor_exists(out_tensor));
-    ARM_COMPUTE_ERROR_ON(are_connected(op, out_tensor));
-    _adj_dst_tensors[op].push_back(out_tensor);
-    _adj_src_ops[out_tensor].push_back(op);
-}
-bool DependencyGraph::tensor_exists(Id tensor) const
-{
-    return _adj_src_ops.find(tensor) != _adj_src_ops.end() && _adj_dst_ops.find(tensor) != _adj_dst_ops.end();
-}
-bool DependencyGraph::operator_exists(Id op) const
-{
-    return _adj_src_tensors.find(op) != _adj_src_tensors.end() && _adj_dst_tensors.find(op) != _adj_dst_tensors.end();
-}
-
-bool DependencyGraph::is_src_tensor(Id tensor) const
-{
-    if(!tensor_exists(tensor))
-    {
-        return false;
-    }
-    return _adj_src_ops.at(tensor).empty();
-}
-
-bool DependencyGraph::is_dst_tensor(Id tensor) const
-{
-    if(!tensor_exists(tensor))
-    {
-        return false;
-    }
-    return _adj_dst_ops.at(tensor).empty();
-}
-bool DependencyGraph::is_src_tensor_of(Id op, Id tensor) const
-{
-    if(!operator_exists(op) || !tensor_exists(tensor))
-    {
-        return false;
-    }
-    const auto op_inputs = src_tensors(op);
-    return std::find(op_inputs.begin(), op_inputs.end(), tensor) != op_inputs.end();
-}
-bool DependencyGraph::is_dst_tensor_of(Id op, Id tensor) const
-{
-    if(!operator_exists(op) || !tensor_exists(tensor))
-    {
-        return false;
-    }
-    const auto op_outputs = dst_tensors(op);
-    return std::find(op_outputs.begin(), op_outputs.end(), tensor) != op_outputs.end();
-}
-bool DependencyGraph::are_connected(Id op, Id tensor) const
-{
-    return is_src_tensor_of(op, tensor) || is_dst_tensor_of(op, tensor);
-}
-std::vector<DependencyGraph::Id> DependencyGraph::src_ops(Id op) const
-{
-    ARM_COMPUTE_ERROR_ON(!operator_exists(op));
-    std::vector<Id> ops{};
-    for(Id src_tensor : src_tensors(op))
-    {
-        ops.insert(ops.end(), std::begin(_adj_src_ops.at(src_tensor)), std::end(_adj_src_ops.at(src_tensor)));
-    }
-    return ops;
-}
-
-std::vector<DependencyGraph::Id> DependencyGraph::dst_ops(Id op) const
-{
-    ARM_COMPUTE_ERROR_ON(!operator_exists(op));
-    std::vector<Id> ops{};
-    for(Id dst_tensor : _adj_dst_tensors.at(op))
-    {
-        ops.insert(ops.end(), std::begin(_adj_dst_ops.at(dst_tensor)), std::end(_adj_dst_ops.at(dst_tensor)));
-    }
-    return ops;
-}
-
-std::pair<Status, std::vector<DependencyGraph::OpPack>> DependencyGraph::topological_sort() const
-{
-    // Incident degree (number of source operators to an op)
-    std::map<Id, unsigned int> in_degree{};
-    std::set<Id>        visited_ops{};
-    std::deque<Id>      zero_in_degree_ops{};
-    std::vector<OpPack> sorted_op_packs{};
-    for(auto op : all_ops())
-    {
-        const auto degree = src_ops(op).size();
-        in_degree[op]     = degree;
-        if(degree == 0)
-        {
-            zero_in_degree_ops.push_back(op);
-            visited_ops.insert(op);
-        }
-    }
-
-    while(!zero_in_degree_ops.empty())
-    {
-        const Id op = zero_in_degree_ops.front();
-        zero_in_degree_ops.pop_front();
-        sorted_op_packs.push_back(OpPack{ op, src_tensors(op), dst_tensors(op) });
-
-        for(const auto next_op : dst_ops(op))
-        {
-            if(in_degree[next_op] > 0)
-            {
-                in_degree[next_op]--;
-            }
-            if(in_degree[next_op] == 0 && visited_ops.find(next_op) == visited_ops.end())
-            {
-                zero_in_degree_ops.push_back(next_op);
-                visited_ops.insert(op);
-            }
-        }
-    }
-
-    // If there are remaining ops with in_degree > 0, then it's indication that there are cycles in the graph
-    Status st{};
-    if(sorted_op_packs.size() != number_of_ops())
-    {
-        st = Status{ ErrorCode::RUNTIME_ERROR, "Cycles or loops are not allowed in a DependencyGraph" };
-    }
-    return std::make_pair(st, sorted_op_packs);
-}
-
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
-#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */
-\ No newline at end of file
diff --git a/src/core/experimental/dynamic_fusion/WorkloadImpl/ITensorDescPack.h b/src/core/experimental/dynamic_fusion/WorkloadImpl/ITensorDescPack.h
deleted file mode 100644
index a4e4eaa3bb..0000000000
--- a/src/core/experimental/dynamic_fusion/WorkloadImpl/ITensorDescPack.h
+++ /dev/null
@@ -1,241 +0,0 @@
-/*
- * Copyright (c) 2022 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION
-#ifndef ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_ITENSORDESCPACK_H
-#define ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_ITENSORDESCPACK_H
-
-#include <cstddef>
-#include <unordered_map>
-#include <vector>
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-template <typename TDesc>
-class ITensorDescPack
-{
-public:
-    struct PackElement
-    {
-        PackElement()                    = default;
-        ~PackElement()                   = default;
-        PackElement(const PackElement &) = default;
-        PackElement &operator=(const PackElement &) = default;
-        PackElement(PackElement &&)                 = default;
-        PackElement &operator=(PackElement &&) = default;
-        PackElement(int id, TDesc *tensor)
-            : id(id), tensor(tensor), ctensor(nullptr)
-        {
-        }
-        PackElement(int id, const TDesc *ctensor)
-            : id(id), tensor(nullptr), ctensor(ctensor)
-        {
-        }
-
-        int          id{ -1 };
-        TDesc       *tensor{ nullptr };
-        const TDesc *ctensor{ nullptr };
-
-        friend bool operator==(const PackElement &elem0, const PackElement &elem1)
-        {
-            const bool same_ctensor = (elem0.tensor == nullptr && elem1.tensor == nullptr && elem0.ctensor != nullptr && elem1.ctensor != nullptr && *elem0.ctensor == *elem1.ctensor);
-            const bool same_tensor  = (elem0.ctensor == nullptr && elem1.ctensor == nullptr && elem0.tensor != nullptr && elem1.tensor != nullptr && *elem0.tensor == *elem1.tensor);
-
-            return elem0.id == elem1.id && (same_ctensor || same_tensor);
-        }
-    };
-
-public:
-    /** Default Constructor */
-    ITensorDescPack()                                           = default;
-    ~ITensorDescPack()                                          = default;
-    ITensorDescPack<TDesc>(const ITensorDescPack<TDesc> &other) = default;
-    ITensorDescPack<TDesc> &operator=(const ITensorDescPack<TDesc> &other) = default;
-    ITensorDescPack<TDesc>(ITensorDescPack<TDesc> &&other)                 = default;
-    ITensorDescPack<TDesc> &operator=(ITensorDescPack<TDesc> &&other) = default;
-    /**  Initializer list Constructor */
-    ITensorDescPack(std::initializer_list<PackElement> l)
-        : _pack{}
-    {
-        for(auto &e : l)
-        {
-            _pack[e.id] = e;
-        }
-    }
-    /** Add tensor to the pack
-     *
-     * @param[in] id     ID/type of the tensor to add
-     * @param[in] tensor Tensor to add
-     */
-    void add_tensor(int id, TDesc *tensor)
-    {
-        _pack[id] = PackElement(id, tensor);
-    }
-
-    /** Add const tensor to the pack
-     *
-     * @param[in] id     ID/type of the tensor to add
-     * @param[in] tensor Tensor to add
-     */
-    void add_const_tensor(int id, const TDesc *tensor)
-    {
-        _pack[id] = PackElement(id, tensor);
-    }
-    /** Get tensor of a given id from the pac
-     *
-     * @param[in] id ID of tensor to extract
-     *
-     * @return The pointer to the tensor if exist and is non-const else nullptr
-     */
-    TDesc *get_tensor(int id)
-    {
-        auto it = _pack.find(id);
-        return it != _pack.end() ? it->second.tensor : nullptr;
-    }
-    /** Get constant tensor of a given id
-     *
-     * @param[in] id ID of tensor to extract
-     *
-     * @return The pointer to the tensor if exist and is const else nullptr
-     */
-    const TDesc *get_const_tensor(int id) const
-    {
-        auto it = _pack.find(id);
-        if(it != _pack.end())
-        {
-            return it->second.ctensor != nullptr ? it->second.ctensor : it->second.tensor;
-        }
-        return nullptr;
-    }
-    /** Remove the tensor stored with the given id
-     *
-     * @param[in] id ID of tensor to remove
-     */
-    void remove_tensor(int id)
-    {
-        _pack.erase(id);
-    }
-    /** Pack size accessor
-     *
-     * @return Number of tensors registered to the pack
-     */
-    size_t size() const
-    {
-        return _pack.size();
-    }
-    /** Checks if pack is empty
-     *
-     * @return True if empty else false
-     */
-    bool empty() const
-    {
-        return _pack.empty();
-    }
-
-    /** Get the ACL_SRC_* tensors
-     *
-     * @return std::vector<TDesc *>
-     */
-    std::vector<TDesc *> get_src_tensors()
-    {
-        std::vector<TDesc *> src_tensors{};
-        for(int id = static_cast<int>(TensorType::ACL_SRC); id <= static_cast<int>(TensorType::ACL_SRC_END); ++id)
-        {
-            auto tensor = get_tensor(id);
-            if(tensor != nullptr)
-            {
-                src_tensors.push_back(tensor);
-            }
-        }
-        return src_tensors;
-    }
-    /** Get the const ACL_SRC_* tensors
-     *
-     * @return std::vector<const TDesc *>
-     */
-    std::vector<const TDesc *> get_const_src_tensors() const
-    {
-        std::vector<const TDesc *> src_tensors{};
-        for(int id = static_cast<int>(TensorType::ACL_SRC); id <= static_cast<int>(TensorType::ACL_SRC_END); ++id)
-        {
-            auto tensor = get_const_tensor(id);
-            if(tensor != nullptr)
-            {
-                src_tensors.push_back(tensor);
-            }
-        }
-        return src_tensors;
-    }
-    /** Get the ACL_DST_* tensors
-     *
-     * @return std::vector<TDesc *>
-     */
-    std::vector<TDesc *> get_dst_tensors()
-    {
-        std::vector<TDesc *> dst_tensors{};
-        for(int id = static_cast<int>(TensorType::ACL_DST); id <= static_cast<int>(TensorType::ACL_DST_END); ++id)
-        {
-            auto tensor = get_tensor(id);
-            if(tensor != nullptr)
-            {
-                dst_tensors.push_back(tensor);
-            }
-        }
-        return dst_tensors;
-    }
-    /** Get the const ACL_DST_* tensors
-     *
-     * @return std::vector<const TDesc *>
-     */
-    std::vector<const TDesc *> get_const_dst_tensors() const
-    {
-        std::vector<const TDesc *> dst_tensors{};
-        for(int id = static_cast<int>(TensorType::ACL_DST); id <= static_cast<int>(TensorType::ACL_DST_END); ++id)
-        {
-            auto tensor = get_const_tensor(id);
-            if(tensor != nullptr)
-            {
-                dst_tensors.push_back(tensor);
-            }
-        }
-        return dst_tensors;
-    }
-
-    friend bool operator==(const ITensorDescPack<TDesc> &pack0, const ITensorDescPack<TDesc> &pack1)
-    {
-        return pack0._pack == pack1._pack;
-    }
-
-private:
-    std::unordered_map<int, PackElement> _pack{}; /**< Container with the packed tensors */
-};
-
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
-#endif //ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_ITENSORDESCPACK_H
-#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */
-\ No newline at end of file
diff --git a/src/core/experimental/dynamic_fusion/WorkloadImpl/OperatorGraphImpl.cpp b/src/core/experimental/dynamic_fusion/WorkloadImpl/OperatorGraphImpl.cpp
deleted file mode 100644
index 663b89e235..0000000000
--- a/src/core/experimental/dynamic_fusion/WorkloadImpl/OperatorGraphImpl.cpp
+++ /dev/null
@@ -1,423 +0,0 @@
-/*
- * Copyright (c) 2022 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION
-#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/utils/misc/ShapeCalculator.h"
-#include "arm_compute/runtime/CL/CLScheduler.h"
-
-#include "src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelGraph.h"
-#include "src/core/experimental/dynamic_fusion/WorkloadImpl/OperatorGraphImpl.h"
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-namespace
-{
-Status add_kernel_tensor(ClKernelGraph &k_graph, const OperatorGraph::Implementation &op_graph, const OpTensorContent &op_tensor, MemoryType memory_type, AuxMemoryInfo memory_info,
-                         DependencyGraph::Id &id)
-{
-    ARM_COMPUTE_UNUSED(op_graph);
-    return k_graph.add_kernel_tensor(op_tensor.desc, memory_type, memory_info, id, op_tensor.id);
-}
-
-Status add_kernel_tensor(ClKernelGraph &k_graph, const OperatorGraph::Implementation &op_graph, const OpTensorContent &op_tensor, DependencyGraph::Id &id)
-{
-    // For a tensor t
-    // 1. If t is a src tensor of the entire op graph, then it's Core.
-    //    (Optimisation opportunity, if we guanrantee that all translate methods are called in topological order, we can always assign t to Core.
-    //       Because even if the op is non-root (which would mean t should be an Aux tensor), the src tensors would be already be determined by the ancestor ops (topological order), and thus would not be overriden by it)
-    // 2. If t is a dst tensor of the entire op graph, then it's Core.
-    // 3. Aux tensor with Persistent and Prepare lifetime is manually specified
-    // 4. All other ts not captured by the above are assigned Aux, with lifetime of Temporary.
-    // kernel_graph.add_kernel_tensor(input->desc, );
-    bool          is_src_tensor_of_graph = is_in(op_tensor.id, op_graph.graph.src_tensors());
-    bool          is_dst_tensor_of_graph = is_in(op_tensor.id, op_graph.graph.dst_tensors());
-    MemoryType    memory_type;
-    AuxMemoryInfo memory_info;
-    if(is_src_tensor_of_graph || is_dst_tensor_of_graph)
-    {
-        memory_type = MemoryType::Core;
-    }
-    else
-    {
-        memory_type          = MemoryType::Auxiliary;
-        memory_info.lifetime = AuxMemoryLifetime::Temporary;
-        memory_info.size     = op_tensor.desc->total_size();
-    }
-    return add_kernel_tensor(k_graph, op_graph, op_tensor, memory_type, memory_info, id);
-}
-
-/** Get the suitable kernel size for using direct convolution method with NHWC data layout.
- *
- * @note Duplicate of the function with the same name in src/gpu/cl/operators/ClConv2d.cpp
- *
- * @note Direct convolution should be executed when the kernel has the spatial dimensions greater than or equal to the value returned by this function
- *
- * @param[in] gpu_target GPU target
- *
- * @return the suitable kernel size for using direct convolution method with NHWC data layout
- */
-size_t get_direct_conv_kernel_threshold_nhwc(arm_compute::GPUTarget gpu_target)
-{
-    switch(gpu_target)
-    {
-        case arm_compute::GPUTarget::G76:
-        case arm_compute::GPUTarget::G77:
-        case arm_compute::GPUTarget::G78:
-            return 5;
-        case arm_compute::GPUTarget::G71:
-        case arm_compute::GPUTarget::G72:
-        case arm_compute::GPUTarget::MIDGARD:
-        case arm_compute::GPUTarget::BIFROST:
-            return 7;
-        default:
-            return 5;
-    }
-}
-} // namespace
-
-bool operator==(const OpTensor &t0, const OpTensor &t1)
-{
-    return std::make_tuple(t0.id()) == std::make_tuple(t1.id());
-}
-bool operator==(const Conv2dDescriptor &conv2d0, const Conv2dDescriptor &conv2d1)
-{
-    return std::make_tuple(conv2d0.stride, conv2d0.dilation) == std::make_tuple(conv2d1.stride, conv2d1.dilation);
-}
-
-bool operator==(const ElementwiseDescriptor &ed0, const ElementwiseDescriptor &ed1)
-{
-    return ed0.op == ed1.op; // Compare Arithmatic Operations of two ElementwiseDescriptor objects
-}
-
-bool operator==(const FloorDescriptor &, const FloorDescriptor &)
-{
-    return std::make_tuple() == std::make_tuple(); // Currently two Floor ops are always the same
-}
-
-bool Conv2dContent::operator==(const OperatorContent &other) const
-{
-    const auto converted = *utils::cast::polymorphic_downcast<const Conv2dContent *>(&other);
-    return desc == converted.desc;
-}
-
-bool ElementwiseContent::operator==(const OperatorContent &other) const
-{
-    const auto converted = *utils::cast::polymorphic_downcast<const ElementwiseContent *>(&other);
-    return desc == converted.desc;
-}
-
-bool FloorContent::operator==(const OperatorContent &other) const
-{
-    const auto converted = *utils::cast::polymorphic_downcast<const FloorContent *>(&other);
-    return desc == converted.desc;
-}
-
-ConvolutionMethod Conv2dContent::select_conv_method(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *dst, const Conv2dDescriptor &conv2d_desc, const GPUTarget gpu_target)
-{
-    // Modified from ClConv2d::get_convolution_method
-
-    ARM_COMPUTE_ERROR_ON_NULLPTR(src);
-    ARM_COMPUTE_ERROR_ON_NULLPTR(dst);
-    ARM_COMPUTE_ERROR_ON_NULLPTR(weights);
-
-    const PadStrideInfo legacy_pad_stride(conv2d_desc.stride.x(), conv2d_desc.stride.y(), conv2d_desc.pad.left, conv2d_desc.pad.right, conv2d_desc.pad.top, conv2d_desc.pad.bottom, DimensionRoundingType{});
-    const Size2D        dilation = conv2d_desc.dilation;
-
-    const size_t idx_w = get_data_layout_dimension_index(src->data_layout(), DataLayoutDimension::WIDTH);
-    const size_t idx_h = get_data_layout_dimension_index(src->data_layout(), DataLayoutDimension::HEIGHT);
-    const size_t idx_c = get_data_layout_dimension_index(src->data_layout(), DataLayoutDimension::CHANNEL);
-
-    /* Input spatial dims, kernel size, IFM/OFM, conv info*/
-    using ConvolutionConfiguration = std::tuple<Size2D, Size2D, Size2D, PadStrideInfo, DataLayout>;
-    using ConfigurationMethod      = std::pair<ConvolutionConfiguration, ConvolutionMethod>;
-
-    const std::vector<ConfigurationMethod> known_configs =
-    {
-        // Alexnet
-        ConfigurationMethod(ConvolutionConfiguration(Size2D(27U, 27U), Size2D(5U, 5U), Size2D(48U, 128U), PadStrideInfo(1U, 1U, 2U, 2U), DataLayout::NCHW), ConvolutionMethod::DIRECT),
-        // VGG16 / VGG19
-        ConfigurationMethod(ConvolutionConfiguration(Size2D(224U, 224U), Size2D(3U, 3U), Size2D(3U, 64U), PadStrideInfo(1U, 1U, 1U, 1U), DataLayout::NCHW), ConvolutionMethod::DIRECT),
-        // Mobilenet 224
-        ConfigurationMethod(ConvolutionConfiguration(Size2D(224U, 224U), Size2D(3U, 3U), Size2D(3U, 32U), PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), DataLayout::NCHW), ConvolutionMethod::GEMM),
-        // Mobilenet 160
-        ConfigurationMethod(ConvolutionConfiguration(Size2D(160U, 160U), Size2D(3U, 3U), Size2D(3U, 24U), PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), DataLayout::NCHW), ConvolutionMethod::GEMM),
-        // Mobilenet 224
-        ConfigurationMethod(ConvolutionConfiguration(Size2D(224U, 224U), Size2D(3U, 3U), Size2D(3U, 32U), PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), DataLayout::NHWC), ConvolutionMethod::GEMM),
-        // Mobilenet 160
-        ConfigurationMethod(ConvolutionConfiguration(Size2D(160U, 160U), Size2D(3U, 3U), Size2D(3U, 24U), PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), DataLayout::NHWC), ConvolutionMethod::GEMM),
-    };
-
-    const auto find_config = [&](ConfigurationMethod c)
-    {
-        const ConvolutionConfiguration config      = c.first;
-        const PadStrideInfo            info        = std::get<3>(config);
-        const DataLayout               data_layout = std::get<4>(config);
-
-        return std::get<0>(config) == Size2D(src->dimension(idx_w), src->dimension(idx_h)) && std::get<1>(config) == Size2D(weights->dimension(idx_w), weights->dimension(idx_h))
-               && std::get<2>(config) == Size2D(weights->dimension(idx_c), weights->dimension(3)) && info.pad_top() == legacy_pad_stride.pad_top() && info.pad_right() == legacy_pad_stride.pad_right()
-               && info.pad_bottom() == legacy_pad_stride.pad_bottom() && info.pad_left() == legacy_pad_stride.pad_left() && info.stride() == legacy_pad_stride.stride() && (data_layout == src->data_layout());
-    };
-
-    std::vector<ConfigurationMethod>::const_iterator found;
-    if((found = std::find_if(known_configs.begin(), known_configs.end(), find_config)) != known_configs.end())
-    {
-        return (*found).second;
-    }
-
-    if(dilation != Size2D(1U, 1U))
-    {
-        return ConvolutionMethod::GEMM;
-    }
-    else
-    {
-        if(src->data_layout() == DataLayout::NCHW)
-        {
-            ARM_COMPUTE_ERROR("NCHW not supported");
-        }
-        else
-        {
-            const bool   is_direct_valid           = bool(ClDirectConv2dKernel::validate(src, weights, nullptr, dst, ClDirectConv2dKernelDescriptor{ conv2d_desc }));
-            const size_t kernel_sz_direct_conv_thr = get_direct_conv_kernel_threshold_nhwc(gpu_target);
-
-            // SRGAN case
-            if((src->dimension(idx_h) > 720U) && (dst->dimension(idx_h) > 720U) && (weights->dimension(idx_h) == 9) && (conv2d_desc.pad.top < 3)
-               && is_direct_valid)
-            {
-                return ConvolutionMethod::DIRECT;
-            }
-
-            // Floating-point case: GeMM/Direct
-            if(is_data_type_float(src->data_type()))
-            {
-                // Get dst shape
-                TensorShape output_shape       = misc::shape_calculator::compute_deep_convolution_shape(*src, *weights, legacy_pad_stride);
-                const bool  is_large_kernel_sz = (weights->dimension(idx_w) >= kernel_sz_direct_conv_thr) && (weights->dimension(idx_h) >= kernel_sz_direct_conv_thr);
-                const bool  is_ifm_ge_16       = src->dimension(idx_c) >= 16;
-                const bool  is_ofm_lte_8       = weights->dimension(3U) <= 8;
-                const bool  workload_gte_8192  = (output_shape[0] * output_shape[1] * output_shape[2]) / 16 >= 8192;
-                const bool  is_ifm_gt_ofm      = src->dimension(idx_c) > weights->dimension(3U);
-
-                // Direct convolution case
-                if(is_direct_valid)
-                {
-                    if((gpu_target == arm_compute::GPUTarget::G71 || gpu_target == arm_compute::GPUTarget::G72 || gpu_target == arm_compute::GPUTarget::MIDGARD))
-                    {
-                        if(is_large_kernel_sz && is_ifm_ge_16 && is_ifm_gt_ofm)
-                        {
-                            return ConvolutionMethod::DIRECT;
-                        }
-                    }
-                    else
-                    {
-                        if((is_large_kernel_sz && workload_gte_8192 && is_ifm_ge_16) || (is_ofm_lte_8 && is_ifm_ge_16))
-                        {
-                            return ConvolutionMethod::DIRECT;
-                        }
-                    }
-                }
-
-                // Default case
-                return ConvolutionMethod::GEMM;
-            }
-
-            // Generic case for quantized. Only GeMM
-            return ConvolutionMethod::GEMM;
-        }
-    }
-    return ConvolutionMethod::DIRECT;
-}
-
-Status Conv2dContent::translate(ClKernelGraph &kernel_graph) const
-{
-    const auto input  = _tensors.get_const_tensor(TensorType::ACL_SRC_0);
-    const auto weight = _tensors.get_const_tensor(TensorType::ACL_SRC_1);
-    const auto dst    = _tensors.get_const_tensor(TensorType::ACL_DST_0);
-    const auto method = forced_method_enabled ? forced_method : Conv2dContent::select_conv_method(input->desc, weight->desc, dst->desc, desc, CLScheduler::get().target());
-    switch(method)
-    {
-        case ConvolutionMethod::DIRECT:
-        {
-            return translate_direct_conv2d(kernel_graph);
-        }
-        default:
-        {
-            ARM_COMPUTE_RETURN_ERROR_MSG("Not implemented");
-        }
-    }
-    return Status{};
-}
-Status Conv2dContent::translate_direct_conv2d(ClKernelGraph &kernel_graph) const
-{
-    const auto input  = _tensors.get_const_tensor(TensorType::ACL_SRC_0);
-    const auto weight = _tensors.get_const_tensor(TensorType::ACL_SRC_1);
-    const auto bias   = _tensors.get_const_tensor(TensorType::ACL_SRC_2);
-    const auto dst    = _tensors.get_const_tensor(TensorType::ACL_DST_0);
-    ARM_COMPUTE_ERROR_ON_NULLPTR(input, weight, dst);
-
-    ITensorDescPack<ClKernelTensor> tensors;
-
-    DependencyGraph::Id input_id;
-    auto                st = add_kernel_tensor(kernel_graph, *_graph, *input, input_id);
-    ARM_COMPUTE_RETURN_ON_ERROR(st);
-    tensors.add_const_tensor(ACL_SRC_0, kernel_graph.get_tensor(input_id));
-
-    DependencyGraph::Id weight_id;
-    st = add_kernel_tensor(kernel_graph, *_graph, *weight, weight_id);
-    ARM_COMPUTE_RETURN_ON_ERROR(st);
-    tensors.add_const_tensor(ACL_SRC_1, kernel_graph.get_tensor(weight_id));
-
-    if(bias != nullptr)
-    {
-        DependencyGraph::Id bias_id;
-        st = add_kernel_tensor(kernel_graph, *_graph, *bias, bias_id);
-        ARM_COMPUTE_RETURN_ON_ERROR(st);
-        tensors.add_const_tensor(ACL_SRC_2, kernel_graph.get_tensor(bias_id));
-    }
-
-    DependencyGraph::Id dst_id;
-    st = add_kernel_tensor(kernel_graph, *_graph, *dst, dst_id);
-    ARM_COMPUTE_RETURN_ON_ERROR(st);
-    tensors.add_const_tensor(ACL_DST_0, kernel_graph.get_tensor(dst_id));
-
-    DependencyGraph::Id direct_conv2d_id;
-    const auto          kernel_desc = ClDirectConv2dKernelDescriptor{ desc };
-
-    st = ClDirectConv2dKernel::validate(input->desc, weight->desc, bias == nullptr ? nullptr : bias->desc, dst->desc, kernel_desc);
-    ARM_COMPUTE_RETURN_ON_ERROR(st);
-
-    ClKernelConfig config{ UnitWorkloadStage{ UnitWorkloadStage::Stage::Run }, TileDescriptor{}, StoreType::TStoreIndirectWidthSelect };
-    st = kernel_graph.add_kernel<ClDirectConv2dKernel>(config, kernel_desc, tensors, direct_conv2d_id);
-    ARM_COMPUTE_RETURN_ON_ERROR(st);
-    ARM_COMPUTE_UNUSED(direct_conv2d_id);
-
-    return Status{};
-}
-
-Status ElementwiseContent::translate(ClKernelGraph &kernel_graph) const
-{
-    const auto lhs = _tensors.get_const_tensor(TensorType::ACL_SRC_0);
-    const auto rhs = _tensors.get_const_tensor(TensorType::ACL_SRC_1);
-    const auto dst = _tensors.get_const_tensor(TensorType::ACL_DST_0);
-    ARM_COMPUTE_ERROR_ON_NULLPTR(lhs, rhs, dst);
-
-    ITensorDescPack<ClKernelTensor> tensors;
-
-    DependencyGraph::Id lhs_id;
-    auto                st = add_kernel_tensor(kernel_graph, *_graph, *lhs, lhs_id);
-    ARM_COMPUTE_RETURN_ON_ERROR(st);
-    tensors.add_const_tensor(ACL_SRC_0, kernel_graph.get_tensor(lhs_id));
-
-    DependencyGraph::Id rhs_id;
-    st = add_kernel_tensor(kernel_graph, *_graph, *rhs, rhs_id);
-    ARM_COMPUTE_RETURN_ON_ERROR(st);
-    tensors.add_const_tensor(ACL_SRC_1, kernel_graph.get_tensor(rhs_id));
-
-    DependencyGraph::Id dst_id;
-    st = add_kernel_tensor(kernel_graph, *_graph, *dst, dst_id);
-    ARM_COMPUTE_RETURN_ON_ERROR(st);
-    tensors.add_const_tensor(ACL_DST_0, kernel_graph.get_tensor(dst_id));
-
-    DependencyGraph::Id add_id;
-    ClKernelConfig      config{ UnitWorkloadStage{ UnitWorkloadStage::Stage::Run }, TileDescriptor{}, StoreType::TStoreIndirectWidthSelect };
-
-    st = ClElementwiseKernel::validate(lhs->desc, rhs->desc, dst->desc);
-    ARM_COMPUTE_RETURN_ON_ERROR(st);
-
-    st = kernel_graph.add_kernel<ClElementwiseKernel>(config, ClElementwiseKernelDescriptor{ desc }, tensors, add_id);
-    ARM_COMPUTE_RETURN_ON_ERROR(st);
-    ARM_COMPUTE_UNUSED(add_id);
-
-    return Status{};
-}
-
-Status FloorContent::translate(ClKernelGraph &kernel_graph) const
-{
-    const auto src = _tensors.get_const_tensor(TensorType::ACL_SRC_0);
-    const auto dst = _tensors.get_const_tensor(TensorType::ACL_DST_0);
-    ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst);
-
-    ITensorDescPack<ClKernelTensor> tensors;
-
-    DependencyGraph::Id src_id;
-    auto                st = add_kernel_tensor(kernel_graph, *_graph, *src, src_id);
-    ARM_COMPUTE_RETURN_ON_ERROR(st);
-    tensors.add_const_tensor(ACL_SRC_0, kernel_graph.get_tensor(src_id));
-
-    DependencyGraph::Id dst_id;
-    st = add_kernel_tensor(kernel_graph, *_graph, *dst, dst_id);
-    ARM_COMPUTE_RETURN_ON_ERROR(st);
-    tensors.add_const_tensor(ACL_DST_0, kernel_graph.get_tensor(dst_id));
-
-    DependencyGraph::Id add_id;
-    ClKernelConfig      config{ UnitWorkloadStage{ UnitWorkloadStage::Stage::Run }, TileDescriptor{}, StoreType::TStoreIndirectWidthSelect };
-
-    st = ClFloorKernel::validate(src->desc, dst->desc);
-    ARM_COMPUTE_RETURN_ON_ERROR(st);
-
-    st = kernel_graph.add_kernel<ClFloorKernel>(config, ClFloorKernelDescriptor{ desc }, tensors, add_id);
-    ARM_COMPUTE_RETURN_ON_ERROR(st);
-
-    return Status{};
-}
-
-std::vector<const OperatorContent *> traverse(const OperatorGraph::Implementation &graph)
-{
-    std::vector<const OperatorContent *> ops;
-    const auto                           sorted = graph.graph.topological_sort();
-    for(const auto &pack : sorted.second)
-    {
-        ops.push_back(graph.operators.at(pack.op).get());
-    }
-    return ops;
-}
-
-std::vector<OperatorContent *> traverse(OperatorGraph::Implementation &graph)
-{
-    std::vector<OperatorContent *> ops;
-    const auto                     sorted = graph.graph.topological_sort();
-    for(const auto &pack : sorted.second)
-    {
-        ops.push_back(graph.operators.at(pack.op).get());
-    }
-    return ops;
-}
-
-Status translate(ClKernelGraph &kernel_graph, const OperatorGraph::Implementation &op_graph)
-{
-    for(const auto &op : traverse(op_graph))
-    {
-        const auto st = op->translate(kernel_graph);
-        ARM_COMPUTE_RETURN_ON_ERROR(st);
-    }
-    return Status{};
-}
-
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
-#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */
diff --git a/src/core/experimental/dynamic_fusion/WorkloadImpl/OperatorGraphImpl.h b/src/core/experimental/dynamic_fusion/WorkloadImpl/OperatorGraphImpl.h
deleted file mode 100644
index b303cdb9fc..0000000000
--- a/src/core/experimental/dynamic_fusion/WorkloadImpl/OperatorGraphImpl.h
+++ /dev/null
@@ -1,252 +0,0 @@
-/*
- * Copyright (c) 2022 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION
-#ifndef ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_OPERATORGRAPHIMPL
-#define ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_OPERATORGRAPHIMPL
-
-#include "arm_compute/core/experimental/ClWorkload.h"
-#include "src/core/experimental/dynamic_fusion/WorkloadImpl/ITensorDescPack.h"
-
-#include "support/Cast.h"
-#include "support/DeepCopy.h"
-
-#include <map>
-#include <tuple>
-#include <type_traits>
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-enum class OperatorComplexity
-{
-    Complex = 0,
-    Simple
-};
-
-struct ClKernelGraph;
-struct OpTensorContent
-{
-public:
-    using Id          = DependencyGraph::Id;
-    OpTensorContent() = default;
-    OpTensorContent(Id id)
-        : id{ id }, desc{}
-    {
-    }
-    OpTensorContent(Id id, ITensorInfo *desc)
-        : id{ id }, desc{ desc }
-    {
-    }
-    ~OpTensorContent()                       = default;
-    OpTensorContent(const OpTensorContent &) = default;
-    OpTensorContent &operator=(const OpTensorContent &) = default;
-    OpTensorContent(OpTensorContent &&)                 = default;
-    OpTensorContent &operator=(OpTensorContent &&) = default;
-    bool operator==(const OpTensorContent &other) const
-    {
-        return desc == other.desc;
-    }
-
-    const ITensorInfo *get_tensor_info() const
-    {
-        return desc;
-    }
-    ITensorInfo *get_tensor_info()
-    {
-        return desc;
-    }
-
-    Id           id{};
-    ITensorInfo *desc{};
-};
-
-struct OperatorContent
-{
-public:
-    using Id          = DependencyGraph::Id;
-    OperatorContent() = default;
-    OperatorContent(const OperatorGraph::Implementation *graph, Id id, const ITensorDescPack<OpTensorContent> &tensors)
-        : _graph{ graph }, _id{ id }, _tensors{ tensors }
-    {
-    }
-    OperatorContent(const OperatorContent &op) = default;
-    OperatorContent &operator=(const OperatorContent &op) = default;
-    OperatorContent(OperatorContent &&op)                 = default;
-    OperatorContent &operator=(OperatorContent &&op)            = default;
-    virtual ~OperatorContent()                                  = default;
-    virtual OperatorComplexity complexity() const               = 0;
-    virtual bool operator==(const OperatorContent &other) const = 0;
-    virtual Status translate(ClKernelGraph &kernel_graph) const = 0;
-
-protected:
-    const OperatorGraph::Implementation *_graph {};
-    Id                                   _id{};
-    ITensorDescPack<OpTensorContent>     _tensors{};
-};
-
-struct Conv2dContent : public OperatorContent
-{
-public:
-    Conv2dContent() = default;
-    Conv2dContent(const OperatorGraph::Implementation *graph, Id id, const Conv2dDescriptor &desc, const ITensorDescPack<OpTensorContent> &tensors)
-        : OperatorContent(graph, id, tensors), desc(desc), forced_method(), forced_method_enabled(false)
-    {
-    }
-    // Temporary. Do not need to pass ConvolutionMethod
-    Conv2dContent(const OperatorGraph::Implementation *graph, Id id, const Conv2dDescriptor &desc, const ITensorDescPack<OpTensorContent> &tensors, ConvolutionMethod method)
-        : OperatorContent(graph, id, tensors), desc(desc), forced_method(method), forced_method_enabled(true)
-    {
-    }
-    ~Conv2dContent()                     = default;
-    Conv2dContent(const Conv2dContent &) = default;
-    Conv2dContent &operator=(const Conv2dContent &) = default;
-    Conv2dContent(Conv2dContent &&)                 = default;
-    Conv2dContent &operator=(Conv2dContent &&) = default;
-    bool operator==(const OperatorContent &other) const override;
-    OperatorComplexity complexity() const override
-    {
-        return OperatorComplexity::Complex;
-    }
-    void set_method(ConvolutionMethod method)
-    {
-        forced_method_enabled = true;
-        forced_method         = method;
-    }
-
-    Status translate(ClKernelGraph &kernel_graph) const override;
-    /** Replicate heuristics of @ref ClConv2d::get_convolution_method(), except that non-supported data types and data layouts are removed from the heuristics
-     *
-     * @param src
-     * @param weights
-     * @param dst
-     * @param conv2d_desc
-     * @param gpu_target
-     * @return ConvolutionMethod
-     */
-    static ConvolutionMethod select_conv_method(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *dst, const Conv2dDescriptor &conv2d_desc, const GPUTarget gpu_target);
-
-    Conv2dDescriptor  desc{};
-    ConvolutionMethod forced_method{ ConvolutionMethod::GEMM_CONV2D };
-    bool              forced_method_enabled{ false };
-
-private:
-    Status translate_direct_conv2d(ClKernelGraph &kernel_graph) const;
-};
-
-class ElementwiseContent : public OperatorContent
-{
-public:
-    ElementwiseContent() = default;
-    ElementwiseContent(const OperatorGraph::Implementation *graph, Id id, const ElementwiseDescriptor &desc, const ITensorDescPack<OpTensorContent> &tensors)
-        : OperatorContent(graph, id, tensors), desc(desc)
-    {
-    }
-    ~ElementwiseContent()                          = default;
-    ElementwiseContent(const ElementwiseContent &) = default;
-    ElementwiseContent &operator=(const ElementwiseContent &) = default;
-    ElementwiseContent(ElementwiseContent &&)                 = default;
-    ElementwiseContent &operator=(ElementwiseContent &&) = default;
-    bool operator==(const OperatorContent &other) const override;
-    OperatorComplexity complexity() const override
-    {
-        return OperatorComplexity::Simple;
-    }
-    Status translate(ClKernelGraph &kernel_graph) const override;
-
-private:
-    ElementwiseDescriptor desc{};
-};
-
-class FloorContent : public OperatorContent
-{
-public:
-    FloorContent() = default;
-    FloorContent(const OperatorGraph::Implementation *graph, Id id, const FloorDescriptor &desc, const ITensorDescPack<OpTensorContent> &tensors)
-        : OperatorContent(graph, id, tensors), desc(desc)
-    {
-    }
-    ~FloorContent()                    = default;
-    FloorContent(const FloorContent &) = default;
-    FloorContent &operator=(const FloorContent &) = default;
-    FloorContent(FloorContent &&)                 = default;
-    FloorContent &operator=(FloorContent &&) = default;
-    bool operator==(const OperatorContent &other) const override;
-    OperatorComplexity complexity() const override
-    {
-        return OperatorComplexity::Simple;
-    }
-    Status translate(ClKernelGraph &kernel_graph) const override;
-
-private:
-    FloorDescriptor desc{};
-};
-
-struct OperatorGraph::Implementation
-{
-public:
-    template <typename ContentT, typename... Args>
-    void add_node(Operator::Id id, Args &&... args)
-    {
-        operators[id] = utils::memory::make_deep_unique<OperatorContent, ContentT>(this, id, std::forward<Args>(args)...);
-    }
-
-    template <typename... Args>
-    void add_tensor(OpTensor::Id id, Args &&... args)
-    {
-        tensors[id] = utils::memory::make_deep_unique<OpTensorContent, OpTensorContent>(id, std::forward<Args>(args)...);
-    }
-
-    using Dependency  = DependencyGraph;
-    using OperatorMap = std::map<Operator::Id, utils::memory::deep_unique_ptr<OperatorContent>>;
-    using OpTensorMap = std::map<OpTensor::Id, utils::memory::deep_unique_ptr<OpTensorContent>>;
-
-    Implementation()  = default;
-    ~Implementation() = default;
-
-    friend bool operator==(const OperatorGraph::Implementation &graph0, const OperatorGraph::Implementation &graph1)
-    {
-        return graph0.graph == graph1.graph && graph0.operators == graph1.operators && graph0.tensors == graph1.tensors;
-    }
-
-    Dependency  graph{};
-    OperatorMap operators{};
-    OpTensorMap tensors{};
-    Status      status{};
-};
-
-std::vector<const OperatorContent *> traverse(const OperatorGraph::Implementation &graph);
-
-std::vector<OperatorContent *> traverse(OperatorGraph::Implementation &graph);
-
-Status translate(ClKernelGraph &kernel_graph, const OperatorGraph::Implementation &op_graph);
-
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
-
-#endif //ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_OPERATORGRAPHIMPL
-#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */
-\ No newline at end of file
author	SiCong Li <sicong.li@arm.com>	2022-11-09 15:57:48 +0000
committer	SiCong Li <sicong.li@arm.com>	2022-11-22 14:09:34 +0000
commit	31df05a1870662a7288fbaeb6fbc7fc458bb5a73 (patch)
tree	e75a132b8b5fd21cbceec8d0aa88da893e9c4f43 /src/core/experimental/dynamic_fusion
parent	73bb6b7ad80801e56633ad4ea12b0404b586a979 (diff)
download	ComputeLibrary-31df05a1870662a7288fbaeb6fbc7fc458bb5a73.tar.gz