From 31df05a1870662a7288fbaeb6fbc7fc458bb5a73 Mon Sep 17 00:00:00 2001
From: SiCong Li <sicong.li@arm.com>
Date: Wed, 9 Nov 2022 15:57:48 +0000
Subject: Remove dynamic fusion prototype with tests and examples

Public headers of the new experimental dynamic fusion can be found in arm_compute/dynamic_fusion/
New examples on how to use the interface can be found in tests/validation/dynamic_fusion/gpu/Integration.cpp

Resolves COMPMID-5683

Change-Id: I7ccb902a227fb487562df15fc3c30118d1d95bbd
Signed-off-by: SiCong Li <sicong.li@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8671
Reviewed-by: Jakub Sujak <jakub.sujak@arm.com>
Reviewed-by: Gunes Bayir <gunes.bayir@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
---
 .../WorkloadImpl/ClFusedKernelGraph.h              | 452 ---------------------
 1 file changed, 452 deletions(-)
 delete mode 100644 src/core/experimental/dynamic_fusion/WorkloadImpl/ClFusedKernelGraph.h

(limited to 'src/core/experimental/dynamic_fusion/WorkloadImpl/ClFusedKernelGraph.h')
diff --git a/src/core/experimental/dynamic_fusion/WorkloadImpl/ClFusedKernelGraph.h b/src/core/experimental/dynamic_fusion/WorkloadImpl/ClFusedKernelGraph.h
deleted file mode 100644
index 2051f1b62f..0000000000
--- a/src/core/experimental/dynamic_fusion/WorkloadImpl/ClFusedKernelGraph.h
+++ /dev/null
@@ -1,452 +0,0 @@
-/*
- * Copyright (c) 2022 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION
-#ifndef ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_CLFUSEDKERNELGRAPH_H
-#define ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_CLFUSEDKERNELGRAPH_H
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/experimental/DependencyGraph.h"
-#include "src/core/experimental/dynamic_fusion/ClKernelBuildingAPI.h"
-#include "src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelGraph.h"
-#include "support/DeepCopy.h"
-
-#include <vector>
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-struct ClKernelFusionGroup;
-
-/** A const view of a subgraph of the @ref ClKernelGraph to be fused together
- *
- */
-struct ClKernelFusionGroup
-{
-public:
-    using Id = DependencyGraph::Id;
-
-    ClKernelFusionGroup() = default;
-    ClKernelFusionGroup(Id id)
-        : id{ id }, graph{}, fused_kernels{}, tensors{}
-    {
-    }
-    ~ClKernelFusionGroup() = default;
-
-    void set_id(Id i)
-    {
-        id = i;
-    }
-
-    Id add_fused_kernel(const ClKernel *kernel)
-    {
-        /// PRE: Acyclicity ensured by DependencyGraph
-        /// PRE: Connectedness ensured by DependencyGraph
-        /// PRE: Single-rootedness ensured by User
-        std::vector<Id> src_tensors;
-        for(const auto t : kernel->tensors().get_const_src_tensors())
-        {
-            auto id = graph.add_tensor(t->id);
-            if(tensors.find(id) == tensors.end())
-            {
-                tensors[id] = t;
-            }
-            src_tensors.push_back(id);
-        }
-        std::vector<Id> dst_tensors;
-        for(const auto t : kernel->tensors().get_const_dst_tensors())
-        {
-            auto id = graph.add_tensor(t->id);
-            if(tensors.find(id) == tensors.end())
-            {
-                tensors[id] = t;
-            }
-            dst_tensors.push_back(id);
-        }
-        auto id                  = graph.add_operator(src_tensors, dst_tensors);
-        fused_kernels[id.second] = kernel;
-        return id.second;
-    }
-
-    const ClKernel *get_root_kernel() const
-    {
-        auto root_kernels = graph.get_root_ops();
-        ARM_COMPUTE_ERROR_ON(root_kernels.size() != 1);
-        return fused_kernels.at(root_kernels.at(0));
-    }
-
-    std::vector<const ClKernelTensor *> get_src_tensors() const
-    {
-        std::vector<const ClKernelTensor *> src_tensors;
-        for(auto tensor_id : graph.src_tensors())
-        {
-            src_tensors.push_back(tensors.at(tensor_id));
-        }
-        return src_tensors;
-    }
-
-    std::vector<const ClKernelTensor *> get_dst_tensors() const
-    {
-        std::vector<const ClKernelTensor *> dst_tensors;
-        for(auto tensor_id : graph.dst_tensors())
-        {
-            dst_tensors.push_back(tensors.at(tensor_id));
-        }
-        return dst_tensors;
-    }
-
-    friend bool operator==(const ClKernelFusionGroup &fg0, const ClKernelFusionGroup &fg1)
-    {
-        return fg0.id == fg1.id && fg0.graph == fg1.graph && fg0.fused_kernels == fg1.fused_kernels && fg0.tensors == fg1.tensors;
-    }
-
-    Id              id{};
-    DependencyGraph graph{}; // A subgraph of the original ClKernelGraph
-    std::map<Id, const ClKernel *>       fused_kernels{};
-    std::map<Id, const ClKernelTensor *> tensors{};
-};
-
-std::vector<const ClKernel *> traverse(const ClKernelFusionGroup &group);
-
-struct ClFusedKernelGraph
-{
-public:
-    using Id = DependencyGraph::Id;
-
-    using KernelFusionGroupMap = std::map<Id, utils::memory::deep_unique_ptr<ClKernelFusionGroup>>;
-
-    ClFusedKernelGraph()                                = default;
-    ~ClFusedKernelGraph()                               = default;
-    ClFusedKernelGraph(const ClFusedKernelGraph &graph) = default;
-    ClFusedKernelGraph &operator=(const ClFusedKernelGraph &graph) = default;
-    ClFusedKernelGraph(ClFusedKernelGraph &&graph)                 = default;
-    ClFusedKernelGraph &operator=(ClFusedKernelGraph &&graph) = default;
-
-    friend bool operator==(const ClFusedKernelGraph &graph0, const ClFusedKernelGraph &graph1)
-    {
-        /// NOTE: fg_dependency may change based on the order of fusion, and thus is omitted in the comparison.
-        ///       The fusion groups can already guarantee the equivalence of fusion
-        ///       In the future we may want to enforce a stronger equivalence by implementing topological comparison between @ref DependencyGraph s
-        return graph0.original_graph == graph1.original_graph && graph0.fusion_groups == graph1.fusion_groups;
-    }
-
-    Id add_fusion_group(const std::vector<const ClKernel *> &fused_kernels)
-    {
-        auto fg = utils::memory::make_deep_unique<ClKernelFusionGroup, ClKernelFusionGroup>();
-        for(const auto k : fused_kernels)
-        {
-            fg->add_fused_kernel(k);
-        }
-        const auto      src_tensors = fg->get_src_tensors();
-        const auto      dst_tensors = fg->get_dst_tensors();
-        std::vector<Id> inputs{};
-        std::transform(std::begin(src_tensors), std::end(src_tensors), std::back_inserter(inputs), [this](auto kernel)
-        {
-            return fg_dependency.add_tensor(kernel->id);
-        });
-        std::vector<Id> outputs{};
-        std::transform(std::begin(dst_tensors), std::end(dst_tensors), std::back_inserter(outputs), [this](auto kernel)
-        {
-            return fg_dependency.add_tensor(kernel->id);
-        });
-        const auto id = fg_dependency.add_operator(inputs, outputs);
-        fg->set_id(id.second);
-        fusion_groups[id.second] = std::move(fg);
-        return id.second;
-    }
-
-    Status fuse(ClKernelFusionGroup &fg0, ClKernelFusionGroup &fg1)
-    {
-        /// PRE: Already checked by can_fuse, and thus all the INVs and ASSUMPTIONS still hold
-        ClKernelFusionGroup *fg_src{};
-        ClKernelFusionGroup *fg_dst{};
-        // Find fg_src (parent / root) and fg_dst (child / non-root)
-        if(is_in(fg1.id, fg_dependency.dst_ops(fg0.id)))
-        {
-            fg_src = &fg0;
-            fg_dst = &fg1;
-        }
-        else if(is_in(fg0.id, fg_dependency.dst_ops(fg1.id)))
-        {
-            fg_src = &fg1;
-            fg_dst = &fg0;
-        }
-        else
-        {
-            return Status{ ErrorCode::RUNTIME_ERROR, "Invalid fusion: Not directly connected fusion groups cannot be fused together" };
-        }
-
-        for(const auto &t : fg_dependency.src_tensors(fg_dst->id))
-        {
-            if(!is_in(t, fg_dependency.dst_tensors(fg_src->id)))
-            {
-                // Link any incoming tensors of fg_dst, that ARE NOT in between fg_src and fg_dst, to fg_src
-
-                // Before:
-                // fg_src
-                // |
-                // ..          t1
-                // |           |
-                // -> fg_dst <-
-                //
-                // After:
-                // fg_src <---t1
-                //
-                const auto st = link_src_tensors(fg_src->id, { t });
-                if(!bool(st))
-                {
-                    return st;
-                }
-            }
-            else
-            {
-                const auto dst_fgs = fg_dependency.dst_ops_from_tensor(t);
-                if(dst_fgs.size() == 1U && dst_fgs.at(0) == fg_dst->id)
-                {
-                    // Remove any incoming tensors of fg_dst, that ARE in between fg_src and fg_dst
-                    // AND that are not connected to any other outgoing fgs (Note that they cannot connect to any other incoming fgs as all tensors can have at most 1 incoming fg (ASSUMPTION 3))
-
-                    // Before:
-                    // fg_src
-                    // |
-                    // t0
-                    // |
-                    // -> fg_dst
-                    //
-                    // After:
-                    // fg_src
-                    //
-                    const auto st = remove_fg_tensor(t);
-                    if(!bool(st))
-                    {
-                        return st;
-                    }
-                }
-                else
-                {
-                    // If the tensors ARE in between fg_src and fg_dst
-                    // BUT have any other outgoing fgs than fg_dst, then we leave it as a dst tensor to the fused fg_src
-
-                    // Before:
-                    // fg_src
-                    // |
-                    // t0
-                    // |
-                    // |-----------
-                    // |          |
-                    // -> fg_dst  -> fg_other
-                    //
-                    // After:
-                    // fg_src
-                    // |
-                    // t0
-                    // |
-                    // -> fg_other
-                    //
-
-                    // Note that this may seem like a case we shouldn't fuse. But actually all it means is that t0 is an
-                    // intermediate tensor between the fused fg_src and fg_dst, but only that we also STORE it to memory
-                    // so that any unfused fg's (fg_other in this case) can read it.
-                    // So all this means that we not only can STORE the tensors at the "end" of a fusion group,
-                    // but also any other tensors that are not source tensors. And all tensors that are STORED (exported),
-                    // can be termed "dst tensors" to a fusion group
-                    void();
-                }
-            }
-        }
-
-        for(const auto &t : fg_dependency.dst_tensors(fg_dst->id))
-        {
-            // Link any outgoing tensors of fg_dst to fg_src
-
-            // Before:
-            // fg_src
-            // |
-            // ..
-            // |
-            // -> fg_dst
-            //    |
-            //    |--------
-            //    |       |
-            //    |-> t0  |-> t1
-            //
-            // After:
-            // fg_src
-            // |
-            // |--------
-            // |       |
-            // |-> t0  |-> t1
-            //
-            const auto st = link_dst_tensors(fg_src->id, { t });
-            if(!bool(st))
-            {
-                return st;
-            }
-        }
-
-        // Merge fg_dst's graph into fg_src's graph
-        for(const auto kernel : traverse(*fg_dst))
-        {
-            fg_src->add_fused_kernel(kernel);
-        }
-
-        const auto st = remove_fg(fg_dst->id);
-        return st;
-    }
-    Status can_fuse(const ClKernelFusionGroup &fg0, const ClKernelFusionGroup &fg1) const
-    {
-        /// ASSUMPTION0: All tensors have 0 or 1 incoming kernel
-        /// ASSUMPTION1: All kernels have exactly 1 dst tensor (Temporary, can be lifted once we start supporting multi-dst kernels)
-        ///              Note that this does not apply to fusion groups
-        /// ASSUMPTION2: Simple kernels' tile infos can be overriden (share with) that of the root kernel's
-        /// ASSUMPTION3: Extension of ASSUMPTION0: All tensors have 0 or 1 incoming fusion group
-        /// INV0: All Fusion groups have a single root
-        /// INV1: All Fusion groups have no cycles or loops within themselves <- guaranteed by the underlying ClKernelGraph having no cycles or loops; enforced by DependencyGraph
-        /// INV2: The ClKernelFusionGroup itself has no cycles or loops <- enforced by DependencyGraph
-        /// INV3: All non-roots are Simple kernels
-        /// INV4: All non roots' dst tensors have the same shape as that of the root kernel
-        /// INV5: All kernels within a fusion group have the same UnitWorkloadStage
-        const ClKernelFusionGroup *fg_src {};
-        const ClKernelFusionGroup *fg_dst{};
-
-        // Check 0: Ensure fg0 and fg1 are "directly connected": one of them is a direct parent of the other
-        // This guarantess INV0
-        // This also finds fg_src (parent / root) and fg_dst (child / non-root)
-        if(is_in(fg1.id, fg_dependency.dst_ops(fg0.id)))
-        {
-            fg_src = &fg0;
-            fg_dst = &fg1;
-        }
-        else if(is_in(fg0.id, fg_dependency.dst_ops(fg1.id)))
-        {
-            fg_src = &fg1;
-            fg_dst = &fg0;
-        }
-        else
-        {
-            return Status{ ErrorCode::RUNTIME_ERROR, "Invalid fusion: Not directly connected fusion groups cannot be fused together" };
-        }
-
-        // Find unconnected tensors between fg_src and fg_dst
-        std::vector<Id> unconnected_tensors{};
-        for(const auto &t : fg_dependency.dst_tensors(fg_src->id))
-        {
-            if(!is_in(t, fg_dependency.src_tensors(fg_dst->id)))
-            {
-                unconnected_tensors.push_back(t);
-            }
-        }
-
-        // Check 1: Any unconnected tensor cannot be an ancestor of fg_dst
-        // This guarantees INV2: That is, the fused graph does not have any cycles or loops between different fusion groups
-        for(const auto &t : unconnected_tensors)
-        {
-            if(fg_dependency.path_exists_from_tensor_to_op(t, fg_dst->id))
-            {
-                return Status{ ErrorCode::RUNTIME_ERROR, "Invalid fusion: the fusion would result in cycles or loops" };
-            }
-        }
-
-        // Check 2: All non-root fgs are simple. Ensure INV3
-        if(fg_dst->get_root_kernel()->complexity() != Complexity::Simple)
-        {
-            return Status{ ErrorCode::RUNTIME_ERROR, "Invalid fusion: only root kernel can be a complex kernel" };
-        }
-
-        // Check 3: All non roots' dst tensors have the same shape as that of the root kernel. Ensure INV4
-        const auto root_kernel_dst_tensors = fg_dependency.dst_tensors(fg_src->id);
-        ARM_COMPUTE_ERROR_ON(root_kernel_dst_tensors.size() != 1); // (ASSUMPTION 1: All kernels have exactly 1 dst tensor)
-        const auto root_kernel_dst_tensor_info = original_graph->get_tensor(root_kernel_dst_tensors[0])->desc;
-
-        for(const auto &t : fg_dependency.dst_tensors(fg_dst->id))
-        {
-            const auto t_info = original_graph->get_tensor(t)->desc;
-            if(detail::have_different_dimensions(root_kernel_dst_tensor_info->tensor_shape(), t_info->tensor_shape(), 0))
-            {
-                return Status{ ErrorCode::RUNTIME_ERROR, "Invalid fusion: all non roots' dst tensors should have the same shape as that of the root kernel" };
-            }
-        }
-
-        // Check 4: All kernels within a fg have the same UnitWorkloadStage. Ensure INV5
-        if(!(fg_src->get_root_kernel()->config().stage == fg_dst->get_root_kernel()->config().stage))
-        {
-            return Status{ ErrorCode::RUNTIME_ERROR, "Invalid fusion: all kernels within a fusion group should have the same UnitWorkloadStage" };
-        }
-
-        return Status{};
-    }
-
-    const ClKernelGraph *original_graph{};
-    DependencyGraph      fg_dependency{};
-    KernelFusionGroupMap fusion_groups{};
-    // Note: no need to store tensors pointers in the ClFusedKernelGraph, as they are stored in side the individual fusion groups.
-
-private:
-    Status link_src_tensors(Id fg, const std::vector<Id> &src_tensors)
-    {
-        for(auto t : src_tensors)
-        {
-            fg_dependency.link_input(fg, t);
-        }
-        return Status{};
-    }
-    Status link_dst_tensors(Id fg, const std::vector<Id> &dst_tensors)
-    {
-        for(auto t : dst_tensors)
-        {
-            fg_dependency.link_output(fg, t);
-        }
-        return Status{};
-    }
-    Status remove_fg(Id fg)
-    {
-        fg_dependency.remove_operator(fg);
-        fusion_groups.erase(fg);
-        return Status{};
-    }
-    Status remove_fg_tensor(Id tensor)
-    {
-        fg_dependency.remove_tensor(tensor);
-        return Status{};
-    }
-};
-
-std::vector<const ClKernelFusionGroup *> traverse(const ClFusedKernelGraph &graph);
-std::vector<ClKernelFusionGroup *> traverse(ClFusedKernelGraph &graph);
-
-std::pair<Status, ClFusedKernelGraph> init_fusion_graph(const ClKernelGraph &kernel_graph);
-
-Status fuse(ClFusedKernelGraph &fused_kernel_graph);
-
-Status generate_store(ClKernelBlueprint &bp, const ClFusedKernelGraph &fused_kernel_graph, const ClKernelFusionGroup &fg);
-
-Status generate(ClWorkload &workload, const ClWorkloadContext &ctx, const ClFusedKernelGraph &fused_kernel_graph);
-
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
-#endif //ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_CLFUSEDKERNELGRAPH_H
-#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */
\ No newline at end of file
-- 
cgit v1.2.1