From 31df05a1870662a7288fbaeb6fbc7fc458bb5a73 Mon Sep 17 00:00:00 2001 From: SiCong Li Date: Wed, 9 Nov 2022 15:57:48 +0000 Subject: Remove dynamic fusion prototype with tests and examples Public headers of the new experimental dynamic fusion can be found in arm_compute/dynamic_fusion/ New examples on how to use the interface can be found in tests/validation/dynamic_fusion/gpu/Integration.cpp Resolves COMPMID-5683 Change-Id: I7ccb902a227fb487562df15fc3c30118d1d95bbd Signed-off-by: SiCong Li Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8671 Reviewed-by: Jakub Sujak Reviewed-by: Gunes Bayir Comments-Addressed: Arm Jenkins Benchmark: Arm Jenkins Tested-by: Arm Jenkins --- .../WorkloadImpl/ClFusedKernelGraph.h | 452 --------------------- 1 file changed, 452 deletions(-) delete mode 100644 src/core/experimental/dynamic_fusion/WorkloadImpl/ClFusedKernelGraph.h (limited to 'src/core/experimental/dynamic_fusion/WorkloadImpl/ClFusedKernelGraph.h') diff --git a/src/core/experimental/dynamic_fusion/WorkloadImpl/ClFusedKernelGraph.h b/src/core/experimental/dynamic_fusion/WorkloadImpl/ClFusedKernelGraph.h deleted file mode 100644 index 2051f1b62f..0000000000 --- a/src/core/experimental/dynamic_fusion/WorkloadImpl/ClFusedKernelGraph.h +++ /dev/null @@ -1,452 +0,0 @@ -/* - * Copyright (c) 2022 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION -#ifndef ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_CLFUSEDKERNELGRAPH_H -#define ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_CLFUSEDKERNELGRAPH_H -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/core/experimental/DependencyGraph.h" -#include "src/core/experimental/dynamic_fusion/ClKernelBuildingAPI.h" -#include "src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelGraph.h" -#include "support/DeepCopy.h" - -#include - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -struct ClKernelFusionGroup; - -/** A const view of a subgraph of the @ref ClKernelGraph to be fused together - * - */ -struct ClKernelFusionGroup -{ -public: - using Id = DependencyGraph::Id; - - ClKernelFusionGroup() = default; - ClKernelFusionGroup(Id id) - : id{ id }, graph{}, fused_kernels{}, tensors{} - { - } - ~ClKernelFusionGroup() = default; - - void set_id(Id i) - { - id = i; - } - - Id add_fused_kernel(const ClKernel *kernel) - { - /// PRE: Acyclicity ensured by DependencyGraph - /// PRE: Connectedness ensured by DependencyGraph - /// PRE: Single-rootedness ensured by User - std::vector src_tensors; - for(const auto t : kernel->tensors().get_const_src_tensors()) - { - auto id = graph.add_tensor(t->id); - if(tensors.find(id) == tensors.end()) - { - tensors[id] = t; - } - src_tensors.push_back(id); - } - std::vector dst_tensors; - for(const auto t : kernel->tensors().get_const_dst_tensors()) - { - auto id = graph.add_tensor(t->id); - if(tensors.find(id) == tensors.end()) - { - tensors[id] = t; - } - dst_tensors.push_back(id); - } - auto id = graph.add_operator(src_tensors, dst_tensors); - fused_kernels[id.second] = kernel; - return id.second; - } - - const ClKernel *get_root_kernel() const - { - auto root_kernels = graph.get_root_ops(); - ARM_COMPUTE_ERROR_ON(root_kernels.size() != 1); - return fused_kernels.at(root_kernels.at(0)); - } - - std::vector get_src_tensors() const - { - std::vector src_tensors; - for(auto tensor_id : graph.src_tensors()) - { - src_tensors.push_back(tensors.at(tensor_id)); - } - return src_tensors; - } - - std::vector get_dst_tensors() const - { - std::vector dst_tensors; - for(auto tensor_id : graph.dst_tensors()) - { - dst_tensors.push_back(tensors.at(tensor_id)); - } - return dst_tensors; - } - - friend bool operator==(const ClKernelFusionGroup &fg0, const ClKernelFusionGroup &fg1) - { - return fg0.id == fg1.id && fg0.graph == fg1.graph && fg0.fused_kernels == fg1.fused_kernels && fg0.tensors == fg1.tensors; - } - - Id id{}; - DependencyGraph graph{}; // A subgraph of the original ClKernelGraph - std::map fused_kernels{}; - std::map tensors{}; -}; - -std::vector traverse(const ClKernelFusionGroup &group); - -struct ClFusedKernelGraph -{ -public: - using Id = DependencyGraph::Id; - - using KernelFusionGroupMap = std::map>; - - ClFusedKernelGraph() = default; - ~ClFusedKernelGraph() = default; - ClFusedKernelGraph(const ClFusedKernelGraph &graph) = default; - ClFusedKernelGraph &operator=(const ClFusedKernelGraph &graph) = default; - ClFusedKernelGraph(ClFusedKernelGraph &&graph) = default; - ClFusedKernelGraph &operator=(ClFusedKernelGraph &&graph) = default; - - friend bool operator==(const ClFusedKernelGraph &graph0, const ClFusedKernelGraph &graph1) - { - /// NOTE: fg_dependency may change based on the order of fusion, and thus is omitted in the comparison. - /// The fusion groups can already guarantee the equivalence of fusion - /// In the future we may want to enforce a stronger equivalence by implementing topological comparison between @ref DependencyGraph s - return graph0.original_graph == graph1.original_graph && graph0.fusion_groups == graph1.fusion_groups; - } - - Id add_fusion_group(const std::vector &fused_kernels) - { - auto fg = utils::memory::make_deep_unique(); - for(const auto k : fused_kernels) - { - fg->add_fused_kernel(k); - } - const auto src_tensors = fg->get_src_tensors(); - const auto dst_tensors = fg->get_dst_tensors(); - std::vector inputs{}; - std::transform(std::begin(src_tensors), std::end(src_tensors), std::back_inserter(inputs), [this](auto kernel) - { - return fg_dependency.add_tensor(kernel->id); - }); - std::vector outputs{}; - std::transform(std::begin(dst_tensors), std::end(dst_tensors), std::back_inserter(outputs), [this](auto kernel) - { - return fg_dependency.add_tensor(kernel->id); - }); - const auto id = fg_dependency.add_operator(inputs, outputs); - fg->set_id(id.second); - fusion_groups[id.second] = std::move(fg); - return id.second; - } - - Status fuse(ClKernelFusionGroup &fg0, ClKernelFusionGroup &fg1) - { - /// PRE: Already checked by can_fuse, and thus all the INVs and ASSUMPTIONS still hold - ClKernelFusionGroup *fg_src{}; - ClKernelFusionGroup *fg_dst{}; - // Find fg_src (parent / root) and fg_dst (child / non-root) - if(is_in(fg1.id, fg_dependency.dst_ops(fg0.id))) - { - fg_src = &fg0; - fg_dst = &fg1; - } - else if(is_in(fg0.id, fg_dependency.dst_ops(fg1.id))) - { - fg_src = &fg1; - fg_dst = &fg0; - } - else - { - return Status{ ErrorCode::RUNTIME_ERROR, "Invalid fusion: Not directly connected fusion groups cannot be fused together" }; - } - - for(const auto &t : fg_dependency.src_tensors(fg_dst->id)) - { - if(!is_in(t, fg_dependency.dst_tensors(fg_src->id))) - { - // Link any incoming tensors of fg_dst, that ARE NOT in between fg_src and fg_dst, to fg_src - - // Before: - // fg_src - // | - // .. t1 - // | | - // -> fg_dst <- - // - // After: - // fg_src <---t1 - // - const auto st = link_src_tensors(fg_src->id, { t }); - if(!bool(st)) - { - return st; - } - } - else - { - const auto dst_fgs = fg_dependency.dst_ops_from_tensor(t); - if(dst_fgs.size() == 1U && dst_fgs.at(0) == fg_dst->id) - { - // Remove any incoming tensors of fg_dst, that ARE in between fg_src and fg_dst - // AND that are not connected to any other outgoing fgs (Note that they cannot connect to any other incoming fgs as all tensors can have at most 1 incoming fg (ASSUMPTION 3)) - - // Before: - // fg_src - // | - // t0 - // | - // -> fg_dst - // - // After: - // fg_src - // - const auto st = remove_fg_tensor(t); - if(!bool(st)) - { - return st; - } - } - else - { - // If the tensors ARE in between fg_src and fg_dst - // BUT have any other outgoing fgs than fg_dst, then we leave it as a dst tensor to the fused fg_src - - // Before: - // fg_src - // | - // t0 - // | - // |----------- - // | | - // -> fg_dst -> fg_other - // - // After: - // fg_src - // | - // t0 - // | - // -> fg_other - // - - // Note that this may seem like a case we shouldn't fuse. But actually all it means is that t0 is an - // intermediate tensor between the fused fg_src and fg_dst, but only that we also STORE it to memory - // so that any unfused fg's (fg_other in this case) can read it. - // So all this means that we not only can STORE the tensors at the "end" of a fusion group, - // but also any other tensors that are not source tensors. And all tensors that are STORED (exported), - // can be termed "dst tensors" to a fusion group - void(); - } - } - } - - for(const auto &t : fg_dependency.dst_tensors(fg_dst->id)) - { - // Link any outgoing tensors of fg_dst to fg_src - - // Before: - // fg_src - // | - // .. - // | - // -> fg_dst - // | - // |-------- - // | | - // |-> t0 |-> t1 - // - // After: - // fg_src - // | - // |-------- - // | | - // |-> t0 |-> t1 - // - const auto st = link_dst_tensors(fg_src->id, { t }); - if(!bool(st)) - { - return st; - } - } - - // Merge fg_dst's graph into fg_src's graph - for(const auto kernel : traverse(*fg_dst)) - { - fg_src->add_fused_kernel(kernel); - } - - const auto st = remove_fg(fg_dst->id); - return st; - } - Status can_fuse(const ClKernelFusionGroup &fg0, const ClKernelFusionGroup &fg1) const - { - /// ASSUMPTION0: All tensors have 0 or 1 incoming kernel - /// ASSUMPTION1: All kernels have exactly 1 dst tensor (Temporary, can be lifted once we start supporting multi-dst kernels) - /// Note that this does not apply to fusion groups - /// ASSUMPTION2: Simple kernels' tile infos can be overriden (share with) that of the root kernel's - /// ASSUMPTION3: Extension of ASSUMPTION0: All tensors have 0 or 1 incoming fusion group - /// INV0: All Fusion groups have a single root - /// INV1: All Fusion groups have no cycles or loops within themselves <- guaranteed by the underlying ClKernelGraph having no cycles or loops; enforced by DependencyGraph - /// INV2: The ClKernelFusionGroup itself has no cycles or loops <- enforced by DependencyGraph - /// INV3: All non-roots are Simple kernels - /// INV4: All non roots' dst tensors have the same shape as that of the root kernel - /// INV5: All kernels within a fusion group have the same UnitWorkloadStage - const ClKernelFusionGroup *fg_src {}; - const ClKernelFusionGroup *fg_dst{}; - - // Check 0: Ensure fg0 and fg1 are "directly connected": one of them is a direct parent of the other - // This guarantess INV0 - // This also finds fg_src (parent / root) and fg_dst (child / non-root) - if(is_in(fg1.id, fg_dependency.dst_ops(fg0.id))) - { - fg_src = &fg0; - fg_dst = &fg1; - } - else if(is_in(fg0.id, fg_dependency.dst_ops(fg1.id))) - { - fg_src = &fg1; - fg_dst = &fg0; - } - else - { - return Status{ ErrorCode::RUNTIME_ERROR, "Invalid fusion: Not directly connected fusion groups cannot be fused together" }; - } - - // Find unconnected tensors between fg_src and fg_dst - std::vector unconnected_tensors{}; - for(const auto &t : fg_dependency.dst_tensors(fg_src->id)) - { - if(!is_in(t, fg_dependency.src_tensors(fg_dst->id))) - { - unconnected_tensors.push_back(t); - } - } - - // Check 1: Any unconnected tensor cannot be an ancestor of fg_dst - // This guarantees INV2: That is, the fused graph does not have any cycles or loops between different fusion groups - for(const auto &t : unconnected_tensors) - { - if(fg_dependency.path_exists_from_tensor_to_op(t, fg_dst->id)) - { - return Status{ ErrorCode::RUNTIME_ERROR, "Invalid fusion: the fusion would result in cycles or loops" }; - } - } - - // Check 2: All non-root fgs are simple. Ensure INV3 - if(fg_dst->get_root_kernel()->complexity() != Complexity::Simple) - { - return Status{ ErrorCode::RUNTIME_ERROR, "Invalid fusion: only root kernel can be a complex kernel" }; - } - - // Check 3: All non roots' dst tensors have the same shape as that of the root kernel. Ensure INV4 - const auto root_kernel_dst_tensors = fg_dependency.dst_tensors(fg_src->id); - ARM_COMPUTE_ERROR_ON(root_kernel_dst_tensors.size() != 1); // (ASSUMPTION 1: All kernels have exactly 1 dst tensor) - const auto root_kernel_dst_tensor_info = original_graph->get_tensor(root_kernel_dst_tensors[0])->desc; - - for(const auto &t : fg_dependency.dst_tensors(fg_dst->id)) - { - const auto t_info = original_graph->get_tensor(t)->desc; - if(detail::have_different_dimensions(root_kernel_dst_tensor_info->tensor_shape(), t_info->tensor_shape(), 0)) - { - return Status{ ErrorCode::RUNTIME_ERROR, "Invalid fusion: all non roots' dst tensors should have the same shape as that of the root kernel" }; - } - } - - // Check 4: All kernels within a fg have the same UnitWorkloadStage. Ensure INV5 - if(!(fg_src->get_root_kernel()->config().stage == fg_dst->get_root_kernel()->config().stage)) - { - return Status{ ErrorCode::RUNTIME_ERROR, "Invalid fusion: all kernels within a fusion group should have the same UnitWorkloadStage" }; - } - - return Status{}; - } - - const ClKernelGraph *original_graph{}; - DependencyGraph fg_dependency{}; - KernelFusionGroupMap fusion_groups{}; - // Note: no need to store tensors pointers in the ClFusedKernelGraph, as they are stored in side the individual fusion groups. - -private: - Status link_src_tensors(Id fg, const std::vector &src_tensors) - { - for(auto t : src_tensors) - { - fg_dependency.link_input(fg, t); - } - return Status{}; - } - Status link_dst_tensors(Id fg, const std::vector &dst_tensors) - { - for(auto t : dst_tensors) - { - fg_dependency.link_output(fg, t); - } - return Status{}; - } - Status remove_fg(Id fg) - { - fg_dependency.remove_operator(fg); - fusion_groups.erase(fg); - return Status{}; - } - Status remove_fg_tensor(Id tensor) - { - fg_dependency.remove_tensor(tensor); - return Status{}; - } -}; - -std::vector traverse(const ClFusedKernelGraph &graph); -std::vector traverse(ClFusedKernelGraph &graph); - -std::pair init_fusion_graph(const ClKernelGraph &kernel_graph); - -Status fuse(ClFusedKernelGraph &fused_kernel_graph); - -Status generate_store(ClKernelBlueprint &bp, const ClFusedKernelGraph &fused_kernel_graph, const ClKernelFusionGroup &fg); - -Status generate(ClWorkload &workload, const ClWorkloadContext &ctx, const ClFusedKernelGraph &fused_kernel_graph); - -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute -#endif //ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_CLFUSEDKERNELGRAPH_H -#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */ \ No newline at end of file -- cgit v1.2.1