aboutsummaryrefslogtreecommitdiff
path: root/src/core/experimental/dynamic_fusion/WorkloadImpl
diff options
context:
space:
mode:
Diffstat (limited to 'src/core/experimental/dynamic_fusion/WorkloadImpl')
-rw-r--r--src/core/experimental/dynamic_fusion/WorkloadImpl/ClFusedKernelGraph.cpp232
-rw-r--r--src/core/experimental/dynamic_fusion/WorkloadImpl/ClFusedKernelGraph.h452
-rw-r--r--src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelDescriptors.h121
-rw-r--r--src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelGraph.cpp271
-rw-r--r--src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelGraph.h259
-rw-r--r--src/core/experimental/dynamic_fusion/WorkloadImpl/ClWorkload.cpp72
-rw-r--r--src/core/experimental/dynamic_fusion/WorkloadImpl/DependencyGraph.cpp430
-rw-r--r--src/core/experimental/dynamic_fusion/WorkloadImpl/ITensorDescPack.h241
-rw-r--r--src/core/experimental/dynamic_fusion/WorkloadImpl/OperatorGraphImpl.cpp423
-rw-r--r--src/core/experimental/dynamic_fusion/WorkloadImpl/OperatorGraphImpl.h252
10 files changed, 0 insertions, 2753 deletions
diff --git a/src/core/experimental/dynamic_fusion/WorkloadImpl/ClFusedKernelGraph.cpp b/src/core/experimental/dynamic_fusion/WorkloadImpl/ClFusedKernelGraph.cpp
deleted file mode 100644
index 4e57d66a1c..0000000000
--- a/src/core/experimental/dynamic_fusion/WorkloadImpl/ClFusedKernelGraph.cpp
+++ /dev/null
@@ -1,232 +0,0 @@
-/*
- * Copyright (c) 2022 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION
-#include "src/core/experimental/dynamic_fusion/WorkloadImpl/ClFusedKernelGraph.h"
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-namespace
-{
-/** Enumerate every pair (i, j) with i < j of the given fusion groups, preserving the input order
- *
- * @param[in] sorted_fgs Fusion groups to pair up. The caller in this file passes the topologically
- *                       sorted groups returned by traverse(). Expected to hold at least two groups
- *
- * @return All pairs { sorted_fgs[i], sorted_fgs[j] } with i < j, ordered by i and then by j.
- *         Empty if fewer than two groups are given
- */
-std::vector<std::pair<ClKernelFusionGroup *, ClKernelFusionGroup *>> get_combinations(const std::vector<ClKernelFusionGroup *> &sorted_fgs)
-{
-    ARM_COMPUTE_ERROR_ON(sorted_fgs.size() <= 1);
-    const size_t num_fgs = sorted_fgs.size();
-    std::vector<std::pair<ClKernelFusionGroup *, ClKernelFusionGroup *>> combo;
-    if(num_fgs > 1)
-    {
-        // Number of unordered pairs: n * (n - 1) / 2
-        combo.reserve(num_fgs * (num_fgs - 1) / 2);
-    }
-    // "i + 1 < num_fgs" instead of "i < num_fgs - 1": the latter wraps around to a huge unsigned value
-    // for an empty input if the assertion above is compiled out
-    for(size_t i = 0; i + 1 < num_fgs; ++i)
-    {
-        for(size_t j = i + 1; j < num_fgs; ++j)
-        {
-            combo.emplace_back(sorted_fgs[i], sorted_fgs[j]);
-        }
-    }
-    return combo;
-}
-} // namespace
-/** List the kernels of a fusion group in the order given by the topological sort of its graph
- *
- * @note Only the second element of the pair returned by topological_sort() is used; the first one is not inspected
- *
- * @param[in] group Fusion group to traverse
- *
- * @return Pointers to the fused kernels, one per sorted operator
- */
-std::vector<const ClKernel *> traverse(const ClKernelFusionGroup &group)
-{
-    const auto                    topo_sorted = group.graph.topological_sort();
-    std::vector<const ClKernel *> ordered_kernels;
-    for(const auto &op_pack : topo_sorted.second)
-    {
-        const ClKernel *kernel = group.fused_kernels.at(op_pack.op);
-        ordered_kernels.push_back(kernel);
-    }
-    return ordered_kernels;
-}
-
-/** List the fusion groups of a fused kernel graph in the order given by the topological sort of fg_dependency (const overload)
- *
- * @note Only the second element of the pair returned by topological_sort() is used; the first one is not inspected
- *
- * @param[in] graph Fused kernel graph to traverse
- *
- * @return Non-owning const pointers to the fusion groups, one per sorted operator
- */
-std::vector<const ClKernelFusionGroup *> traverse(const ClFusedKernelGraph &graph)
-{
-    const auto                               topo_sorted = graph.fg_dependency.topological_sort();
-    std::vector<const ClKernelFusionGroup *> ordered_groups;
-    for(const auto &op_pack : topo_sorted.second)
-    {
-        const ClKernelFusionGroup *group = graph.fusion_groups.at(op_pack.op).get();
-        ordered_groups.push_back(group);
-    }
-    return ordered_groups;
-}
-
-/** List the fusion groups of a fused kernel graph in the order given by the topological sort of fg_dependency (mutable overload)
- *
- * @note Only the second element of the pair returned by topological_sort() is used; the first one is not inspected
- *
- * @param[in] graph Fused kernel graph to traverse
- *
- * @return Non-owning mutable pointers to the fusion groups, one per sorted operator
- */
-std::vector<ClKernelFusionGroup *> traverse(ClFusedKernelGraph &graph)
-{
-    const auto                         topo_sorted = graph.fg_dependency.topological_sort();
-    std::vector<ClKernelFusionGroup *> ordered_groups;
-    for(const auto &op_pack : topo_sorted.second)
-    {
-        ClKernelFusionGroup *group = graph.fusion_groups.at(op_pack.op).get();
-        ordered_groups.push_back(group);
-    }
-    return ordered_groups;
-}
-
-/** Build the initial fused kernel graph, in which every kernel forms its own single-kernel fusion group
- *
- * @note The returned graph stores a pointer to @p kernel_graph; it does not copy it.
- *       @p kernel_graph therefore has to stay alive for as long as the returned graph is used
- *
- * @param[in] kernel_graph Kernel graph to start from
- *
- * @return A pair of a default-constructed Status and the initial fused kernel graph
- */
-std::pair<Status, ClFusedKernelGraph> init_fusion_graph(const ClKernelGraph &kernel_graph)
-{
-    ClFusedKernelGraph initial_graph{};
-    initial_graph.original_graph = &kernel_graph; // Non-owning pointer to the original kernel graph
-    initial_graph.fg_dependency  = DependencyGraph();
-    // One fusion group per kernel, visited in the order returned by traverse()
-    const auto &all_kernels = traverse(kernel_graph);
-    for(const auto &single_kernel : all_kernels)
-    {
-        initial_graph.add_fusion_group({ single_kernel });
-    }
-    return { Status{}, initial_graph };
-}
-
-/** Greedily fuse the fusion groups of a fused kernel graph until no pair can be fused any more
- *
- * A naive fusion algorithm that's guaranteed to find the optimal pattern if there are no branches.
- * If there are branches, the algorithm cannot guarantee optimality as it doesn't perform any searches.
- *
- * @param[in,out] fused_kernel_graph Graph whose fusion groups are merged in place
- *
- * @return A default-constructed Status on success (including when there is nothing to fuse),
- *         otherwise the error Status returned by ClFusedKernelGraph::fuse()
- */
-Status fuse(ClFusedKernelGraph &fused_kernel_graph)
-{
-    bool fusion_found = false;
-    do
-    {
-        fusion_found          = false;
-        const auto sorted_fgs = traverse(fused_kernel_graph);
-        if(sorted_fgs.size() <= 1)
-        {
-            // Only one or zero fusion group, thus no need to perform fusion
-            return Status{};
-        }
-        const auto fgs_combo = get_combinations(sorted_fgs);
-        for(const auto &fgs : fgs_combo)
-        {
-            ClKernelFusionGroup *fg0 = fgs.first;
-            ClKernelFusionGroup *fg1 = fgs.second;
-            if(!bool(fused_kernel_graph.can_fuse(*fg0, *fg1)))
-            {
-                // A failed can_fuse() is not an error here: this pair is simply skipped
-                continue;
-            }
-            const auto fuse_status = fused_kernel_graph.fuse(*fg0, *fg1);
-            if(!bool(fuse_status))
-            {
-                return fuse_status;
-            }
-            // Restart from a fresh traversal: fuse() erased one of the two groups from the graph,
-            // so the remaining pairs may refer to a group that no longer exists
-            fusion_found = true;
-            break;
-        }
-    }
-    while(fusion_found);
-    return Status{};
-}
-/** Add one store component to the blueprint for every dst tensor of a fusion group
- *
- * The dst tensors are those recorded for @p fg in the fg_dependency graph of @p fused_kernel_graph.
- *
- * @param[in,out] bp                 Blueprint to which the tensors and store components are added
- * @param[in]     fused_kernel_graph Fused kernel graph owning @p fg; its original_graph is used to look up the tensors
- * @param[in]     fg                 Fusion group whose dst tensors are to be stored
- *
- * @return The first failing Status of the blueprint calls, otherwise the Status of the last call
- *         (default-constructed if the group has no dst tensors)
- */
-Status generate_store(ClKernelBlueprint &bp, const ClFusedKernelGraph &fused_kernel_graph, const ClKernelFusionGroup &fg)
-{
- Status st{};
- for(const auto &dst_t_id : fused_kernel_graph.fg_dependency.dst_tensors(fg.id))
- {
- const auto dst_t = fused_kernel_graph.original_graph->get_tensor(dst_t_id);
-
- /// NOTE: dst tensor must have already been added to the blueprint at this point
- ArgumentID dst_id;
- st = add_tensor(bp, dst_t->desc, dst_id, dst_t->id);
- if(!bool(st))
- {
- return st;
- }
- /// NOTE: the extra dst tensor is needed as the store kcomp requires 2 tensors. But this is irrelevant to the fused kernel graph
- /// since both tensors share the exact same info and kernel arg descriptor
- ArgumentID dst_dst_id;
- st = add_tensor(bp, dst_t->desc, dst_dst_id);
- if(!bool(st))
- {
- return st;
- }
- /// NOTE: Update the merge point map to link dst_dst_id with dst_t->id instead.
- /// This is required because the get_arguments() returned by the blueprint returns the dst tensor added by the store component
- st = update_merge_point(bp, dst_dst_id, dst_t->id);
- if(!bool(st))
- {
- return st;
- }
- // The store type is taken from the root kernel of the fusion group
- st = add_kcomp_store(bp, fg.get_root_kernel()->config().store_type, dst_id, dst_dst_id);
- if(!bool(st))
- {
- return st;
- }
- }
- return st;
-}
-
-/** Populate a workload from a fused kernel graph: one unit workload per fusion group
- *
- * For every fusion group (in traverse() order) a blueprint is assembled from its kernels, the tile info and
- * the store components, the kernel code is built from it, and the group's src / dst tensors are registered
- * as workload tensors.
- *
- * @note On failure the function returns early; @p workload then already holds the context and whatever
- *       was added for the fusion groups processed before the failing one
- *
- * @param[out] workload           Workload to populate. Its context is overwritten with @p ctx
- * @param[in]  ctx                Workload context; its gpu_info is passed on to the code builder
- * @param[in]  fused_kernel_graph Fused kernel graph to generate the workload from
- *
- * @return The first failing Status, otherwise a default-constructed Status
- */
-Status generate(ClWorkload &workload, const ClWorkloadContext &ctx, const ClFusedKernelGraph &fused_kernel_graph)
-{
- workload.context = ctx;
- for(const auto &fg : traverse(fused_kernel_graph))
- {
- // Each fusion group gets its own blueprint
- ClKernelBlueprint bp{};
- for(const auto &kernel : traverse(*fg))
- {
- const auto st = kernel->generate(bp);
- if(!bool(st))
- {
- return st;
- }
- }
- // The tile info of the whole fusion group is that of its root kernel
- auto st = set_tile_info(bp, fg->get_root_kernel()->config().tile_desc);
- if(!bool(st))
- {
- return st;
- }
- st = generate_store(bp, fused_kernel_graph, *fg);
- if(!bool(st))
- {
- return st;
- }
-
- ClKernelCode code{};
- st = build(code, ClCodeBuilderContext{ ctx.gpu_info }, bp);
- if(!bool(st))
- {
- return st;
- }
- const auto bp_graph = get_dependency_graph(bp);
-
- // Get tensor info
- std::vector<Id> workload_src_tensors{};
- for(const auto &src_t_id : fused_kernel_graph.fg_dependency.src_tensors(fg->id))
- {
- const auto src_t = fused_kernel_graph.original_graph->get_tensor(src_t_id);
- // Get corresponding kernel arg descriptor
- // (the merge points of the blueprint graph map the tensor id to the key used in code.arguments)
- const auto arg_desc = code.arguments.at(bp_graph.get_merge_points().at(src_t->id));
- const auto kernel_t_id = workload.add_workload_tensor(src_t->desc, src_t->memory_type, src_t->memory_info, arg_desc, src_t->id);
- workload_src_tensors.push_back(kernel_t_id);
- }
- std::vector<Id> workload_dst_tensors{};
- for(const auto &dst_t_id : fused_kernel_graph.fg_dependency.dst_tensors(fg->id))
- {
- const auto dst_t = fused_kernel_graph.original_graph->get_tensor(dst_t_id);
- // Get corresponding kernel arg descriptor
- const auto arg_desc = code.arguments.at(bp_graph.get_merge_points().at(dst_t->id));
- const auto kernel_t_id = workload.add_workload_tensor(dst_t->desc, dst_t->memory_type, dst_t->memory_info, arg_desc, dst_t->id);
- workload_dst_tensors.push_back(kernel_t_id);
- }
-
- // The stage of the unit workload is that of the root kernel of the fusion group
- workload.add_unit_workload(fg->get_root_kernel()->config().stage, code, workload_src_tensors, workload_dst_tensors);
- }
-
- return Status{};
-}
-
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
-#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */ \ No newline at end of file
diff --git a/src/core/experimental/dynamic_fusion/WorkloadImpl/ClFusedKernelGraph.h b/src/core/experimental/dynamic_fusion/WorkloadImpl/ClFusedKernelGraph.h
deleted file mode 100644
index 2051f1b62f..0000000000
--- a/src/core/experimental/dynamic_fusion/WorkloadImpl/ClFusedKernelGraph.h
+++ /dev/null
@@ -1,452 +0,0 @@
-/*
- * Copyright (c) 2022 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION
-#ifndef ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_CLFUSEDKERNELGRAPH_H
-#define ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_CLFUSEDKERNELGRAPH_H
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/experimental/DependencyGraph.h"
-#include "src/core/experimental/dynamic_fusion/ClKernelBuildingAPI.h"
-#include "src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelGraph.h"
-#include "support/DeepCopy.h"
-
-#include <vector>
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-struct ClKernelFusionGroup;
-
-/** A const view of a subgraph of the @ref ClKernelGraph to be fused together
- *
- * Kernels and tensors are referenced through non-owning const pointers; the group itself only owns
- * the dependency graph describing how they are connected.
- */
-struct ClKernelFusionGroup
-{
-public:
-    using Id = DependencyGraph::Id;
-
-    ClKernelFusionGroup() = default;
-    ClKernelFusionGroup(Id id)
-        : id{ id }, graph{}, fused_kernels{}, tensors{}
-    {
-    }
-    ~ClKernelFusionGroup() = default;
-
-    /** Overwrite the id of this fusion group
-     *
-     * @param[in] i New id
-     */
-    void set_id(Id i)
-    {
-        id = i;
-    }
-
-    /** Add a kernel, together with its src and dst tensors, to this fusion group
-     *
-     * @param[in] kernel Kernel to add. Only the pointer is stored
-     *
-     * @return Id of the operator created for @p kernel in the group's graph
-     */
-    Id add_fused_kernel(const ClKernel *kernel)
-    {
-        /// PRE: Acyclicity ensured by DependencyGraph
-        /// PRE: Connectedness ensured by DependencyGraph
-        /// PRE: Single-rootedness ensured by User
-
-        // Register the source tensors; a tensor pointer that is already recorded is kept as is
-        std::vector<Id> src_ids;
-        for(const auto src : kernel->tensors().get_const_src_tensors())
-        {
-            const auto tensor_id = graph.add_tensor(src->id);
-            if(tensors.count(tensor_id) == 0)
-            {
-                tensors[tensor_id] = src;
-            }
-            src_ids.push_back(tensor_id);
-        }
-        // Same for the destination tensors
-        std::vector<Id> dst_ids;
-        for(const auto dst : kernel->tensors().get_const_dst_tensors())
-        {
-            const auto tensor_id = graph.add_tensor(dst->id);
-            if(tensors.count(tensor_id) == 0)
-            {
-                tensors[tensor_id] = dst;
-            }
-            dst_ids.push_back(tensor_id);
-        }
-        // Only the id (second element) of the result of add_operator() is used
-        const auto op        = graph.add_operator(src_ids, dst_ids);
-        fused_kernels[op.second] = kernel;
-        return op.second;
-    }
-
-    /** Get the single root kernel of this fusion group
-     *
-     * @return The kernel mapped to the only root operator of the group's graph
-     */
-    const ClKernel *get_root_kernel() const
-    {
-        const auto roots = graph.get_root_ops();
-        ARM_COMPUTE_ERROR_ON(roots.size() != 1);
-        return fused_kernels.at(roots.at(0));
-    }
-
-    /** Get the tensors reported as src tensors by the group's graph */
-    std::vector<const ClKernelTensor *> get_src_tensors() const
-    {
-        std::vector<const ClKernelTensor *> result;
-        for(auto src_id : graph.src_tensors())
-        {
-            const ClKernelTensor *tensor = tensors.at(src_id);
-            result.push_back(tensor);
-        }
-        return result;
-    }
-
-    /** Get the tensors reported as dst tensors by the group's graph */
-    std::vector<const ClKernelTensor *> get_dst_tensors() const
-    {
-        std::vector<const ClKernelTensor *> result;
-        for(auto dst_id : graph.dst_tensors())
-        {
-            const ClKernelTensor *tensor = tensors.at(dst_id);
-            result.push_back(tensor);
-        }
-        return result;
-    }
-
-    /** Two fusion groups are equal when id, graph, fused kernels and tensors all compare equal */
-    friend bool operator==(const ClKernelFusionGroup &fg0, const ClKernelFusionGroup &fg1)
-    {
-        if(!(fg0.id == fg1.id))
-        {
-            return false;
-        }
-        if(!(fg0.graph == fg1.graph))
-        {
-            return false;
-        }
-        if(!(fg0.fused_kernels == fg1.fused_kernels))
-        {
-            return false;
-        }
-        return fg0.tensors == fg1.tensors;
-    }
-
-    Id              id{};
-    DependencyGraph graph{}; // A subgraph of the original ClKernelGraph
-    std::map<Id, const ClKernel *>       fused_kernels{};
-    std::map<Id, const ClKernelTensor *> tensors{};
-};
-
-std::vector<const ClKernel *> traverse(const ClKernelFusionGroup &group);
-
-struct ClFusedKernelGraph
-{
-public:
- using Id = DependencyGraph::Id;
-
- using KernelFusionGroupMap = std::map<Id, utils::memory::deep_unique_ptr<ClKernelFusionGroup>>;
-
- ClFusedKernelGraph() = default;
- ~ClFusedKernelGraph() = default;
- ClFusedKernelGraph(const ClFusedKernelGraph &graph) = default;
- ClFusedKernelGraph &operator=(const ClFusedKernelGraph &graph) = default;
- ClFusedKernelGraph(ClFusedKernelGraph &&graph) = default;
- ClFusedKernelGraph &operator=(ClFusedKernelGraph &&graph) = default;
-
- /** Two fused kernel graphs are equal when they point to the same original graph and hold equal fusion groups */
- friend bool operator==(const ClFusedKernelGraph &graph0, const ClFusedKernelGraph &graph1)
- {
-     /// NOTE: fg_dependency may change based on the order of fusion, and thus is omitted in the comparison.
-     /// The fusion groups can already guarantee the equivalence of fusion
-     /// In the future we may want to enforce a stronger equivalence by implementing topological comparison between @ref DependencyGraph s
-     const bool same_original_graph = graph0.original_graph == graph1.original_graph; // Pointer comparison
-     return same_original_graph && graph0.fusion_groups == graph1.fusion_groups;
- }
-
- /** Create a fusion group from the given kernels and register it in the fusion group dependency graph
-  *
-  * @param[in] fused_kernels Kernels forming the new group, added in the given order
-  *
-  * @return Id of the new fusion group, i.e. the id of the operator added to fg_dependency
-  */
- Id add_fusion_group(const std::vector<const ClKernel *> &fused_kernels)
- {
-     auto fg = utils::memory::make_deep_unique<ClKernelFusionGroup, ClKernelFusionGroup>();
-     for(const auto k : fused_kernels)
-     {
-         fg->add_fused_kernel(k);
-     }
-     // Mirror the src / dst tensors of the group into fg_dependency.
-     // Plain loops are used so that this header does not depend on <algorithm> / <iterator>
-     std::vector<Id> inputs{};
-     for(const auto tensor : fg->get_src_tensors())
-     {
-         inputs.push_back(fg_dependency.add_tensor(tensor->id));
-     }
-     std::vector<Id> outputs{};
-     for(const auto tensor : fg->get_dst_tensors())
-     {
-         outputs.push_back(fg_dependency.add_tensor(tensor->id));
-     }
-     // Only the id (second element) of the result of add_operator() is used
-     const auto id = fg_dependency.add_operator(inputs, outputs);
-     fg->set_id(id.second);
-     fusion_groups[id.second] = std::move(fg);
-     return id.second;
- }
-
- /** Fuse two directly connected fusion groups into one
-  *
-  * The parent of the two groups (fg_src) absorbs the child (fg_dst): the child's tensors are re-linked to
-  * the parent in fg_dependency, the child's kernels are added to the parent, and the child is removed.
-  *
-  * @note After a successful call the child group has been erased from fusion_groups, so the reference
-  *       that was passed in for it must not be used any more
-  *
-  * @param[in,out] fg0 First fusion group
-  * @param[in,out] fg1 Second fusion group
-  *
-  * @return An error Status if neither group is a direct parent of the other or if one of the helper
-  *         calls fails, otherwise the Status of removing the child group
-  */
- Status fuse(ClKernelFusionGroup &fg0, ClKernelFusionGroup &fg1)
- {
- /// PRE: Already checked by can_fuse, and thus all the INVs and ASSUMPTIONS still hold
- ClKernelFusionGroup *fg_src{};
- ClKernelFusionGroup *fg_dst{};
- // Find fg_src (parent / root) and fg_dst (child / non-root)
- if(is_in(fg1.id, fg_dependency.dst_ops(fg0.id)))
- {
- fg_src = &fg0;
- fg_dst = &fg1;
- }
- else if(is_in(fg0.id, fg_dependency.dst_ops(fg1.id)))
- {
- fg_src = &fg1;
- fg_dst = &fg0;
- }
- else
- {
- return Status{ ErrorCode::RUNTIME_ERROR, "Invalid fusion: Not directly connected fusion groups cannot be fused together" };
- }
-
- // NOTE(review): the loop bodies below modify fg_dependency while iterating over the result of
- // src_tensors() / dst_tensors(); this presumably relies on those returning by value - confirm
- for(const auto &t : fg_dependency.src_tensors(fg_dst->id))
- {
- if(!is_in(t, fg_dependency.dst_tensors(fg_src->id)))
- {
- // Link any incoming tensors of fg_dst, that ARE NOT in between fg_src and fg_dst, to fg_src
-
- // Before:
- // fg_src
- // |
- // .. t1
- // | |
- // -> fg_dst <-
- //
- // After:
- // fg_src <---t1
- //
- const auto st = link_src_tensors(fg_src->id, { t });
- if(!bool(st))
- {
- return st;
- }
- }
- else
- {
- const auto dst_fgs = fg_dependency.dst_ops_from_tensor(t);
- if(dst_fgs.size() == 1U && dst_fgs.at(0) == fg_dst->id)
- {
- // Remove any incoming tensors of fg_dst, that ARE in between fg_src and fg_dst
- // AND that are not connected to any other outgoing fgs (Note that they cannot connect to any other incoming fgs as all tensors can have at most 1 incoming fg (ASSUMPTION 3))
-
- // Before:
- // fg_src
- // |
- // t0
- // |
- // -> fg_dst
- //
- // After:
- // fg_src
- //
- const auto st = remove_fg_tensor(t);
- if(!bool(st))
- {
- return st;
- }
- }
- else
- {
- // If the tensors ARE in between fg_src and fg_dst
- // BUT have any other outgoing fgs than fg_dst, then we leave it as a dst tensor to the fused fg_src
-
- // Before:
- // fg_src
- // |
- // t0
- // |
- // |-----------
- // | |
- // -> fg_dst -> fg_other
- //
- // After:
- // fg_src
- // |
- // t0
- // |
- // -> fg_other
- //
-
- // Note that this may seem like a case we shouldn't fuse. But actually all it means is that t0 is an
- // intermediate tensor between the fused fg_src and fg_dst, but only that we also STORE it to memory
- // so that any unfused fg's (fg_other in this case) can read it.
- // So all this means that we not only can STORE the tensors at the "end" of a fusion group,
- // but also any other tensors that are not source tensors. And all tensors that are STORED (exported),
- // can be termed "dst tensors" to a fusion group
- void(); // Intentional no-op: nothing to do for this case
- }
- }
- }
-
- for(const auto &t : fg_dependency.dst_tensors(fg_dst->id))
- {
- // Link any outgoing tensors of fg_dst to fg_src
-
- // Before:
- // fg_src
- // |
- // ..
- // |
- // -> fg_dst
- // |
- // |--------
- // | |
- // |-> t0 |-> t1
- //
- // After:
- // fg_src
- // |
- // |--------
- // | |
- // |-> t0 |-> t1
- //
- const auto st = link_dst_tensors(fg_src->id, { t });
- if(!bool(st))
- {
- return st;
- }
- }
-
- // Merge fg_dst's graph into fg_src's graph
- for(const auto kernel : traverse(*fg_dst))
- {
- fg_src->add_fused_kernel(kernel);
- }
-
- // Finally drop the child group: this erases it from both fg_dependency and fusion_groups
- const auto st = remove_fg(fg_dst->id);
- return st;
- }
- /** Check whether two fusion groups can be fused together
-  *
-  * @param[in] fg0 First fusion group
-  * @param[in] fg1 Second fusion group
-  *
-  * @return A default-constructed Status if all checks pass, otherwise a RUNTIME_ERROR Status describing
-  *         the first check that failed
-  */
- Status can_fuse(const ClKernelFusionGroup &fg0, const ClKernelFusionGroup &fg1) const
- {
- /// ASSUMPTION0: All tensors have 0 or 1 incoming kernel
- /// ASSUMPTION1: All kernels have exactly 1 dst tensor (Temporary, can be lifted once we start supporting multi-dst kernels)
- /// Note that this does not apply to fusion groups
- /// ASSUMPTION2: Simple kernels' tile infos can be overridden by (shared with) that of the root kernel
- /// ASSUMPTION3: Extension of ASSUMPTION0: All tensors have 0 or 1 incoming fusion group
- /// INV0: All Fusion groups have a single root
- /// INV1: All Fusion groups have no cycles or loops within themselves <- guaranteed by the underlying ClKernelGraph having no cycles or loops; enforced by DependencyGraph
- /// INV2: The ClKernelFusionGroup itself has no cycles or loops <- enforced by DependencyGraph
- /// INV3: All non-roots are Simple kernels
- /// INV4: All non roots' dst tensors have the same shape as that of the root kernel
- /// INV5: All kernels within a fusion group have the same UnitWorkloadStage
- const ClKernelFusionGroup *fg_src {};
- const ClKernelFusionGroup *fg_dst{};
-
- // Check 0: Ensure fg0 and fg1 are "directly connected": one of them is a direct parent of the other
- // This guarantees INV0
- // This also finds fg_src (parent / root) and fg_dst (child / non-root)
- if(is_in(fg1.id, fg_dependency.dst_ops(fg0.id)))
- {
- fg_src = &fg0;
- fg_dst = &fg1;
- }
- else if(is_in(fg0.id, fg_dependency.dst_ops(fg1.id)))
- {
- fg_src = &fg1;
- fg_dst = &fg0;
- }
- else
- {
- return Status{ ErrorCode::RUNTIME_ERROR, "Invalid fusion: Not directly connected fusion groups cannot be fused together" };
- }
-
- // Find unconnected tensors between fg_src and fg_dst
- // (dst tensors of fg_src that are not src tensors of fg_dst)
- std::vector<Id> unconnected_tensors{};
- for(const auto &t : fg_dependency.dst_tensors(fg_src->id))
- {
- if(!is_in(t, fg_dependency.src_tensors(fg_dst->id)))
- {
- unconnected_tensors.push_back(t);
- }
- }
-
- // Check 1: Any unconnected tensor cannot be an ancestor of fg_dst
- // This guarantees INV2: That is, the fused graph does not have any cycles or loops between different fusion groups
- for(const auto &t : unconnected_tensors)
- {
- if(fg_dependency.path_exists_from_tensor_to_op(t, fg_dst->id))
- {
- return Status{ ErrorCode::RUNTIME_ERROR, "Invalid fusion: the fusion would result in cycles or loops" };
- }
- }
-
- // Check 2: All non-root fgs are simple. Ensure INV3
- if(fg_dst->get_root_kernel()->complexity() != Complexity::Simple)
- {
- return Status{ ErrorCode::RUNTIME_ERROR, "Invalid fusion: only root kernel can be a complex kernel" };
- }
-
- // Check 3: All non roots' dst tensors have the same shape as that of the root kernel. Ensure INV4
- // NOTE(review): the dst tensors queried here are those of the fusion group fg_src, not of a single kernel.
- // fuse() can leave an intermediate tensor as an additional dst tensor of a fused group, in which case
- // the size() != 1 assertion below would not hold even though ASSUMPTION1 excludes fusion groups - confirm
- const auto root_kernel_dst_tensors = fg_dependency.dst_tensors(fg_src->id);
- ARM_COMPUTE_ERROR_ON(root_kernel_dst_tensors.size() != 1); // (ASSUMPTION 1: All kernels have exactly 1 dst tensor)
- const auto root_kernel_dst_tensor_info = original_graph->get_tensor(root_kernel_dst_tensors[0])->desc;
-
- for(const auto &t : fg_dependency.dst_tensors(fg_dst->id))
- {
- const auto t_info = original_graph->get_tensor(t)->desc;
- if(detail::have_different_dimensions(root_kernel_dst_tensor_info->tensor_shape(), t_info->tensor_shape(), 0))
- {
- return Status{ ErrorCode::RUNTIME_ERROR, "Invalid fusion: all non roots' dst tensors should have the same shape as that of the root kernel" };
- }
- }
-
- // Check 4: All kernels within a fg have the same UnitWorkloadStage. Ensure INV5
- // (only the stages of the two root kernels are compared)
- if(!(fg_src->get_root_kernel()->config().stage == fg_dst->get_root_kernel()->config().stage))
- {
- return Status{ ErrorCode::RUNTIME_ERROR, "Invalid fusion: all kernels within a fusion group should have the same UnitWorkloadStage" };
- }
-
- return Status{};
- }
-
- const ClKernelGraph *original_graph{};
- DependencyGraph fg_dependency{};
- KernelFusionGroupMap fusion_groups{};
- // Note: no need to store tensor pointers in the ClFusedKernelGraph, as they are stored inside the individual fusion groups.
-
-private:
- /** Link every tensor in @p src_tensors as an input of fusion group @p fg in fg_dependency
-  *
-  * @return Always a default-constructed Status
-  */
- Status link_src_tensors(Id fg, const std::vector<Id> &src_tensors)
- {
-     for(auto it = src_tensors.begin(); it != src_tensors.end(); ++it)
-     {
-         auto tensor_id = *it;
-         fg_dependency.link_input(fg, tensor_id);
-     }
-     return Status{};
- }
- /** Link every tensor in @p dst_tensors as an output of fusion group @p fg in fg_dependency
-  *
-  * @return Always a default-constructed Status
-  */
- Status link_dst_tensors(Id fg, const std::vector<Id> &dst_tensors)
- {
-     for(auto it = dst_tensors.begin(); it != dst_tensors.end(); ++it)
-     {
-         auto tensor_id = *it;
-         fg_dependency.link_output(fg, tensor_id);
-     }
-     return Status{};
- }
- /** Remove fusion group @p fg: first its operator in fg_dependency, then its entry in fusion_groups
-  *
-  * @return Always a default-constructed Status
-  */
- Status remove_fg(Id fg)
- {
-     // Drop the node from the dependency graph before releasing the group itself
-     fg_dependency.remove_operator(fg);
-     fusion_groups.erase(fg);
-     Status ok{};
-     return ok;
- }
- /** Remove @p tensor from fg_dependency
-  *
-  * @return Always a default-constructed Status
-  */
- Status remove_fg_tensor(Id tensor)
- {
-     fg_dependency.remove_tensor(tensor);
-     Status ok{};
-     return ok;
- }
-};
-
-std::vector<const ClKernelFusionGroup *> traverse(const ClFusedKernelGraph &graph);
-std::vector<ClKernelFusionGroup *> traverse(ClFusedKernelGraph &graph);
-
-std::pair<Status, ClFusedKernelGraph> init_fusion_graph(const ClKernelGraph &kernel_graph);
-
-Status fuse(ClFusedKernelGraph &fused_kernel_graph);
-
-Status generate_store(ClKernelBlueprint &bp, const ClFusedKernelGraph &fused_kernel_graph, const ClKernelFusionGroup &fg);
-
-Status generate(ClWorkload &workload, const ClWorkloadContext &ctx, const ClFusedKernelGraph &fused_kernel_graph);
-
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
-#endif //ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_CLFUSEDKERNELGRAPH_H
-#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */ \ No newline at end of file
diff --git a/src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelDescriptors.h b/src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelDescriptors.h
deleted file mode 100644
index f10e97e3e9..0000000000
--- a/src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelDescriptors.h
+++ /dev/null
@@ -1,121 +0,0 @@
-/*
- * Copyright (c) 2022 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION
-#ifndef ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_CLKERNELDESCRIPTORS_H
-#define ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_CLKERNELDESCRIPTORS_H
-
-#include "arm_compute/core/experimental/OperatorGraph.h"
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-/** Kernel descriptor wrapping a @ref Conv2dDescriptor
- *
- * Two descriptors compare equal when their conv2d members compare equal.
- */
-struct ClDirectConv2dKernelDescriptor
-{
-    Conv2dDescriptor conv2d{};
-
-    friend bool operator==(const ClDirectConv2dKernelDescriptor &desc0, const ClDirectConv2dKernelDescriptor &desc1)
-    {
-        return (desc0.conv2d == desc1.conv2d);
-    }
-};
-
-/** Kernel descriptor wrapping an @ref ElementwiseDescriptor
- *
- * Two descriptors compare equal when their eltwise members compare equal.
- */
-struct ClElementwiseKernelDescriptor
-{
-    ElementwiseDescriptor eltwise{};
-
-    friend bool operator==(const ClElementwiseKernelDescriptor &desc0, const ClElementwiseKernelDescriptor &desc1)
-    {
-        return (desc0.eltwise == desc1.eltwise);
-    }
-};
-
-/** Kernel descriptor wrapping a @ref FloorDescriptor
- *
- * Two descriptors compare equal when their floor members compare equal.
- */
-struct ClFloorKernelDescriptor
-{
-    FloorDescriptor floor{};
-
-    friend bool operator==(const ClFloorKernelDescriptor &desc0, const ClFloorKernelDescriptor &desc1)
-    {
-        return (desc0.floor == desc1.floor);
-    }
-};
-
-/** Kernel descriptor without any data members
- *
- * As there is nothing to compare, any two descriptors are equal.
- */
-struct ClActivationKernelDescriptor
-{
-    friend bool operator==(const ClActivationKernelDescriptor &, const ClActivationKernelDescriptor &)
-    {
-        // No state: all instances are interchangeable
-        constexpr bool always_equal = true;
-        return always_equal;
-    }
-};
-
-/** Clipping strategy stored in @ref TileDescriptor, which defaults to TOP_LEFT
- *
- * The enumerators name the four corners; how a corner is applied is decided by the code consuming
- * the @ref TileDescriptor.
- */
-enum class ClippingStrategy
-{
- TOP_LEFT,
- TOP_RIGHT,
- BOTTOM_LEFT,
- BOTTOM_RIGHT,
-};
-/** Component: Store
- *
- * Tile descriptor made of the tile dimensions, the boundaries and the clipping strategy.
- * A default-constructed descriptor has zero-initialised sizes and clips at TOP_LEFT.
- */
-struct TileDescriptor
-{
-    Size2D           tile_dims{};
-    Size2D           boundaries{};
-    ClippingStrategy clipping{ ClippingStrategy::TOP_LEFT };
-
-    TileDescriptor()
-    {
-    }
-
-    TileDescriptor(Size2D dims, const Size2D &bound, const ClippingStrategy &clip)
-        : tile_dims(dims), boundaries(bound), clipping(clip)
-    {
-    }
-
-    /** A descriptor is empty when either the tile dimensions or the boundaries have zero area */
-    bool empty() const
-    {
-        // De Morgan form of "tile area is 0 OR boundary area is 0"
-        return !((tile_dims.area() != 0) && (boundaries.area() != 0));
-    }
-
-    /** Two descriptors are equal when tile dimensions, boundaries and clipping all compare equal */
-    friend bool operator==(const TileDescriptor &tile0, const TileDescriptor &tile1)
-    {
-        if(!(tile0.tile_dims == tile1.tile_dims))
-        {
-            return false;
-        }
-        if(!(tile0.boundaries == tile1.boundaries))
-        {
-            return false;
-        }
-        return tile0.clipping == tile1.clipping;
-    }
-};
-/** Kind of store to use when writing a kernel's result
- *
- * NOTE(review): presumably this is the type of the store_type field of a kernel's config, which
- * generate_store() passes to add_kcomp_store() - confirm against the kernel config definition
- */
-enum class StoreType
-{
- VStore,
- VStorePartial,
- StoreRow,
- ConvertStoreRow,
- StoreBlock,
- ConvertStoreBlock,
- StoreRowPartial,
- StoreBlockPartial,
- StoreBlockBoundaryAware,
- StoreVectorSelect,
- TStoreIndirectWidthSelect
-};
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
-#endif //ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_CLKERNELDESCRIPTORS_H
-#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */ \ No newline at end of file
diff --git a/src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelGraph.cpp b/src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelGraph.cpp
deleted file mode 100644
index cab51a2ce6..0000000000
--- a/src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelGraph.cpp
+++ /dev/null
@@ -1,271 +0,0 @@
-/*
- * Copyright (c) 2022 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION
-#include "arm_compute/core/utils/misc/ShapeCalculator.h"
-
-#include "src/core/CL/CLValidate.h"
-#include "src/core/experimental/dynamic_fusion/ClKernelBuildingAPI.h"
-#include "src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelGraph.h"
-
-#include "support/Cast.h"
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-Status ClDirectConv2dKernel::generate(ClKernelBlueprint &bp) const
-{
- const auto input = _tensors.get_const_tensor(TensorType::ACL_SRC_0);
- const auto weight = _tensors.get_const_tensor(TensorType::ACL_SRC_1);
- const auto bias = _tensors.get_const_tensor(TensorType::ACL_SRC_2);
- const auto dst = _tensors.get_const_tensor(TensorType::ACL_DST_0);
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, weight, dst);
- ArgumentID input_id;
- add_tensor(bp, input->desc, input_id, input->id);
- ArgumentID weight_id;
- add_tensor(bp, weight->desc, weight_id, weight->id);
- ArgumentID bias_id = g_arg_placeholder;
- if(bias != nullptr)
- {
- add_tensor(bp, bias->desc, bias_id, bias->id);
- }
- ArgumentID dst_id;
- add_tensor(bp, dst->desc, dst_id, dst->id);
-
- add_kcomp_direct_conv2d(bp, desc, input_id, weight_id, bias_id, dst_id);
- return Status{};
-}
-Status ClDirectConv2dKernel::validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const ClDirectConv2dKernelDescriptor &conv2d_desc)
-{
- // 1. Check validity
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, weights, dst);
- // Matching data type
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, weights);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, dst);
- if(biases != nullptr)
- {
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, biases);
- }
-
- // Matching data layout
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(src, weights);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(src, dst);
- if(biases != nullptr)
- {
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(src, biases);
- }
-
- // All tensor infos are initialized
- ARM_COMPUTE_RETURN_ERROR_ON(src->tensor_shape().total_size() == 0);
- ARM_COMPUTE_RETURN_ERROR_ON(weights->tensor_shape().total_size() == 0);
- ARM_COMPUTE_RETURN_ERROR_ON(dst->tensor_shape().total_size() == 0);
- if(biases != nullptr)
- {
- ARM_COMPUTE_RETURN_ERROR_ON(biases->tensor_shape().total_size() == 0);
- }
- // Device requirements are met
- ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(src);
- // weights shape is correct
- const DataLayout data_layout = src->data_layout();
- const int channel_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(weights->dimension(channel_idx) != src->dimension(channel_idx), "Weights feature map dimension should match the respective src's one");
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(weights->num_dimensions() > 4, "Weights can be at most 4 dimensional");
-
- // dst shape is correct
- PadStrideInfo legacy_pad_stride(conv2d_desc.conv2d.stride.x(), conv2d_desc.conv2d.stride.y(), conv2d_desc.conv2d.pad.left, conv2d_desc.conv2d.pad.right, conv2d_desc.conv2d.pad.top,
- conv2d_desc.conv2d.pad.bottom, DimensionRoundingType{});
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(dst->tensor_shape(),
- misc::shape_calculator::compute_deep_convolution_shape(*src, *weights, legacy_pad_stride));
-
- // biases shape is correct
- if(biases != nullptr)
- {
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(biases->dimension(0) != weights->dimension(3),
- "Biases size and number of dst feature maps should match");
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(biases->num_dimensions() > 1,
- "Biases should be one dimensional");
- }
-
- // 2. Check support level
- // Data type
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::F16, DataType::F32);
- // Data layout
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_LAYOUT_NOT_IN(src, DataLayout::NHWC);
-
- return Status{};
-}
-
-bool ClDirectConv2dKernel::operator==(const ClKernel &other) const
-{
- const auto converted = *utils::cast::polymorphic_downcast<const ClDirectConv2dKernel *>(&other);
- return config() == other.config() && tensors() == other.tensors() && desc == converted.desc;
-}
-
-Status ClElementwiseKernel::generate(ClKernelBlueprint &bp) const
-{
- const auto lhs = _tensors.get_const_tensor(TensorType::ACL_SRC_0);
- const auto rhs = _tensors.get_const_tensor(TensorType::ACL_SRC_1);
- const auto dst = _tensors.get_const_tensor(TensorType::ACL_DST_0);
- ARM_COMPUTE_ERROR_ON_NULLPTR(lhs, rhs, dst);
- ArgumentID lhs_id;
- add_tensor(bp, lhs->desc, lhs_id, lhs->id);
- ArgumentID rhs_id;
- add_tensor(bp, rhs->desc, rhs_id, rhs->id);
- ArgumentID dst_id;
- add_tensor(bp, dst->desc, dst_id, dst->id);
-
- add_kcomp_eltwise_op(bp, desc, lhs_id, rhs_id, dst_id);
- return Status{};
-}
-
-Status ClElementwiseKernel::validate(const ITensorInfo *lhs, const ITensorInfo *rhs, const ITensorInfo *dst)
-{
- // 1. Check validity
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(lhs, rhs, dst);
-
- // Matching data type
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(lhs, rhs);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(lhs, dst);
-
- // Matching data layout
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(lhs, rhs);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(lhs, dst);
-
- // All tensor infos are initialized
- ARM_COMPUTE_RETURN_ERROR_ON(lhs->tensor_shape().total_size() == 0);
- ARM_COMPUTE_RETURN_ERROR_ON(rhs->tensor_shape().total_size() == 0);
- ARM_COMPUTE_RETURN_ERROR_ON(dst->tensor_shape().total_size() == 0);
-
- // Device requirements are met
- ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(lhs);
-
- const bool in_place = (lhs == dst) || (rhs == dst);
- const bool src0_in_place = in_place && (lhs == dst);
-
- // dst shape is correct
- const TensorShape out_shape = TensorShape::broadcast_shape(lhs->tensor_shape(), rhs->tensor_shape());
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(out_shape.total_size() == 0, "Inputs are not broadcast compatible");
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(detail::have_different_dimensions(out_shape, dst->tensor_shape(), 0), "Wrong shape for dst");
- if(in_place)
- {
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(detail::have_different_dimensions(out_shape, src0_in_place ? lhs->tensor_shape() : rhs->tensor_shape(), 0),
- "Wrong shape for dst, cannot do in_place calculation");
- }
-
- // 2. Check support level
-
- // Data type
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lhs, 1, DataType::F32, DataType::F16);
-
- // Data layout
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_LAYOUT_NOT_IN(lhs, DataLayout::NHWC);
-
- return Status{};
-}
-
-bool ClElementwiseKernel::operator==(const ClKernel &other) const
-{
- const auto converted = *utils::cast::polymorphic_downcast<const ClElementwiseKernel *>(&other);
- return config() == other.config() && tensors() == other.tensors() && desc == converted.desc;
-}
-
-Status ClFloorKernel::generate(ClKernelBlueprint &bp) const
-{
- const auto src = _tensors.get_const_tensor(TensorType::ACL_SRC_0);
- const auto dst = _tensors.get_const_tensor(TensorType::ACL_DST_0);
- ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst);
- ArgumentID src_id;
- add_tensor(bp, src->desc, src_id, src->id);
- ArgumentID dst_id;
- add_tensor(bp, dst->desc, dst_id, dst->id);
-
- add_kcomp_floor(bp, desc, src_id, dst_id);
- return Status{};
-}
-
-Status ClFloorKernel::validate(const ITensorInfo *src, const ITensorInfo *dst)
-{
- // 1. Check validity
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst);
-
- // Matching data type
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, dst);
-
- // Matching data layout
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(src, dst);
-
- // All tensor infos are initialized
- ARM_COMPUTE_RETURN_ERROR_ON(src->tensor_shape().total_size() == 0);
- ARM_COMPUTE_RETURN_ERROR_ON(dst->tensor_shape().total_size() == 0);
-
- // Device requirements are met
- ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(src);
-
- // dst shape is correct
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(detail::have_different_dimensions(src->tensor_shape(), dst->tensor_shape(), 0), "Wrong shape for dst");
-
- // 2. Check support level
-
- // Data type
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::F32, DataType::F16);
-
- // Data layout
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_LAYOUT_NOT_IN(src, DataLayout::NHWC);
-
- return Status{};
-}
-
-bool ClFloorKernel::operator==(const ClKernel &other) const
-{
- const auto converted = *utils::cast::polymorphic_downcast<const ClFloorKernel *>(&other);
- return config() == other.config() && tensors() == other.tensors() && desc == converted.desc;
-}
-
-std::vector<const ClKernel *> traverse(const ClKernelGraph &graph)
-{
- std::vector<const ClKernel *> kernels;
- const auto sorted = graph.graph.topological_sort();
- for(const auto &pack : sorted.second)
- {
- kernels.push_back(graph.kernels.at(pack.op).get());
- }
- return kernels;
-}
-
-std::vector<ClKernel *> traverse(ClKernelGraph &graph)
-{
- std::vector<ClKernel *> kernels;
- const auto sorted = graph.graph.topological_sort();
- for(const auto &pack : sorted.second)
- {
- kernels.push_back(graph.kernels.at(pack.op).get());
- }
- return kernels;
-}
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
-#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */ \ No newline at end of file
diff --git a/src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelGraph.h b/src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelGraph.h
deleted file mode 100644
index c3580cfaca..0000000000
--- a/src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelGraph.h
+++ /dev/null
@@ -1,259 +0,0 @@
-/*
- * Copyright (c) 2022 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION
-#ifndef ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_CLKERNELGRAPH_H
-#define ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_CLKERNELGRAPH_H
-
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/experimental/ClWorkload.h"
-#include "arm_compute/core/experimental/DependencyGraph.h"
-#include "src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelDescriptors.h"
-#include "src/core/experimental/dynamic_fusion/WorkloadImpl/ITensorDescPack.h"
-#include "support/DeepCopy.h"
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-struct ClKernelGraph;
-class ClKernelBlueprint;
-
-enum class Complexity
-{
- Simple,
- Complex
-};
-
-/** Configurations for ClKernel
- *
- */
-struct ClKernelConfig
-{
- UnitWorkloadStage stage{};
- TileDescriptor tile_desc{};
- StoreType store_type{};
- friend bool operator==(const ClKernelConfig &config0, const ClKernelConfig &config1)
- {
- return config0.stage == config1.stage && config0.tile_desc == config1.tile_desc && config0.store_type == config1.store_type;
- }
-};
-
-struct ClKernelTensor
-{
-public:
- using Id = DependencyGraph::Id;
- ClKernelTensor() = default;
- ClKernelTensor(Id id, ITensorInfo *desc, MemoryType memory_type, const AuxMemoryInfo &memory_info)
- : id{ id }, desc{ desc }, memory_type{ memory_type }, memory_info{ memory_info }
- {
- }
- bool operator==(const ClKernelTensor &other) const
- {
- return desc == other.desc;
- }
-
- Id id{};
- ITensorInfo *desc{};
- MemoryType memory_type{};
- AuxMemoryInfo memory_info{};
-};
-
-struct ClKernel
-{
-public:
- using Id = DependencyGraph::Id;
- ClKernel() = default;
- virtual ~ClKernel() = default;
- ClKernel(const ClKernel &kernel) = default;
- ClKernel &operator=(const ClKernel &kernel) = default;
- ClKernel(ClKernel &&kernel) = default;
- ClKernel &operator=(ClKernel &&kernel) = default;
- ClKernel(const ClKernelGraph *graph, Id id, const ClKernelConfig &config, const ITensorDescPack<ClKernelTensor> &tensors)
- : _graph{ graph }, _id{ id }, _config{ config }, _tensors{ tensors }
- {
- }
- virtual bool operator==(const ClKernel &other) const = 0;
- virtual Complexity complexity() const = 0;
- virtual Status generate(ClKernelBlueprint &bp) const = 0;
- Id id() const
- {
- return _id;
- }
- ITensorDescPack<ClKernelTensor> tensors() const
- {
- return _tensors;
- }
- ClKernelConfig config() const
- {
- return _config;
- }
-
-protected:
- const ClKernelGraph *_graph {};
- Id _id{};
- ClKernelConfig _config{};
- ITensorDescPack<ClKernelTensor> _tensors{};
-};
-
-struct ClDirectConv2dKernel : public ClKernel
-{
-public:
- Complexity complexity() const override
- {
- return Complexity::Complex;
- }
- ClDirectConv2dKernel() = default;
- ~ClDirectConv2dKernel() override = default;
- ClDirectConv2dKernel(const ClKernelGraph *graph, Id id, const ClKernelConfig config, const ClDirectConv2dKernelDescriptor &desc, const ITensorDescPack<ClKernelTensor> tensors)
- : ClKernel{ graph, id, config, tensors }, desc{ desc }
- {
- }
- static Status validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const ClDirectConv2dKernelDescriptor &conv2d_desc);
- bool operator==(const ClKernel &other) const override;
- Status generate(ClKernelBlueprint &bp) const override;
-
- ClDirectConv2dKernelDescriptor desc{};
-};
-
-struct ClElementwiseKernel : public ClKernel
-{
-public:
- Complexity complexity() const override
- {
- return Complexity::Simple;
- }
- ClElementwiseKernel() = default;
- ~ClElementwiseKernel() override = default;
- ClElementwiseKernel(const ClKernelGraph *graph, Id id, const ClKernelConfig &config, const ClElementwiseKernelDescriptor &desc, const ITensorDescPack<ClKernelTensor> tensors)
- : ClKernel{ graph, id, config, tensors }, desc{ desc }
- {
- }
- static Status validate(const ITensorInfo *lhs, const ITensorInfo *rhs, const ITensorInfo *dst);
- bool operator==(const ClKernel &other) const override;
- Status generate(ClKernelBlueprint &bp) const override;
-
- ClElementwiseKernelDescriptor desc{};
-};
-
-struct ClFloorKernel : public ClKernel
-{
-public:
- Complexity complexity() const override
- {
- return Complexity::Simple;
- }
- ClFloorKernel() = default;
- ~ClFloorKernel() override = default;
- ClFloorKernel(const ClKernelGraph *graph, Id id, const ClKernelConfig &config, const ClFloorKernelDescriptor &desc, const ITensorDescPack<ClKernelTensor> tensors)
- : ClKernel{ graph, id, config, tensors }, desc{ desc }
- {
- }
- static Status validate(const ITensorInfo *src, const ITensorInfo *dst);
- bool operator==(const ClKernel &other) const override;
- Status generate(ClKernelBlueprint &bp) const override;
-
- ClFloorKernelDescriptor desc{};
-};
-
-struct ClKernelGraph
-{
-public:
- using Id = DependencyGraph::Id;
- using KernelMap = std::map<Id, utils::memory::deep_unique_ptr<ClKernel>>;
- using KernelTensorMap = std::map<Id, utils::memory::deep_unique_ptr<ClKernelTensor>>;
-
- ClKernelGraph() = default;
- ~ClKernelGraph() = default;
-
- friend bool operator==(const ClKernelGraph &graph0, const ClKernelGraph &graph1)
- {
- return graph0.graph == graph1.graph && graph0.kernels == graph1.kernels && graph0.tensors == graph1.tensors;
- }
-
- Status add_kernel_tensor(ITensorInfo *desc, MemoryType memory_type, const AuxMemoryInfo &memory_info, Id &tensor_id, Id merge_point = DependencyGraph::empty_id())
- {
- tensor_id = graph.add_tensor(merge_point);
- if(tensors.find(tensor_id) == tensors.end())
- {
- tensors[tensor_id] = utils::memory::make_deep_unique<ClKernelTensor, ClKernelTensor>(tensor_id, desc, memory_type, memory_info);
- }
- return Status{};
- }
-
- template <typename ContentT, typename KernelDescT>
- Status add_kernel(const ClKernelConfig &config, const KernelDescT &desc, const ITensorDescPack<ClKernelTensor> &tensors, Id &kernel_id)
- {
- const auto src_tensors = tensors.get_const_src_tensors();
- const auto dst_tensors = tensors.get_const_dst_tensors();
- std::vector<Id> src_tensor_ids{};
- std::vector<Id> dst_tensor_ids{};
- for(const auto &t : src_tensors)
- {
- src_tensor_ids.push_back(t->id);
- }
- for(const auto &t : dst_tensors)
- {
- dst_tensor_ids.push_back(t->id);
- }
- kernel_id = graph.add_operator(src_tensor_ids, dst_tensor_ids).second;
- auto k = utils::memory::make_deep_unique<ClKernel, ContentT>(this, kernel_id, config, desc, tensors);
- kernels[kernel_id] = std::move(k);
- return Status{};
- }
-
- ClKernel *get_kernel(Id id)
- {
- return kernels.at(id).get();
- }
- const ClKernel *get_kernel(Id id) const
- {
- return kernels.at(id).get();
- }
-
- ClKernelTensor *get_tensor(Id id)
- {
- return tensors.at(id).get();
- }
- const ClKernelTensor *get_tensor(Id id) const
- {
- return tensors.at(id).get();
- }
-
- DependencyGraph graph{};
- KernelMap kernels{};
- KernelTensorMap tensors{};
-};
-using Id = DependencyGraph::Id;
-
-std::vector<const ClKernel *> traverse(const ClKernelGraph &graph);
-std::vector<ClKernel *> traverse(ClKernelGraph &graph);
-
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
-#endif //ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_CLKERNELGRAPH_H
-#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */ \ No newline at end of file
diff --git a/src/core/experimental/dynamic_fusion/WorkloadImpl/ClWorkload.cpp b/src/core/experimental/dynamic_fusion/WorkloadImpl/ClWorkload.cpp
deleted file mode 100644
index dcada4f64b..0000000000
--- a/src/core/experimental/dynamic_fusion/WorkloadImpl/ClWorkload.cpp
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * Copyright (c) 2022 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION
-#include "arm_compute/core/experimental/ClWorkload.h"
-#include "src/core/experimental/dynamic_fusion/WorkloadImpl/ClFusedKernelGraph.h"
-#include "src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelGraph.h"
-#include "src/core/experimental/dynamic_fusion/WorkloadImpl/OperatorGraphImpl.h"
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-Status build(ClWorkload &workload, const OperatorGraph &op_graph, const ClWorkloadContext &ctx)
-{
- workload.context = ctx;
- ClKernelGraph kernel_graph;
- workload.status = validate(op_graph);
- ARM_COMPUTE_RETURN_ON_ERROR(workload.status);
- workload.status = translate(kernel_graph, *op_graph.impl());
- ARM_COMPUTE_RETURN_ON_ERROR(workload.status);
- ClFusedKernelGraph fused_k_graph;
- std::tie(workload.status, fused_k_graph) = init_fusion_graph(kernel_graph);
- ARM_COMPUTE_RETURN_ON_ERROR(workload.status);
- workload.status = fuse(fused_k_graph);
- ARM_COMPUTE_RETURN_ON_ERROR(workload.status);
- workload.status = generate(workload, ctx, fused_k_graph);
- ARM_COMPUTE_RETURN_ON_ERROR(workload.status);
-
- // Get operator tensor id to workload tensor id map
- const auto op_tensor_to_kernel_tensor = fused_k_graph.original_graph->graph.get_merge_points();
- const auto kernel_tensor_to_workload_tensor = workload.graph.get_merge_points();
- for(const auto op_t : op_graph.impl()->graph.src_tensors())
- {
- const auto kernel_t = op_tensor_to_kernel_tensor.at(op_t);
- const auto workload_t = kernel_tensor_to_workload_tensor.at(kernel_t);
- workload.op_tensor_id_lut[workload_t] = op_t;
- }
- for(const auto op_t : op_graph.impl()->graph.dst_tensors())
- {
- const auto kernel_t = op_tensor_to_kernel_tensor.at(op_t);
- const auto workload_t = kernel_tensor_to_workload_tensor.at(kernel_t);
- workload.op_tensor_id_lut[workload_t] = op_t;
- }
- return workload.status;
-}
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
-#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */ \ No newline at end of file
diff --git a/src/core/experimental/dynamic_fusion/WorkloadImpl/DependencyGraph.cpp b/src/core/experimental/dynamic_fusion/WorkloadImpl/DependencyGraph.cpp
deleted file mode 100644
index 7350255ebe..0000000000
--- a/src/core/experimental/dynamic_fusion/WorkloadImpl/DependencyGraph.cpp
+++ /dev/null
@@ -1,430 +0,0 @@
-/*
- * Copyright (c) 2022 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION
-#include "arm_compute/core/experimental/DependencyGraph.h"
-
-#include <algorithm>
-#include <deque>
-#include <set>
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-DependencyGraph::DependencyGraph(const AdjList &adj_src_tensors, const AdjList &adj_dst_tensors, const AdjList &adj_src_ops, const AdjList &adj_dst_ops, std::map<Id, Id> merge_points)
- : _adj_src_tensors{ adj_src_tensors }, _adj_dst_tensors{ adj_dst_tensors }, _adj_src_ops{ adj_src_ops }, _adj_dst_ops{ adj_dst_ops }, _merge_to_internal{ merge_points }, _operator_id{}, _tensor_id{}
-{
-}
-DependencyGraph::DependencyGraph(const std::vector<Id> &imported_tensors)
- : _adj_src_tensors{}, _adj_dst_tensors{}, _adj_src_ops{}, _adj_dst_ops{}, _merge_to_internal{}, _operator_id{}, _tensor_id{}
-{
- for(auto t : imported_tensors)
- {
- _adj_src_ops[t] = {};
- _adj_dst_ops[t] = {};
- }
-}
-
-Status DependencyGraph::update_merge_point(Id t_id, Id merge_point)
-{
- if(_merge_to_internal.find(merge_point) == _merge_to_internal.end())
- {
- return Status{ ErrorCode::RUNTIME_ERROR, "Merge point does not exist" };
- }
- _merge_to_internal[merge_point] = t_id;
- return Status{};
-}
-
-DependencyGraph::Id DependencyGraph::add_tensor(Id merge_tensor)
-{
- Id new_tensor{ empty_id() };
- if(merge_tensor != empty_id())
- {
- if(_merge_to_internal.find(merge_tensor) != _merge_to_internal.end())
- {
- new_tensor = _merge_to_internal[merge_tensor];
- }
- else
- {
- new_tensor = insert_new_tensor();
- _merge_to_internal[merge_tensor] = new_tensor;
- }
- }
- else
- {
- new_tensor = insert_new_tensor();
- }
- return new_tensor;
-}
-
-void DependencyGraph::remove_tensor(Id tensor)
-{
- for(auto src_op : _adj_src_ops.at(tensor))
- {
- auto &dst_tensors = _adj_dst_tensors.at(src_op);
- dst_tensors.erase(
- std::remove(std::begin(dst_tensors), std::end(dst_tensors), tensor),
- std::end(dst_tensors));
- }
- for(auto dst_op : _adj_dst_ops.at(tensor))
- {
- auto &src_tensors = _adj_src_tensors.at(dst_op);
- src_tensors.erase(
- std::remove(std::begin(src_tensors), std::end(src_tensors), tensor),
- std::end(src_tensors));
- }
- _adj_src_ops.erase(tensor);
- _adj_dst_ops.erase(tensor);
-}
-
-std::pair<Status, DependencyGraph::Id> DependencyGraph::add_operator(const std::vector<Id> &inputs, const std::vector<Id> &outputs)
-{
- Id new_op = insert_new_op();
- for(Id tensor : inputs)
- {
- link_input(new_op, tensor);
- }
- for(Id tensor : outputs)
- {
- link_output(new_op, tensor);
- }
-
- // Use topological sort in order to detect possible loops / cycles.
- // NOTE: This is unscalable. We'll need to have a better way of detecting loops or relax this invariant during operation, and add a validate method instead
- return std::pair<Status, DependencyGraph::Id>(topological_sort().first, new_op);
-}
-
-void DependencyGraph::remove_operator(Id op)
-{
- for(auto src_tensor : _adj_src_tensors.at(op))
- {
- auto &dst_ops = _adj_dst_ops.at(src_tensor);
- dst_ops.erase(
- std::remove(std::begin(dst_ops), std::end(dst_ops), op),
- std::end(dst_ops));
- }
- for(auto dst_tensor : _adj_dst_tensors.at(op))
- {
- auto &src_ops = _adj_src_ops.at(dst_tensor);
- src_ops.erase(
- std::remove(std::begin(src_ops), std::end(src_ops), op),
- std::end(src_ops));
- }
- _adj_src_tensors.erase(op);
- _adj_dst_tensors.erase(op);
-}
-
-std::map<DependencyGraph::Id, DependencyGraph::Id> DependencyGraph::get_merge_points() const
-{
- return _merge_to_internal;
-}
-
-std::vector<DependencyGraph::Id> DependencyGraph::get_root_ops() const
-{
- std::vector<Id> ops{};
- const auto op_list = all_ops();
-
- for(auto op : op_list)
- {
- if(src_ops(op).empty())
- {
- ops.emplace_back(op);
- }
- }
- return ops;
-}
-
-std::vector<DependencyGraph::Id> DependencyGraph::get_dst_ops() const
-{
- std::vector<Id> ops{};
- const auto op_list = all_ops();
-
- for(auto op : op_list)
- {
- if(dst_ops(op).empty())
- {
- ops.emplace_back(op);
- }
- }
- return ops;
-}
-
-std::vector<DependencyGraph::Id> DependencyGraph::src_tensors(Id op) const
-{
- ARM_COMPUTE_ERROR_ON(!operator_exists(op));
- return _adj_src_tensors.at(op);
-}
-
-std::vector<DependencyGraph::Id> DependencyGraph::dst_tensors(Id op) const
-{
- ARM_COMPUTE_ERROR_ON(!operator_exists(op));
- return _adj_dst_tensors.at(op);
-}
-
-std::vector<DependencyGraph::Id> DependencyGraph::src_tensors() const
-{
- std::vector<Id> tensors;
- for(auto tensor_src_ops : _adj_src_ops)
- {
- if(tensor_src_ops.second.empty())
- tensors.push_back(tensor_src_ops.first);
- }
- return tensors;
-}
-
-std::vector<DependencyGraph::Id> DependencyGraph::dst_tensors() const
-{
- std::vector<Id> tensors;
- for(auto tensor_dst_ops : _adj_dst_ops)
- {
- if(tensor_dst_ops.second.empty())
- tensors.push_back(tensor_dst_ops.first);
- }
- return tensors;
-}
-
-std::vector<DependencyGraph::Id> DependencyGraph::src_ops_from_tensor(Id tensor) const
-{
- return _adj_src_ops.at(tensor);
-}
-std::vector<DependencyGraph::Id> DependencyGraph::dst_ops_from_tensor(Id tensor) const
-{
- return _adj_dst_ops.at(tensor);
-}
-
-std::vector<DependencyGraph::Id> DependencyGraph::all_ops() const
-{
- std::vector<Id> ops{};
- std::transform(std::begin(_adj_src_tensors), std::end(_adj_src_tensors), std::back_inserter(ops), [](const auto & it)
- {
- return it.first;
- });
- return ops;
-}
-
-bool DependencyGraph::path_exists_from_tensor_to_op(Id src_tensor, Id dst_op) const
-{
- for(auto child_op : dst_ops_from_tensor(src_tensor))
- {
- if(path_exists_from_op_to_op(child_op, dst_op))
- {
- return true;
- }
- }
- return false;
-}
-
-bool DependencyGraph::path_exists_from_op_to_op(Id src_op, Id dst_op) const
-{
- if(src_op == dst_op)
- {
- return true;
- }
- if(is_in(src_op, get_dst_ops()))
- {
- return false;
- }
- for(auto child_tensor : dst_tensors(src_op))
- {
- if(path_exists_from_tensor_to_op(child_tensor, dst_op))
- {
- return true;
- }
- }
- return false;
-}
-
-std::vector<DependencyGraph::Id> DependencyGraph::all_tensors() const
-{
- std::vector<Id> tensors{};
- std::transform(std::begin(_adj_src_ops), std::end(_adj_src_ops), std::back_inserter(tensors), [](const auto & it)
- {
- return it.first;
- });
- return tensors;
-}
-
-unsigned int DependencyGraph::number_of_ops() const
-{
- return _adj_src_tensors.size();
-}
-
-unsigned int DependencyGraph::number_of_tensors() const
-{
- return _adj_src_ops.size();
-}
-
-DependencyGraph::Id DependencyGraph::insert_new_tensor()
-{
- Id new_tensor = _tensor_id.alloc();
- _adj_src_ops[new_tensor] = {};
- _adj_dst_ops[new_tensor] = {};
- return new_tensor;
-}
-DependencyGraph::Id DependencyGraph::insert_new_op()
-{
- Id new_op = _operator_id.alloc();
- _adj_src_tensors[new_op] = {};
- _adj_dst_tensors[new_op] = {};
- return new_op;
-}
-void DependencyGraph::link_input(Id op, Id in_tensor)
-{
- ARM_COMPUTE_ERROR_ON(!operator_exists(op));
- ARM_COMPUTE_ERROR_ON(!tensor_exists(in_tensor));
- ARM_COMPUTE_ERROR_ON(are_connected(op, in_tensor));
- _adj_src_tensors[op].push_back(in_tensor);
- _adj_dst_ops[in_tensor].push_back(op);
-}
-void DependencyGraph::link_output(Id op, Id out_tensor)
-{
- ARM_COMPUTE_ERROR_ON(!operator_exists(op));
- ARM_COMPUTE_ERROR_ON(!tensor_exists(out_tensor));
- ARM_COMPUTE_ERROR_ON(are_connected(op, out_tensor));
- _adj_dst_tensors[op].push_back(out_tensor);
- _adj_src_ops[out_tensor].push_back(op);
-}
-bool DependencyGraph::tensor_exists(Id tensor) const
-{
- return _adj_src_ops.find(tensor) != _adj_src_ops.end() && _adj_dst_ops.find(tensor) != _adj_dst_ops.end();
-}
-bool DependencyGraph::operator_exists(Id op) const
-{
- return _adj_src_tensors.find(op) != _adj_src_tensors.end() && _adj_dst_tensors.find(op) != _adj_dst_tensors.end();
-}
-
-bool DependencyGraph::is_src_tensor(Id tensor) const
-{
- if(!tensor_exists(tensor))
- {
- return false;
- }
- return _adj_src_ops.at(tensor).empty();
-}
-
-bool DependencyGraph::is_dst_tensor(Id tensor) const
-{
- if(!tensor_exists(tensor))
- {
- return false;
- }
- return _adj_dst_ops.at(tensor).empty();
-}
-bool DependencyGraph::is_src_tensor_of(Id op, Id tensor) const
-{
- if(!operator_exists(op) || !tensor_exists(tensor))
- {
- return false;
- }
- const auto op_inputs = src_tensors(op);
- return std::find(op_inputs.begin(), op_inputs.end(), tensor) != op_inputs.end();
-}
-bool DependencyGraph::is_dst_tensor_of(Id op, Id tensor) const
-{
- if(!operator_exists(op) || !tensor_exists(tensor))
- {
- return false;
- }
- const auto op_outputs = dst_tensors(op);
- return std::find(op_outputs.begin(), op_outputs.end(), tensor) != op_outputs.end();
-}
-bool DependencyGraph::are_connected(Id op, Id tensor) const
-{
- return is_src_tensor_of(op, tensor) || is_dst_tensor_of(op, tensor);
-}
-std::vector<DependencyGraph::Id> DependencyGraph::src_ops(Id op) const
-{
- ARM_COMPUTE_ERROR_ON(!operator_exists(op));
- std::vector<Id> ops{};
- for(Id src_tensor : src_tensors(op))
- {
- ops.insert(ops.end(), std::begin(_adj_src_ops.at(src_tensor)), std::end(_adj_src_ops.at(src_tensor)));
- }
- return ops;
-}
-
-std::vector<DependencyGraph::Id> DependencyGraph::dst_ops(Id op) const
-{
- ARM_COMPUTE_ERROR_ON(!operator_exists(op));
- std::vector<Id> ops{};
- for(Id dst_tensor : _adj_dst_tensors.at(op))
- {
- ops.insert(ops.end(), std::begin(_adj_dst_ops.at(dst_tensor)), std::end(_adj_dst_ops.at(dst_tensor)));
- }
- return ops;
-}
-
-std::pair<Status, std::vector<DependencyGraph::OpPack>> DependencyGraph::topological_sort() const
-{
- // Incident degree (number of source operators to an op)
- std::map<Id, unsigned int> in_degree{};
- std::set<Id> visited_ops{};
- std::deque<Id> zero_in_degree_ops{};
- std::vector<OpPack> sorted_op_packs{};
- for(auto op : all_ops())
- {
- const auto degree = src_ops(op).size();
- in_degree[op] = degree;
- if(degree == 0)
- {
- zero_in_degree_ops.push_back(op);
- visited_ops.insert(op);
- }
- }
-
- while(!zero_in_degree_ops.empty())
- {
- const Id op = zero_in_degree_ops.front();
- zero_in_degree_ops.pop_front();
- sorted_op_packs.push_back(OpPack{ op, src_tensors(op), dst_tensors(op) });
-
- for(const auto next_op : dst_ops(op))
- {
- if(in_degree[next_op] > 0)
- {
- in_degree[next_op]--;
- }
- if(in_degree[next_op] == 0 && visited_ops.find(next_op) == visited_ops.end())
- {
- zero_in_degree_ops.push_back(next_op);
- visited_ops.insert(op);
- }
- }
- }
-
- // If there are remaining ops with in_degree > 0, then it's indication that there are cycles in the graph
- Status st{};
- if(sorted_op_packs.size() != number_of_ops())
- {
- st = Status{ ErrorCode::RUNTIME_ERROR, "Cycles or loops are not allowed in a DependencyGraph" };
- }
- return std::make_pair(st, sorted_op_packs);
-}
-
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
-#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */ \ No newline at end of file
diff --git a/src/core/experimental/dynamic_fusion/WorkloadImpl/ITensorDescPack.h b/src/core/experimental/dynamic_fusion/WorkloadImpl/ITensorDescPack.h
deleted file mode 100644
index a4e4eaa3bb..0000000000
--- a/src/core/experimental/dynamic_fusion/WorkloadImpl/ITensorDescPack.h
+++ /dev/null
@@ -1,241 +0,0 @@
-/*
- * Copyright (c) 2022 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION
-#ifndef ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_ITENSORDESCPACK_H
-#define ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_ITENSORDESCPACK_H
-
-#include <cstddef>
-#include <unordered_map>
-#include <vector>
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-/** Pack of tensor descriptors keyed by an integer id
- *
- * Each entry stores either a mutable or a const raw pointer to a descriptor. The pack only stores the
- * pointers: it neither copies nor deletes the descriptors they point to.
- *
- * @tparam TDesc Tensor descriptor type. It must be equality comparable, as entries are compared by pointee value
- */
-template <typename TDesc>
-class ITensorDescPack
-{
-public:
-    /** Entry of the pack: an id together with either a mutable or a const descriptor pointer */
-    struct PackElement
-    {
-        PackElement()                               = default;
-        ~PackElement()                              = default;
-        PackElement(const PackElement &)            = default;
-        PackElement &operator=(const PackElement &) = default;
-        PackElement(PackElement &&)                 = default;
-        PackElement &operator=(PackElement &&)      = default;
-        /** Construct an entry that holds a mutable descriptor
-         *
-         * @param[in] id     ID/type of the tensor
-         * @param[in] tensor Mutable descriptor
-         */
-        PackElement(int id, TDesc *tensor)
-            : id(id), tensor(tensor), ctensor(nullptr)
-        {
-        }
-        /** Construct an entry that holds a const descriptor
-         *
-         * @param[in] id      ID/type of the tensor
-         * @param[in] ctensor Const descriptor
-         */
-        PackElement(int id, const TDesc *ctensor)
-            : id(id), tensor(nullptr), ctensor(ctensor)
-        {
-        }
-
-        int          id{ -1 };           /**< ID/type of the tensor; -1 when default constructed */
-        TDesc       *tensor{ nullptr };  /**< Mutable descriptor, or nullptr if the entry holds a const descriptor */
-        const TDesc *ctensor{ nullptr }; /**< Const descriptor, or nullptr if the entry holds a mutable descriptor */
-
-        /** Two entries are equal when their ids match and both hold the same kind of pointer (both const or both
-         *  mutable) to descriptors that compare equal by value.
-         *
-         * @note Entries in which both pointers are null never compare equal, not even to themselves
-         */
-        friend bool operator==(const PackElement &elem0, const PackElement &elem1)
-        {
-            const bool same_ctensor = (elem0.tensor == nullptr && elem1.tensor == nullptr && elem0.ctensor != nullptr && elem1.ctensor != nullptr && *elem0.ctensor == *elem1.ctensor);
-            const bool same_tensor  = (elem0.ctensor == nullptr && elem1.ctensor == nullptr && elem0.tensor != nullptr && elem1.tensor != nullptr && *elem0.tensor == *elem1.tensor);
-
-            return elem0.id == elem1.id && (same_ctensor || same_tensor);
-        }
-    };
-
-public:
-    /** Default Constructor */
-    ITensorDescPack()  = default;
-    ~ITensorDescPack() = default;
-    // The injected class name is used here: naming constructors with a template-id is ill-formed since C++20
-    ITensorDescPack(const ITensorDescPack &other) = default;
-    ITensorDescPack &operator=(const ITensorDescPack &other) = default;
-    ITensorDescPack(ITensorDescPack &&other)                 = default;
-    ITensorDescPack &operator=(ITensorDescPack &&other) = default;
-    /** Initializer list Constructor
-     *
-     * @param[in] l Entries to store; if several entries share an id, the last one wins
-     */
-    ITensorDescPack(std::initializer_list<PackElement> l)
-        : _pack{}
-    {
-        for(const auto &e : l)
-        {
-            _pack[e.id] = e;
-        }
-    }
-    /** Add tensor to the pack
-     *
-     * Replaces any entry previously stored under the same id.
-     *
-     * @param[in] id     ID/type of the tensor to add
-     * @param[in] tensor Tensor to add
-     */
-    void add_tensor(int id, TDesc *tensor)
-    {
-        _pack[id] = PackElement(id, tensor);
-    }
-
-    /** Add const tensor to the pack
-     *
-     * Replaces any entry previously stored under the same id.
-     *
-     * @param[in] id     ID/type of the tensor to add
-     * @param[in] tensor Tensor to add
-     */
-    void add_const_tensor(int id, const TDesc *tensor)
-    {
-        _pack[id] = PackElement(id, tensor);
-    }
-    /** Get tensor of a given id from the pack
-     *
-     * @param[in] id ID of tensor to extract
-     *
-     * @return The pointer to the tensor if exist and is non-const else nullptr
-     */
-    TDesc *get_tensor(int id)
-    {
-        auto it = _pack.find(id);
-        return it != _pack.end() ? it->second.tensor : nullptr;
-    }
-    /** Get constant tensor of a given id
-     *
-     * @param[in] id ID of tensor to extract
-     *
-     * @return The const pointer stored under the id if there is one, otherwise the mutable pointer stored under
-     *         the id (viewed as const); nullptr if the id is not in the pack
-     */
-    const TDesc *get_const_tensor(int id) const
-    {
-        auto it = _pack.find(id);
-        if(it != _pack.end())
-        {
-            return it->second.ctensor != nullptr ? it->second.ctensor : it->second.tensor;
-        }
-        return nullptr;
-    }
-    /** Remove the tensor stored with the given id
-     *
-     * @param[in] id ID of tensor to remove
-     */
-    void remove_tensor(int id)
-    {
-        _pack.erase(id);
-    }
-    /** Pack size accessor
-     *
-     * @return Number of tensors registered to the pack
-     */
-    size_t size() const
-    {
-        return _pack.size();
-    }
-    /** Checks if pack is empty
-     *
-     * @return True if empty else false
-     */
-    bool empty() const
-    {
-        return _pack.empty();
-    }
-
-    /** Get the ACL_SRC_* tensors
-     *
-     * Only entries added as mutable tensors are returned, in increasing id order.
-     *
-     * @return std::vector<TDesc *>
-     */
-    std::vector<TDesc *> get_src_tensors()
-    {
-        std::vector<TDesc *> src_tensors{};
-        for(int id = static_cast<int>(TensorType::ACL_SRC); id <= static_cast<int>(TensorType::ACL_SRC_END); ++id)
-        {
-            auto tensor = get_tensor(id);
-            if(tensor != nullptr)
-            {
-                src_tensors.push_back(tensor);
-            }
-        }
-        return src_tensors;
-    }
-    /** Get the const ACL_SRC_* tensors
-     *
-     * Entries added as mutable tensors are returned as well (see @ref get_const_tensor), in increasing id order.
-     *
-     * @return std::vector<const TDesc *>
-     */
-    std::vector<const TDesc *> get_const_src_tensors() const
-    {
-        std::vector<const TDesc *> src_tensors{};
-        for(int id = static_cast<int>(TensorType::ACL_SRC); id <= static_cast<int>(TensorType::ACL_SRC_END); ++id)
-        {
-            auto tensor = get_const_tensor(id);
-            if(tensor != nullptr)
-            {
-                src_tensors.push_back(tensor);
-            }
-        }
-        return src_tensors;
-    }
-    /** Get the ACL_DST_* tensors
-     *
-     * Only entries added as mutable tensors are returned, in increasing id order.
-     *
-     * @return std::vector<TDesc *>
-     */
-    std::vector<TDesc *> get_dst_tensors()
-    {
-        std::vector<TDesc *> dst_tensors{};
-        for(int id = static_cast<int>(TensorType::ACL_DST); id <= static_cast<int>(TensorType::ACL_DST_END); ++id)
-        {
-            auto tensor = get_tensor(id);
-            if(tensor != nullptr)
-            {
-                dst_tensors.push_back(tensor);
-            }
-        }
-        return dst_tensors;
-    }
-    /** Get the const ACL_DST_* tensors
-     *
-     * Entries added as mutable tensors are returned as well (see @ref get_const_tensor), in increasing id order.
-     *
-     * @return std::vector<const TDesc *>
-     */
-    std::vector<const TDesc *> get_const_dst_tensors() const
-    {
-        std::vector<const TDesc *> dst_tensors{};
-        for(int id = static_cast<int>(TensorType::ACL_DST); id <= static_cast<int>(TensorType::ACL_DST_END); ++id)
-        {
-            auto tensor = get_const_tensor(id);
-            if(tensor != nullptr)
-            {
-                dst_tensors.push_back(tensor);
-            }
-        }
-        return dst_tensors;
-    }
-
-    /** Two packs are equal when they hold equal entries under the same ids */
-    friend bool operator==(const ITensorDescPack &pack0, const ITensorDescPack &pack1)
-    {
-        return pack0._pack == pack1._pack;
-    }
-
-private:
-    std::unordered_map<int, PackElement> _pack{}; /**< Container with the packed tensors */
-};
-
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
-#endif //ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_ITENSORDESCPACK_H
-#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */ \ No newline at end of file
diff --git a/src/core/experimental/dynamic_fusion/WorkloadImpl/OperatorGraphImpl.cpp b/src/core/experimental/dynamic_fusion/WorkloadImpl/OperatorGraphImpl.cpp
deleted file mode 100644
index 663b89e235..0000000000
--- a/src/core/experimental/dynamic_fusion/WorkloadImpl/OperatorGraphImpl.cpp
+++ /dev/null
@@ -1,423 +0,0 @@
-/*
- * Copyright (c) 2022 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION
-#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/utils/misc/ShapeCalculator.h"
-#include "arm_compute/runtime/CL/CLScheduler.h"
-
-#include "src/core/experimental/dynamic_fusion/WorkloadImpl/ClKernelGraph.h"
-#include "src/core/experimental/dynamic_fusion/WorkloadImpl/OperatorGraphImpl.h"
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-namespace
-{
-/** Add an operator tensor to the kernel graph with an explicitly given memory type and auxiliary memory info
- *
- * @param[in,out] k_graph     Kernel graph the tensor is added to
- * @param[in]     op_graph    Operator graph the tensor belongs to (not used by this overload)
- * @param[in]     op_tensor   Operator tensor to add
- * @param[in]     memory_type Memory type of the kernel tensor
- * @param[in]     memory_info Auxiliary memory information of the kernel tensor
- * @param[out]    id          Receives the id of the kernel tensor
- *
- * @return Status reported by the kernel graph
- */
-Status add_kernel_tensor(ClKernelGraph &k_graph, const OperatorGraph::Implementation &op_graph, const OpTensorContent &op_tensor, MemoryType memory_type, AuxMemoryInfo memory_info,
-                         DependencyGraph::Id &id)
-{
-    // Only kept in the signature so that both overloads are called in the same way
-    ARM_COMPUTE_UNUSED(op_graph);
-
-    // The operator tensor id is forwarded as the last argument alongside the descriptor
-    return k_graph.add_kernel_tensor(op_tensor.desc, memory_type, memory_info, id, op_tensor.id);
-}
-
-/** Add an operator tensor to the kernel graph, deducing its memory type from its position in the operator graph
- *
- * For a tensor t:
- *  1. If t is a src tensor of the entire op graph, then it is Core.
- *     (Optimisation opportunity: if we guarantee that all translate methods are called in topological order, we can
- *     always assign t to Core. Even if the op is non-root (which would mean t should be an Aux tensor), the src
- *     tensors would already be determined by the ancestor ops, and thus would not be overridden by it.)
- *  2. If t is a dst tensor of the entire op graph, then it is Core.
- *  3. Aux tensors with Persistent and Prepare lifetime are specified manually through the other overload.
- *  4. Every other tensor is Auxiliary with a Temporary lifetime.
- *
- * @param[in,out] k_graph   Kernel graph the tensor is added to
- * @param[in]     op_graph  Operator graph the tensor belongs to
- * @param[in]     op_tensor Operator tensor to add
- * @param[out]    id        Receives the id of the kernel tensor
- *
- * @return Status reported by the kernel graph
- */
-Status add_kernel_tensor(ClKernelGraph &k_graph, const OperatorGraph::Implementation &op_graph, const OpTensorContent &op_tensor, DependencyGraph::Id &id)
-{
-    const bool is_graph_input  = is_in(op_tensor.id, op_graph.graph.src_tensors());
-    const bool is_graph_output = is_in(op_tensor.id, op_graph.graph.dst_tensors());
-
-    AuxMemoryInfo aux_info;
-    MemoryType    mem_type = MemoryType::Core;
-    if(!is_graph_input && !is_graph_output)
-    {
-        // Intermediate tensor: temporary auxiliary memory, sized after the tensor itself
-        mem_type          = MemoryType::Auxiliary;
-        aux_info.lifetime = AuxMemoryLifetime::Temporary;
-        aux_info.size     = op_tensor.desc->total_size();
-    }
-    return add_kernel_tensor(k_graph, op_graph, op_tensor, mem_type, aux_info, id);
-}
-
-/** Kernel size from which direct convolution is considered suitable with NHWC data layout.
- *
- * @note Duplicate of the function with the same name in src/gpu/cl/operators/ClConv2d.cpp
- *
- * @note Direct convolution should be executed when both spatial dimensions of the kernel are greater than or
- *       equal to the value returned by this function
- *
- * @param[in] gpu_target GPU target
- *
- * @return 7 for G71, G72, MIDGARD and BIFROST; 5 for every other target (G76, G77 and G78 included)
- */
-size_t get_direct_conv_kernel_threshold_nhwc(arm_compute::GPUTarget gpu_target)
-{
-    const bool uses_larger_threshold = gpu_target == arm_compute::GPUTarget::G71
-                                       || gpu_target == arm_compute::GPUTarget::G72
-                                       || gpu_target == arm_compute::GPUTarget::MIDGARD
-                                       || gpu_target == arm_compute::GPUTarget::BIFROST;
-    if(uses_larger_threshold)
-    {
-        return 7;
-    }
-    return 5;
-}
-} // namespace
-
-/** Two operator tensors are equal when their ids are equal */
-bool operator==(const OpTensor &t0, const OpTensor &t1)
-{
-    return t0.id() == t1.id();
-}
-/** Two conv2d descriptors are equal when padding, stride and dilation all match
- *
- * Padding is compared field by field: descriptors that only differ in padding describe different convolutions
- * and must not compare equal.
- */
-bool operator==(const Conv2dDescriptor &conv2d0, const Conv2dDescriptor &conv2d1)
-{
-    return std::make_tuple(conv2d0.pad.left, conv2d0.pad.right, conv2d0.pad.top, conv2d0.pad.bottom, conv2d0.stride, conv2d0.dilation)
-           == std::make_tuple(conv2d1.pad.left, conv2d1.pad.right, conv2d1.pad.top, conv2d1.pad.bottom, conv2d1.stride, conv2d1.dilation);
-}
-
-/** Two elementwise descriptors are equal when they describe the same arithmetic operation */
-bool operator==(const ElementwiseDescriptor &ed0, const ElementwiseDescriptor &ed1)
-{
-    const bool same_operation = (ed0.op == ed1.op);
-    return same_operation;
-}
-
-/** Floor descriptors are always equal: there is currently no attribute to tell two Floor ops apart */
-bool operator==(const FloorDescriptor &, const FloorDescriptor &)
-{
-    return true;
-}
-
-/** Compare the conv2d descriptors of two operator contents
- *
- * @param[in] other Content to compare against; it is downcast to Conv2dContent, so it is expected to be one
- *
- * @return True if both descriptors are equal
- */
-bool Conv2dContent::operator==(const OperatorContent &other) const
-{
-    // Keep the result of the cast as a pointer: dereferencing it into a by-value `auto` would copy the whole
-    // content object (tensor pack included) on every comparison
-    const auto *converted = utils::cast::polymorphic_downcast<const Conv2dContent *>(&other);
-    return desc == converted->desc;
-}
-
-/** Compare the elementwise descriptors of two operator contents
- *
- * @param[in] other Content to compare against; it is downcast to ElementwiseContent, so it is expected to be one
- *
- * @return True if both descriptors are equal
- */
-bool ElementwiseContent::operator==(const OperatorContent &other) const
-{
-    // Keep the result of the cast as a pointer: dereferencing it into a by-value `auto` would copy the whole
-    // content object (tensor pack included) on every comparison
-    const auto *converted = utils::cast::polymorphic_downcast<const ElementwiseContent *>(&other);
-    return desc == converted->desc;
-}
-
-/** Compare the floor descriptors of two operator contents
- *
- * @param[in] other Content to compare against; it is downcast to FloorContent, so it is expected to be one
- *
- * @return True if both descriptors are equal
- */
-bool FloorContent::operator==(const OperatorContent &other) const
-{
-    // Keep the result of the cast as a pointer: dereferencing it into a by-value `auto` would copy the whole
-    // content object (tensor pack included) on every comparison
-    const auto *converted = utils::cast::polymorphic_downcast<const FloorContent *>(&other);
-    return desc == converted->desc;
-}
-
-/** Select the convolution method for the given tensors and descriptor
- *
- * Decision order:
- *  1. A table of known network configurations (exact match on input size, kernel size, IFM/OFM, padding, stride
- *     and data layout) decides the method.
- *  2. Any dilation other than 1x1 selects GEMM.
- *  3. NCHW is rejected with an error.
- *  4. For NHWC, DIRECT is selected for the SRGAN-like case and for floating-point cases that pass the
- *     kernel-size / channel / workload heuristics, provided that the direct convolution kernel validates;
- *     everything else selects GEMM.
- *
- * @param[in] src         Source tensor info. Must not be nullptr
- * @param[in] weights     Weights tensor info. Must not be nullptr
- * @param[in] dst         Destination tensor info. Must not be nullptr
- * @param[in] conv2d_desc Conv2d descriptor (padding, stride, dilation)
- * @param[in] gpu_target  GPU target the heuristics are tuned for
- *
- * @return The selected convolution method
- */
-ConvolutionMethod Conv2dContent::select_conv_method(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *dst, const Conv2dDescriptor &conv2d_desc, const GPUTarget gpu_target)
-{
-    // Modified from ClConv2d::get_convolution_method
-
-    ARM_COMPUTE_ERROR_ON_NULLPTR(src);
-    ARM_COMPUTE_ERROR_ON_NULLPTR(dst);
-    ARM_COMPUTE_ERROR_ON_NULLPTR(weights);
-
-    const PadStrideInfo legacy_pad_stride(conv2d_desc.stride.x(), conv2d_desc.stride.y(), conv2d_desc.pad.left, conv2d_desc.pad.right, conv2d_desc.pad.top, conv2d_desc.pad.bottom, DimensionRoundingType{});
-    const Size2D        dilation = conv2d_desc.dilation;
-
-    const size_t idx_w = get_data_layout_dimension_index(src->data_layout(), DataLayoutDimension::WIDTH);
-    const size_t idx_h = get_data_layout_dimension_index(src->data_layout(), DataLayoutDimension::HEIGHT);
-    const size_t idx_c = get_data_layout_dimension_index(src->data_layout(), DataLayoutDimension::CHANNEL);
-
-    /* Input spatial dims, kernel size, IFM/OFM, conv info*/
-    using ConvolutionConfiguration = std::tuple<Size2D, Size2D, Size2D, PadStrideInfo, DataLayout>;
-    using ConfigurationMethod      = std::pair<ConvolutionConfiguration, ConvolutionMethod>;
-
-    const std::vector<ConfigurationMethod> known_configs =
-    {
-        // Alexnet
-        ConfigurationMethod(ConvolutionConfiguration(Size2D(27U, 27U), Size2D(5U, 5U), Size2D(48U, 128U), PadStrideInfo(1U, 1U, 2U, 2U), DataLayout::NCHW), ConvolutionMethod::DIRECT),
-        // VGG16 / VGG19
-        ConfigurationMethod(ConvolutionConfiguration(Size2D(224U, 224U), Size2D(3U, 3U), Size2D(3U, 64U), PadStrideInfo(1U, 1U, 1U, 1U), DataLayout::NCHW), ConvolutionMethod::DIRECT),
-        // Mobilenet 224
-        ConfigurationMethod(ConvolutionConfiguration(Size2D(224U, 224U), Size2D(3U, 3U), Size2D(3U, 32U), PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), DataLayout::NCHW), ConvolutionMethod::GEMM),
-        // Mobilenet 160
-        ConfigurationMethod(ConvolutionConfiguration(Size2D(160U, 160U), Size2D(3U, 3U), Size2D(3U, 24U), PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), DataLayout::NCHW), ConvolutionMethod::GEMM),
-        // Mobilenet 224
-        ConfigurationMethod(ConvolutionConfiguration(Size2D(224U, 224U), Size2D(3U, 3U), Size2D(3U, 32U), PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), DataLayout::NHWC), ConvolutionMethod::GEMM),
-        // Mobilenet 160
-        ConfigurationMethod(ConvolutionConfiguration(Size2D(160U, 160U), Size2D(3U, 3U), Size2D(3U, 24U), PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), DataLayout::NHWC), ConvolutionMethod::GEMM),
-    };
-
-    // Predicate matching a known configuration against the current tensors. Everything is bound by const
-    // reference so that probing the table does not copy the configuration tuples.
-    const auto find_config = [&](const ConfigurationMethod &c)
-    {
-        const ConvolutionConfiguration &config      = c.first;
-        const PadStrideInfo            &info        = std::get<3>(config);
-        const DataLayout                data_layout = std::get<4>(config);
-
-        return std::get<0>(config) == Size2D(src->dimension(idx_w), src->dimension(idx_h)) && std::get<1>(config) == Size2D(weights->dimension(idx_w), weights->dimension(idx_h))
-               && std::get<2>(config) == Size2D(weights->dimension(idx_c), weights->dimension(3)) && info.pad_top() == legacy_pad_stride.pad_top() && info.pad_right() == legacy_pad_stride.pad_right()
-               && info.pad_bottom() == legacy_pad_stride.pad_bottom() && info.pad_left() == legacy_pad_stride.pad_left() && info.stride() == legacy_pad_stride.stride() && (data_layout == src->data_layout());
-    };
-
-    const auto found = std::find_if(known_configs.begin(), known_configs.end(), find_config);
-    if(found != known_configs.end())
-    {
-        return found->second;
-    }
-
-    if(dilation != Size2D(1U, 1U))
-    {
-        return ConvolutionMethod::GEMM;
-    }
-    else
-    {
-        if(src->data_layout() == DataLayout::NCHW)
-        {
-            ARM_COMPUTE_ERROR("NCHW not supported");
-        }
-        else
-        {
-            const bool   is_direct_valid           = bool(ClDirectConv2dKernel::validate(src, weights, nullptr, dst, ClDirectConv2dKernelDescriptor{ conv2d_desc }));
-            const size_t kernel_sz_direct_conv_thr = get_direct_conv_kernel_threshold_nhwc(gpu_target);
-
-            // SRGAN case
-            if((src->dimension(idx_h) > 720U) && (dst->dimension(idx_h) > 720U) && (weights->dimension(idx_h) == 9) && (conv2d_desc.pad.top < 3)
-               && is_direct_valid)
-            {
-                return ConvolutionMethod::DIRECT;
-            }
-
-            // Floating-point case: GeMM/Direct
-            if(is_data_type_float(src->data_type()))
-            {
-                // Get dst shape
-                TensorShape output_shape       = misc::shape_calculator::compute_deep_convolution_shape(*src, *weights, legacy_pad_stride);
-                const bool  is_large_kernel_sz = (weights->dimension(idx_w) >= kernel_sz_direct_conv_thr) && (weights->dimension(idx_h) >= kernel_sz_direct_conv_thr);
-                const bool  is_ifm_ge_16       = src->dimension(idx_c) >= 16;
-                const bool  is_ofm_lte_8       = weights->dimension(3U) <= 8;
-                const bool  workload_gte_8192  = (output_shape[0] * output_shape[1] * output_shape[2]) / 16 >= 8192;
-                const bool  is_ifm_gt_ofm      = src->dimension(idx_c) > weights->dimension(3U);
-
-                // Direct convolution case
-                if(is_direct_valid)
-                {
-                    if((gpu_target == arm_compute::GPUTarget::G71 || gpu_target == arm_compute::GPUTarget::G72 || gpu_target == arm_compute::GPUTarget::MIDGARD))
-                    {
-                        if(is_large_kernel_sz && is_ifm_ge_16 && is_ifm_gt_ofm)
-                        {
-                            return ConvolutionMethod::DIRECT;
-                        }
-                    }
-                    else
-                    {
-                        if((is_large_kernel_sz && workload_gte_8192 && is_ifm_ge_16) || (is_ofm_lte_8 && is_ifm_ge_16))
-                        {
-                            return ConvolutionMethod::DIRECT;
-                        }
-                    }
-                }
-
-                // Default case
-                return ConvolutionMethod::GEMM;
-            }
-
-            // Generic case for quantized. Only GeMM
-            return ConvolutionMethod::GEMM;
-        }
-    }
-    // Only reached from the NCHW branch above, should ARM_COMPUTE_ERROR ever return
-    return ConvolutionMethod::DIRECT;
-}
-
-/** Translate this conv2d operator into kernels of the kernel graph
- *
- * The convolution method is the forced one if set, otherwise it is chosen by @ref select_conv_method for the
- * target of the CL scheduler. Only the direct method is implemented.
- *
- * @param[in,out] kernel_graph Kernel graph the kernels and tensors are added to
- *
- * @return An error status if the method is not implemented or the translation fails
- */
-Status Conv2dContent::translate(ClKernelGraph &kernel_graph) const
-{
-    const auto input  = _tensors.get_const_tensor(TensorType::ACL_SRC_0);
-    const auto weight = _tensors.get_const_tensor(TensorType::ACL_SRC_1);
-    const auto dst    = _tensors.get_const_tensor(TensorType::ACL_DST_0);
-    // All three are dereferenced right below when the method has to be selected
-    ARM_COMPUTE_ERROR_ON_NULLPTR(input, weight, dst);
-
-    const auto method = forced_method_enabled ? forced_method : Conv2dContent::select_conv_method(input->desc, weight->desc, dst->desc, desc, CLScheduler::get().target());
-    switch(method)
-    {
-        case ConvolutionMethod::DIRECT:
-        {
-            return translate_direct_conv2d(kernel_graph);
-        }
-        default:
-        {
-            ARM_COMPUTE_RETURN_ERROR_MSG("Not implemented");
-        }
-    }
-    return Status{};
-}
-/** Translate this conv2d operator into a direct convolution kernel
- *
- * Adds the input, weight, optional bias and destination tensors to the kernel graph (in that order), validates
- * the direct convolution kernel against the operator tensor infos and finally adds the kernel itself.
- *
- * @param[in,out] kernel_graph Kernel graph the kernel and tensors are added to
- *
- * @return The first error status encountered, otherwise an empty (OK) status
- */
-Status Conv2dContent::translate_direct_conv2d(ClKernelGraph &kernel_graph) const
-{
-    const auto src_content    = _tensors.get_const_tensor(TensorType::ACL_SRC_0);
-    const auto weight_content = _tensors.get_const_tensor(TensorType::ACL_SRC_1);
-    const auto bias_content   = _tensors.get_const_tensor(TensorType::ACL_SRC_2);
-    const auto dst_content    = _tensors.get_const_tensor(TensorType::ACL_DST_0);
-    ARM_COMPUTE_ERROR_ON_NULLPTR(src_content, weight_content, dst_content);
-
-    ITensorDescPack<ClKernelTensor> kernel_tensors;
-
-    // Adds one operator tensor to the kernel graph and stores the resulting kernel tensor under the given slot
-    const auto bind = [&](int slot, const OpTensorContent &content) -> Status
-    {
-        DependencyGraph::Id kernel_tensor_id;
-        const auto          bind_status = add_kernel_tensor(kernel_graph, *_graph, content, kernel_tensor_id);
-        ARM_COMPUTE_RETURN_ON_ERROR(bind_status);
-        kernel_tensors.add_const_tensor(slot, kernel_graph.get_tensor(kernel_tensor_id));
-        return Status{};
-    };
-
-    auto status = bind(ACL_SRC_0, *src_content);
-    ARM_COMPUTE_RETURN_ON_ERROR(status);
-
-    status = bind(ACL_SRC_1, *weight_content);
-    ARM_COMPUTE_RETURN_ON_ERROR(status);
-
-    // Bias is optional
-    if(bias_content != nullptr)
-    {
-        status = bind(ACL_SRC_2, *bias_content);
-        ARM_COMPUTE_RETURN_ON_ERROR(status);
-    }
-
-    status = bind(ACL_DST_0, *dst_content);
-    ARM_COMPUTE_RETURN_ON_ERROR(status);
-
-    DependencyGraph::Id kernel_id;
-    const auto          direct_conv_desc = ClDirectConv2dKernelDescriptor{ desc };
-
-    const auto *bias_info = (bias_content == nullptr) ? nullptr : bias_content->desc;
-    status                = ClDirectConv2dKernel::validate(src_content->desc, weight_content->desc, bias_info, dst_content->desc, direct_conv_desc);
-    ARM_COMPUTE_RETURN_ON_ERROR(status);
-
-    ClKernelConfig config{ UnitWorkloadStage{ UnitWorkloadStage::Stage::Run }, TileDescriptor{}, StoreType::TStoreIndirectWidthSelect };
-    status = kernel_graph.add_kernel<ClDirectConv2dKernel>(config, direct_conv_desc, kernel_tensors, kernel_id);
-    ARM_COMPUTE_RETURN_ON_ERROR(status);
-    ARM_COMPUTE_UNUSED(kernel_id);
-
-    return Status{};
-}
-
-/** Translate this elementwise operator into an elementwise kernel
- *
- * Adds the lhs, rhs and destination tensors to the kernel graph (in that order), validates the elementwise
- * kernel against the operator tensor infos and finally adds the kernel itself.
- *
- * @param[in,out] kernel_graph Kernel graph the kernel and tensors are added to
- *
- * @return The first error status encountered, otherwise an empty (OK) status
- */
-Status ElementwiseContent::translate(ClKernelGraph &kernel_graph) const
-{
-    const auto lhs_content = _tensors.get_const_tensor(TensorType::ACL_SRC_0);
-    const auto rhs_content = _tensors.get_const_tensor(TensorType::ACL_SRC_1);
-    const auto dst_content = _tensors.get_const_tensor(TensorType::ACL_DST_0);
-    ARM_COMPUTE_ERROR_ON_NULLPTR(lhs_content, rhs_content, dst_content);
-
-    ITensorDescPack<ClKernelTensor> kernel_tensors;
-
-    // Adds one operator tensor to the kernel graph and stores the resulting kernel tensor under the given slot
-    const auto bind = [&](int slot, const OpTensorContent &content) -> Status
-    {
-        DependencyGraph::Id kernel_tensor_id;
-        const auto          bind_status = add_kernel_tensor(kernel_graph, *_graph, content, kernel_tensor_id);
-        ARM_COMPUTE_RETURN_ON_ERROR(bind_status);
-        kernel_tensors.add_const_tensor(slot, kernel_graph.get_tensor(kernel_tensor_id));
-        return Status{};
-    };
-
-    auto status = bind(ACL_SRC_0, *lhs_content);
-    ARM_COMPUTE_RETURN_ON_ERROR(status);
-
-    status = bind(ACL_SRC_1, *rhs_content);
-    ARM_COMPUTE_RETURN_ON_ERROR(status);
-
-    status = bind(ACL_DST_0, *dst_content);
-    ARM_COMPUTE_RETURN_ON_ERROR(status);
-
-    DependencyGraph::Id kernel_id;
-    ClKernelConfig      config{ UnitWorkloadStage{ UnitWorkloadStage::Stage::Run }, TileDescriptor{}, StoreType::TStoreIndirectWidthSelect };
-
-    status = ClElementwiseKernel::validate(lhs_content->desc, rhs_content->desc, dst_content->desc);
-    ARM_COMPUTE_RETURN_ON_ERROR(status);
-
-    status = kernel_graph.add_kernel<ClElementwiseKernel>(config, ClElementwiseKernelDescriptor{ desc }, kernel_tensors, kernel_id);
-    ARM_COMPUTE_RETURN_ON_ERROR(status);
-    ARM_COMPUTE_UNUSED(kernel_id);
-
-    return Status{};
-}
-
-/** Translate this floor operator into a floor kernel
- *
- * Adds the source and destination tensors to the kernel graph (in that order), validates the floor kernel
- * against the operator tensor infos and finally adds the kernel itself.
- *
- * @param[in,out] kernel_graph Kernel graph the kernel and tensors are added to
- *
- * @return The first error status encountered, otherwise an empty (OK) status
- */
-Status FloorContent::translate(ClKernelGraph &kernel_graph) const
-{
-    const auto src = _tensors.get_const_tensor(TensorType::ACL_SRC_0);
-    const auto dst = _tensors.get_const_tensor(TensorType::ACL_DST_0);
-    ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst);
-
-    ITensorDescPack<ClKernelTensor> tensors;
-
-    DependencyGraph::Id src_id;
-    auto                st = add_kernel_tensor(kernel_graph, *_graph, *src, src_id);
-    ARM_COMPUTE_RETURN_ON_ERROR(st);
-    tensors.add_const_tensor(ACL_SRC_0, kernel_graph.get_tensor(src_id));
-
-    DependencyGraph::Id dst_id;
-    st = add_kernel_tensor(kernel_graph, *_graph, *dst, dst_id);
-    ARM_COMPUTE_RETURN_ON_ERROR(st);
-    tensors.add_const_tensor(ACL_DST_0, kernel_graph.get_tensor(dst_id));
-
-    DependencyGraph::Id floor_id;
-    ClKernelConfig      config{ UnitWorkloadStage{ UnitWorkloadStage::Stage::Run }, TileDescriptor{}, StoreType::TStoreIndirectWidthSelect };
-
-    st = ClFloorKernel::validate(src->desc, dst->desc);
-    ARM_COMPUTE_RETURN_ON_ERROR(st);
-
-    st = kernel_graph.add_kernel<ClFloorKernel>(config, ClFloorKernelDescriptor{ desc }, tensors, floor_id);
-    ARM_COMPUTE_RETURN_ON_ERROR(st);
-    // The id of the new kernel is not needed afterwards (same convention as the other translate methods)
-    ARM_COMPUTE_UNUSED(floor_id);
-
-    return Status{};
-}
-
-/** Collect the operator contents of a (const) operator graph in topological order
- *
- * @note The status returned by the topological sort is not inspected here
- *
- * @param[in] graph Operator graph to traverse
- *
- * @return Pointers to the operator contents, in the order produced by the topological sort
- */
-std::vector<const OperatorContent *> traverse(const OperatorGraph::Implementation &graph)
-{
-    const auto                           sort_result = graph.graph.topological_sort();
-    const auto                          &op_packs    = sort_result.second;
-    std::vector<const OperatorContent *> contents;
-    for(const auto &op_pack : op_packs)
-    {
-        const auto &content = graph.operators.at(op_pack.op);
-        contents.push_back(content.get());
-    }
-    return contents;
-}
-
-/** Collect the operator contents of a (mutable) operator graph in topological order
- *
- * @note The status returned by the topological sort is not inspected here
- *
- * @param[in] graph Operator graph to traverse
- *
- * @return Pointers to the operator contents, in the order produced by the topological sort
- */
-std::vector<OperatorContent *> traverse(OperatorGraph::Implementation &graph)
-{
-    const auto                     sort_result = graph.graph.topological_sort();
-    const auto                    &op_packs    = sort_result.second;
-    std::vector<OperatorContent *> contents;
-    for(const auto &op_pack : op_packs)
-    {
-        auto &content = graph.operators.at(op_pack.op);
-        contents.push_back(content.get());
-    }
-    return contents;
-}
-
-/** Translate every operator of an operator graph into the kernel graph, in topological order
- *
- * Translation stops at the first operator that reports an error.
- *
- * @param[in,out] kernel_graph Kernel graph to populate
- * @param[in]     op_graph     Operator graph to translate
- *
- * @return The first error status encountered, otherwise an empty (OK) status
- */
-Status translate(ClKernelGraph &kernel_graph, const OperatorGraph::Implementation &op_graph)
-{
-    const auto sorted_ops = traverse(op_graph);
-    for(const OperatorContent *content : sorted_ops)
-    {
-        const auto op_status = content->translate(kernel_graph);
-        ARM_COMPUTE_RETURN_ON_ERROR(op_status);
-    }
-    return Status{};
-}
-
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
-#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */
diff --git a/src/core/experimental/dynamic_fusion/WorkloadImpl/OperatorGraphImpl.h b/src/core/experimental/dynamic_fusion/WorkloadImpl/OperatorGraphImpl.h
deleted file mode 100644
index b303cdb9fc..0000000000
--- a/src/core/experimental/dynamic_fusion/WorkloadImpl/OperatorGraphImpl.h
+++ /dev/null
@@ -1,252 +0,0 @@
-/*
- * Copyright (c) 2022 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION
-#ifndef ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_OPERATORGRAPHIMPL
-#define ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_OPERATORGRAPHIMPL
-
-#include "arm_compute/core/experimental/ClWorkload.h"
-#include "src/core/experimental/dynamic_fusion/WorkloadImpl/ITensorDescPack.h"
-
-#include "support/Cast.h"
-#include "support/DeepCopy.h"
-
-#include <map>
-#include <tuple>
-#include <type_traits>
-
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-/** Complexity class reported by an operator through OperatorContent::complexity() */
-enum class OperatorComplexity
-{
-    Complex = 0, /**< Reported by Conv2dContent */
-    Simple  = 1  /**< Reported by ElementwiseContent and FloorContent */
-};
-
-struct ClKernelGraph;
-/** Content of a tensor of the operator graph: its id and a raw pointer to its tensor info */
-struct OpTensorContent
-{
-    using Id = DependencyGraph::Id;
-
-    OpTensorContent() = default;
-    /** Construct a tensor content without tensor info
-     *
-     * @param[in] id Id of the tensor
-     */
-    OpTensorContent(Id id)
-        : id{ id }, desc{}
-    {
-    }
-    /** Construct a tensor content
-     *
-     * @param[in] id   Id of the tensor
-     * @param[in] desc Tensor info; only the pointer is stored
-     */
-    OpTensorContent(Id id, ITensorInfo *desc)
-        : id{ id }, desc{ desc }
-    {
-    }
-    ~OpTensorContent()                       = default;
-    OpTensorContent(const OpTensorContent &) = default;
-    OpTensorContent &operator=(const OpTensorContent &) = default;
-    OpTensorContent(OpTensorContent &&)                 = default;
-    OpTensorContent &operator=(OpTensorContent &&) = default;
-
-    /** Two contents are equal when they point to the same tensor info object; ids are not compared */
-    bool operator==(const OpTensorContent &other) const
-    {
-        return desc == other.desc;
-    }
-
-    /** @return The tensor info (const access) */
-    const ITensorInfo *get_tensor_info() const
-    {
-        return desc;
-    }
-    /** @return The tensor info (mutable access) */
-    ITensorInfo *get_tensor_info()
-    {
-        return desc;
-    }
-
-    Id           id{};   /**< Id of the tensor */
-    ITensorInfo *desc{}; /**< Tensor info; nullptr unless one was provided at construction */
-};
-
-/** Base class for the content of an operator of the operator graph */
-struct OperatorContent
-{
-    using Id = DependencyGraph::Id;
-
-    OperatorContent() = default;
-    /** Construct an operator content
-     *
-     * @param[in] graph   Operator graph the operator belongs to; only the pointer is stored
-     * @param[in] id      Id of the operator
-     * @param[in] tensors Tensors of the operator (copied)
-     */
-    OperatorContent(const OperatorGraph::Implementation *graph, Id id, const ITensorDescPack<OpTensorContent> &tensors)
-        : _graph{ graph }, _id{ id }, _tensors{ tensors }
-    {
-    }
-    OperatorContent(const OperatorContent &op) = default;
-    OperatorContent &operator=(const OperatorContent &op) = default;
-    OperatorContent(OperatorContent &&op)                 = default;
-    OperatorContent &operator=(OperatorContent &&op) = default;
-    virtual ~OperatorContent()                       = default;
-
-    /** @return Complexity class of the operator */
-    virtual OperatorComplexity complexity() const = 0;
-    /** Compare against another operator content */
-    virtual bool operator==(const OperatorContent &other) const = 0;
-    /** Translate the operator into kernels and tensors of the kernel graph
-     *
-     * @param[in,out] kernel_graph Kernel graph to add to
-     *
-     * @return Status of the translation
-     */
-    virtual Status translate(ClKernelGraph &kernel_graph) const = 0;
-
-protected:
-    const OperatorGraph::Implementation *_graph{};   /**< Operator graph the operator belongs to */
-    Id                                   _id{};      /**< Id of the operator */
-    ITensorDescPack<OpTensorContent>     _tensors{}; /**< Tensors of the operator */
-};
-
-/** Content of a conv2d operator */
-struct Conv2dContent : public OperatorContent
-{
-    Conv2dContent() = default;
-    /** Construct a conv2d content whose convolution method is selected at translation time
-     *
-     * @param[in] graph   Operator graph the operator belongs to
-     * @param[in] id      Id of the operator
-     * @param[in] desc    Conv2d descriptor
-     * @param[in] tensors Tensors of the operator
-     */
-    Conv2dContent(const OperatorGraph::Implementation *graph, Id id, const Conv2dDescriptor &desc, const ITensorDescPack<OpTensorContent> &tensors)
-        : OperatorContent(graph, id, tensors), desc(desc), forced_method(), forced_method_enabled(false)
-    {
-    }
-    /** Construct a conv2d content with a forced convolution method
-     *
-     * Temporary. Do not need to pass ConvolutionMethod
-     *
-     * @param[in] graph   Operator graph the operator belongs to
-     * @param[in] id      Id of the operator
-     * @param[in] desc    Conv2d descriptor
-     * @param[in] tensors Tensors of the operator
-     * @param[in] method  Convolution method to force
-     */
-    Conv2dContent(const OperatorGraph::Implementation *graph, Id id, const Conv2dDescriptor &desc, const ITensorDescPack<OpTensorContent> &tensors, ConvolutionMethod method)
-        : OperatorContent(graph, id, tensors), desc(desc), forced_method(method), forced_method_enabled(true)
-    {
-    }
-    ~Conv2dContent()                     = default;
-    Conv2dContent(const Conv2dContent &) = default;
-    Conv2dContent &operator=(const Conv2dContent &) = default;
-    Conv2dContent(Conv2dContent &&)                 = default;
-    Conv2dContent &operator=(Conv2dContent &&) = default;
-
-    bool operator==(const OperatorContent &other) const override;
-    /** @return OperatorComplexity::Complex */
-    OperatorComplexity complexity() const override
-    {
-        return OperatorComplexity::Complex;
-    }
-    /** Force the convolution method used at translation time
-     *
-     * @param[in] method Convolution method to force
-     */
-    void set_method(ConvolutionMethod method)
-    {
-        forced_method_enabled = true;
-        forced_method         = method;
-    }
-
-    Status translate(ClKernelGraph &kernel_graph) const override;
-    /** Replicate heuristics of @ref ClConv2d::get_convolution_method(), except that non-supported data types and data layouts are removed from the heuristics
-     *
-     * @param[in] src         Source tensor info
-     * @param[in] weights     Weights tensor info
-     * @param[in] dst         Destination tensor info
-     * @param[in] conv2d_desc Conv2d descriptor
-     * @param[in] gpu_target  GPU target
-     *
-     * @return ConvolutionMethod
-     */
-    static ConvolutionMethod select_conv_method(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *dst, const Conv2dDescriptor &conv2d_desc, const GPUTarget gpu_target);
-
-    Conv2dDescriptor  desc{};                                          /**< Conv2d descriptor */
-    ConvolutionMethod forced_method{ ConvolutionMethod::GEMM_CONV2D }; /**< Method to use when forcing is enabled */
-    bool              forced_method_enabled{ false };                  /**< Whether @ref forced_method overrides the heuristics */
-
-private:
-    /** Translate the operator using the direct convolution kernel */
-    Status translate_direct_conv2d(ClKernelGraph &kernel_graph) const;
-};
-
-/** Content of an elementwise operator */
-class ElementwiseContent : public OperatorContent
-{
-public:
-    ElementwiseContent() = default;
-    /** Construct an elementwise content
-     *
-     * @param[in] graph   Operator graph the operator belongs to
-     * @param[in] id      Id of the operator
-     * @param[in] desc    Elementwise descriptor
-     * @param[in] tensors Tensors of the operator
-     */
-    ElementwiseContent(const OperatorGraph::Implementation *graph, Id id, const ElementwiseDescriptor &desc, const ITensorDescPack<OpTensorContent> &tensors)
-        : OperatorContent(graph, id, tensors), desc(desc)
-    {
-    }
-    ~ElementwiseContent()                          = default;
-    ElementwiseContent(const ElementwiseContent &) = default;
-    ElementwiseContent &operator=(const ElementwiseContent &) = default;
-    ElementwiseContent(ElementwiseContent &&)                 = default;
-    ElementwiseContent &operator=(ElementwiseContent &&) = default;
-
-    bool operator==(const OperatorContent &other) const override;
-    /** @return OperatorComplexity::Simple */
-    OperatorComplexity complexity() const override
-    {
-        return OperatorComplexity::Simple;
-    }
-    Status translate(ClKernelGraph &kernel_graph) const override;
-
-private:
-    ElementwiseDescriptor desc{}; /**< Elementwise descriptor */
-};
-
-/** Content of a floor operator */
-class FloorContent : public OperatorContent
-{
-public:
-    FloorContent() = default;
-    /** Construct a floor content
-     *
-     * @param[in] graph   Operator graph the operator belongs to
-     * @param[in] id      Id of the operator
-     * @param[in] desc    Floor descriptor
-     * @param[in] tensors Tensors of the operator
-     */
-    FloorContent(const OperatorGraph::Implementation *graph, Id id, const FloorDescriptor &desc, const ITensorDescPack<OpTensorContent> &tensors)
-        : OperatorContent(graph, id, tensors), desc(desc)
-    {
-    }
-    ~FloorContent()                    = default;
-    FloorContent(const FloorContent &) = default;
-    FloorContent &operator=(const FloorContent &) = default;
-    FloorContent(FloorContent &&)                 = default;
-    FloorContent &operator=(FloorContent &&) = default;
-
-    bool operator==(const OperatorContent &other) const override;
-    /** @return OperatorComplexity::Simple */
-    OperatorComplexity complexity() const override
-    {
-        return OperatorComplexity::Simple;
-    }
-    Status translate(ClKernelGraph &kernel_graph) const override;
-
-private:
-    FloorDescriptor desc{}; /**< Floor descriptor */
-};
-
-/** Implementation of the operator graph: the dependency graph plus the contents of its operators and tensors */
-struct OperatorGraph::Implementation
-{
-    using Dependency  = DependencyGraph;
-    using OperatorMap = std::map<Operator::Id, utils::memory::deep_unique_ptr<OperatorContent>>;
-    using OpTensorMap = std::map<OpTensor::Id, utils::memory::deep_unique_ptr<OpTensorContent>>;
-
-    Implementation()  = default;
-    ~Implementation() = default;
-
-    /** Create the content of an operator and store it under the given id, replacing any previous content
-     *
-     * @tparam ContentT Concrete content type to create
-     *
-     * @param[in] id   Id of the operator
-     * @param[in] args Arguments forwarded to the ContentT constructor, after this graph and the id
-     */
-    template <typename ContentT, typename... Args>
-    void add_node(Operator::Id id, Args &&... args)
-    {
-        operators[id] = utils::memory::make_deep_unique<OperatorContent, ContentT>(this, id, std::forward<Args>(args)...);
-    }
-
-    /** Create the content of a tensor and store it under the given id, replacing any previous content
-     *
-     * @param[in] id   Id of the tensor
-     * @param[in] args Arguments forwarded to the OpTensorContent constructor, after the id
-     */
-    template <typename... Args>
-    void add_tensor(OpTensor::Id id, Args &&... args)
-    {
-        tensors[id] = utils::memory::make_deep_unique<OpTensorContent, OpTensorContent>(id, std::forward<Args>(args)...);
-    }
-
-    /** Two implementations are equal when their dependency graphs, operators and tensors are equal; the status is not compared */
-    friend bool operator==(const OperatorGraph::Implementation &graph0, const OperatorGraph::Implementation &graph1)
-    {
-        return graph0.graph == graph1.graph && graph0.operators == graph1.operators && graph0.tensors == graph1.tensors;
-    }
-
-    Dependency  graph{};     /**< Dependency graph of operators and tensors */
-    OperatorMap operators{}; /**< Operator contents, by operator id */
-    OpTensorMap tensors{};   /**< Tensor contents, by tensor id */
-    Status      status{};    /**< Status of the graph */
-};
-
-std::vector<const OperatorContent *> traverse(const OperatorGraph::Implementation &graph);
-
-std::vector<OperatorContent *> traverse(OperatorGraph::Implementation &graph);
-
-Status translate(ClKernelGraph &kernel_graph, const OperatorGraph::Implementation &op_graph);
-
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
-
-#endif //ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_OPERATORGRAPHIMPL
-#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */ \ No newline at end of file