Diffstat (limited to 'src/dynamic_fusion/sketch/utils')
-rw-r--r-- | src/dynamic_fusion/sketch/utils/DependencyGraph.h | 658
1 file changed, 658 insertions, 0 deletions
diff --git a/src/dynamic_fusion/sketch/utils/DependencyGraph.h b/src/dynamic_fusion/sketch/utils/DependencyGraph.h
new file mode 100644
index 0000000000..55eb4c5c77
--- /dev/null
+++ b/src/dynamic_fusion/sketch/utils/DependencyGraph.h
@@ -0,0 +1,658 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef SRC_DYNAMIC_FUSION_SKETCH_UTILS_DEPENDENCYGRAPH
+#define SRC_DYNAMIC_FUSION_SKETCH_UTILS_DEPENDENCYGRAPH
+
+#include "arm_compute/core/Error.h"
+
+#include <algorithm>
+#include <cstdint>
+#include <deque>
+#include <iosfwd>    // Forward declaration of std::ostream, used by operator<<
+#include <iterator>  // std::back_inserter
+#include <map>
+#include <set>
+#include <tuple>
+#include <vector>
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+namespace
+{
+template <typename T>
+bool is_in(const T &v, const std::vector<T> &vec)
+{
+    return std::find(std::begin(vec), std::end(vec), v) != std::end(vec);
+}
+} // namespace
+
+/** A multi-input (tensors), multi-output (tensors) directed acyclic graph
+ * Represented as a doubly-linked adjacency list that distinguishes between source and destination
+ */
+class DependencyGraph
+{
+public:
+    using Id         = int32_t;
+    using TensorId   = Id;
+    using OperatorId = Id;
+    /** Adjacency list
+     *
+     */
+    using AdjList = std::map<Id, std::vector<Id>>;
+
+    /** A pack of an operator together with its input and output tensors, used when traversing the graph in topological order
+     *
+     */
+    struct OpPack
+    {
+        OperatorId            op{};
+        std::vector<TensorId> inputs{};
+        std::vector<TensorId> outputs{};
+        friend bool operator==(const OpPack &opp0, const OpPack &opp1)
+        {
+            return std::make_tuple(
+                       opp0.op, opp0.inputs, opp0.outputs)
+                   == std::make_tuple(
+                       opp1.op, opp1.inputs, opp1.outputs);
+        }
+    };
+
+public:
+    DependencyGraph() = default;
+    friend std::ostream &operator<<(std::ostream &os, const DependencyGraph &);
+
+    /** Try adding an operator (without actually adding it), while keeping the graph as a "linear sequence" / list
+     * @note The list is expected to only grow from head to tail
+     *
+     * PRECONDITION: The current graph is already linear
+     *
+     * @return true  If the operator can be added while keeping the graph as a linear sequence
+     * @return false Otherwise
+     */
+    bool try_add_operator_as_linear(OperatorId op, const std::vector<TensorId> &inputs, const std::vector<TensorId> &outputs) const
+    {
+        ARM_COMPUTE_UNUSED(op, outputs);
+        if(all_ops().empty())
+        {
+            return true;
+        }
+        // The new op must share exactly one tensor with the existing graph...
+        std::vector<TensorId> common_tensors{};
+        auto                  existing_tensors = all_tensors();
+        std::sort(existing_tensors.begin(), existing_tensors.end()); // To use std::set_intersection, both input ranges must be sorted
+        std::vector<TensorId> sorted_inputs{ inputs };
+        std::sort(sorted_inputs.begin(), sorted_inputs.end());
+        std::set_intersection(existing_tensors.begin(), existing_tensors.end(),
+                              sorted_inputs.begin(), sorted_inputs.end(), std::back_inserter(common_tensors));
+        if(common_tensors.size() != 1U)
+        {
+            return false;
+        }
+        // ...and that shared tensor must be an output of the current tail op
+        const auto linked_tensor = common_tensors[0];
+        const auto tail_ops      = get_dst_ops();
+        ARM_COMPUTE_ERROR_ON(tail_ops.size() != 1U); // PRECONDITION: a linear graph has exactly one tail op
+        const auto tail = tail_ops[0];
+
+        if(!is_in(linked_tensor, dst_tensors(tail)))
+        {
+            return false;
+        }
+        return true;
+    }
+    /** Add an operator, while keeping the graph as a "linear sequence"
+     *
+     * PRECONDITION: The current graph is already linear
+     * INVARIANT: The list can only grow from head to tail
+     * POSTCONDITION: The graph remains linear
+     */
+    void add_operator_as_linear(OperatorId op, const std::vector<TensorId> &inputs, const std::vector<TensorId> &outputs)
+    {
+        ARM_COMPUTE_ERROR_ON(!try_add_operator_as_linear(op, inputs, outputs));
+        const auto success = add_operator(op, inputs, outputs);
+        ARM_COMPUTE_ERROR_ON(!success);
+    }
+    /** Add a new operator
+     * Adding an operator that would violate the DAG invariant fails,
+     * and a failed insertion leaves the graph unchanged
+     *
+     * @param[in] op      Operator to add
+     * @param[in] inputs  Input tensors to the operator
+     * @param[in] outputs Output tensors to the operator
+     *
+     * @return true  If the operator was added
+     * @return false Otherwise
+     */
+    bool add_operator(OperatorId op, const std::vector<TensorId> &inputs, const std::vector<TensorId> &outputs)
+    {
+        if(operator_exists(op))
+        {
+            return false;
+        }
+        _adj_src_tensors[op] = {};
+        _adj_dst_tensors[op] = {};
+        for(auto in_tensor : inputs)
+        {
+            // Linking input tensor to operator node will never create a cycle / loop because we guarantee
+            // each op is newly created, so every <input, op> pair / edge is new
+            link_input(op, in_tensor);
+        }
+        for(auto out_tensor : outputs)
+        {
+            // If there already exists a path from op's output tensor back to op, then linking the two would create a loop / cycle
+            if(path_exists_from_tensor_to_op(out_tensor, op))
+            {
+                remove_operator(op);
+                return false;
+            }
+            else
+            {
+                link_output(op, out_tensor);
+            }
+        }
+
+        return true;
+    }
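+    /* A minimal usage sketch of add_operator() (illustrative only; the
+     * operator and tensor ids below are arbitrary values chosen for the example):
+     *
+     * @code
+     * DependencyGraph graph{};
+     * // op0: t0 -> t1, then op1: t1 -> t2
+     * graph.add_operator(0, { 0 }, { 1 }); // returns true
+     * graph.add_operator(1, { 1 }, { 2 }); // returns true
+     * // Writing back into an ancestor tensor would form a cycle and is rejected:
+     * graph.add_operator(2, { 2 }, { 0 }); // returns false; the graph is unchanged
+     * @endcode
+     */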
+    /** Sort the graph in topological order
+     *
+     * @return std::vector<OpPack> Operator packs in topological order
+     */
+    std::vector<OpPack> topological_sort() const
+    {
+        // In-degree of an op: the number of its source (predecessor) operators
+        std::map<OperatorId, unsigned int> in_degree{};
+        std::set<OperatorId>               visited_ops{};
+        std::deque<OperatorId>             zero_in_degree_ops{};
+        std::vector<OpPack>                sorted_op_packs{};
+        for(auto op : all_ops())
+        {
+            const auto degree = src_ops(op).size();
+            in_degree[op]     = degree;
+            if(degree == 0)
+            {
+                zero_in_degree_ops.push_back(op);
+                visited_ops.insert(op);
+            }
+        }
+
+        while(!zero_in_degree_ops.empty())
+        {
+            const OperatorId op = zero_in_degree_ops.front();
+            zero_in_degree_ops.pop_front();
+            sorted_op_packs.push_back(OpPack{ op, src_tensors(op), dst_tensors(op) });
+
+            for(const auto next_op : dst_ops(op))
+            {
+                if(in_degree[next_op] > 0)
+                {
+                    in_degree[next_op]--;
+                }
+                if(in_degree[next_op] == 0 && visited_ops.find(next_op) == visited_ops.end())
+                {
+                    zero_in_degree_ops.push_back(next_op);
+                    visited_ops.insert(next_op); // Mark next_op as visited so it cannot be enqueued twice
+                }
+            }
+        }
+
+        return sorted_op_packs;
+    }
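+    /* For example (illustrative only, arbitrary ids): for a two-op chain
+     * op0: t0 -> t1 followed by op1: t1 -> t2, topological_sort() returns
+     *     { OpPack{ 0, { 0 }, { 1 } }, OpPack{ 1, { 1 }, { 2 } } }
+     * i.e. each op appears only after all of its producers.
+     */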
+    /** Recursive utility for find_independent_paths() */
+    void find_independent_paths_util(OperatorId op, std::vector<std::vector<OperatorId>> &paths, std::vector<OperatorId> cur_path,
+                                     const std::map<OperatorId, unsigned int> &in_degree) const
+    {
+        // Stop at an unresolved dependency: op still depends on an op outside the current path
+        if(in_degree.at(op) > 1)
+        {
+            paths.push_back(cur_path);
+            return;
+        }
+        const auto child_ops = dst_ops(op);
+
+        cur_path.push_back(op);
+        // Hit a leaf op
+        if(child_ops.empty())
+        {
+            paths.push_back(cur_path);
+            return;
+        }
+        for(const auto child_op : child_ops)
+        {
+            find_independent_paths_util(child_op, paths, cur_path, in_degree);
+        }
+    }
+    /** Find all independent linear paths from op, i.e. paths whose ops do not depend on any op outside the path
+     *
+     * @return std::vector<std::vector<OperatorId>> All independent paths starting from op
+     */
+    std::vector<std::vector<OperatorId>> find_independent_paths(OperatorId op,
+                                                                const std::map<OperatorId, unsigned int> &in_degree) const
+    {
+        std::vector<std::vector<OperatorId>> paths;
+        std::vector<OperatorId>              cur_path;
+        find_independent_paths_util(op, paths, cur_path, in_degree);
+        return paths;
+    }
+    /** Find a longest linear path from op that does not depend on any other op
+     *
+     * @return std::vector<OperatorId> A longest independent path starting from op
+     */
+    std::vector<OperatorId> find_longest_independent_path(OperatorId op,
+                                                          const std::map<OperatorId, unsigned int> &in_degree) const
+    {
+        const auto &paths = find_independent_paths(op, in_degree);
+        ARM_COMPUTE_ERROR_ON(paths.empty());
+        size_t                         max_len  = 0;
+        const std::vector<OperatorId> *max_path = nullptr;
+
+        for(const auto &path : paths)
+        {
+            if(path.size() >= max_len)
+            {
+                max_path = &path;
+                max_len  = path.size();
+            }
+        }
+        return *max_path;
+    }
+    /** Propose the longest independent path among the candidate ops, and remove its head op from the candidates
+     *
+     * @return std::vector<OperatorId> The proposed path
+     */
+    std::vector<OperatorId> propose_next_path(std::set<OperatorId>                      &candidate_ops,
+                                              const std::map<OperatorId, unsigned int> &in_degree) const
+    {
+        if(candidate_ops.empty())
+        {
+            return {};
+        }
+        size_t                  max_len = 0;
+        std::vector<OperatorId> max_path;
+        OperatorId              chosen_op{};
+        for(auto op : candidate_ops)
+        {
+            const auto path = find_longest_independent_path(op, in_degree);
+            if(path.size() >= max_len)
+            {
+                chosen_op = op;
+                max_path  = path;
+                max_len   = path.size();
+            }
+        }
+        candidate_ops.erase(chosen_op);
+        return max_path;
+    }
+    /** Partition the graph into a list of linear sub-graphs, preserving the topological order and trying to minimize
+     * the number of partitions
+     *
+     * @return std::vector<std::vector<OpPack>> The partitions, each a linear sequence of operator packs
+     */
+    std::vector<std::vector<OpPack>> topological_partition() const
+    {
+        // Initialize in-degrees and the set of zero-in-degree candidate ops
+        std::map<OperatorId, unsigned int> in_degree{};
+        std::set<OperatorId>               candidate_ops{};
+        for(auto op : all_ops())
+        {
+            const auto degree = src_ops(op).size();
+            in_degree[op]     = degree;
+            if(degree == 0)
+            {
+                candidate_ops.insert(op);
+            }
+        }
+
+        std::vector<std::vector<OpPack>> sorted_partitions{};
+        while(!candidate_ops.empty())
+        {
+            // Propose the longest independent path starting from the remaining candidate ops
+            const auto path = propose_next_path(candidate_ops, in_degree);
+
+            // Append it to sorted_partitions
+            std::vector<OpPack> path_op_pack{};
+            for(auto op : path)
+            {
+                path_op_pack.push_back(OpPack{ op, src_tensors(op), dst_tensors(op) });
+            }
+            sorted_partitions.push_back(path_op_pack);
+            // Remove the whole path from the bookkeeping (update in_degree and candidate_ops)
+            for(auto op : path)
+            {
+                for(const auto next_op_child : dst_ops(op))
+                {
+                    if(in_degree[next_op_child] > 0)
+                    {
+                        in_degree[next_op_child]--;
+                    }
+                    if(in_degree[next_op_child] == 0 && !is_in(next_op_child, path)) // We do not want to put the proposed path back into the candidates
+                    {
+                        candidate_ops.insert(next_op_child);
+                    }
+                }
+            }
+        }
+        return sorted_partitions;
+    }
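+    /* An illustrative sketch of topological_partition() (arbitrary ids): for the
+     * diamond-shaped graph
+     *     op0 -> op1 -> op3
+     *     op0 -> op2 -> op3
+     * the result is two linear partitions, e.g.
+     *     { op0, op2 }, { op1, op3 }
+     * (whether op1 or op2 joins op0's partition depends on how the tie between the
+     * equally long candidate paths is broken). op3 can never share a partition with
+     * both of its parents, since each partition must be a linear sequence.
+     */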
+    /** Strict equality comparison (all internal ids and order of insertion matter).
+     * In the future this may be replaced with a topological comparison, allowing equivalent graphs with different internal ids to be equal
+     *
+     * @param[in] g0 First graph to compare
+     * @param[in] g1 Second graph to compare
+     *
+     * @return true  If the two graphs are identical
+     * @return false Otherwise
+     */
+    friend bool operator==(const DependencyGraph &g0, const DependencyGraph &g1)
+    {
+        // Do not compare id allocators
+        return std::make_tuple(
+                   g0._adj_src_tensors, g0._adj_dst_tensors, g0._adj_src_ops, g0._adj_dst_ops)
+               == std::make_tuple(
+                   g1._adj_src_tensors, g1._adj_dst_tensors, g1._adj_src_ops, g1._adj_dst_ops);
+    }
+    /** Get the ops that produce a tensor
+     *
+     * @return std::vector<OperatorId>
+     */
+    std::vector<OperatorId> src_ops_from_tensor(TensorId tensor) const
+    {
+        return _adj_src_ops.at(tensor);
+    }
+    /** Get the ops that consume a tensor
+     *
+     * @return std::vector<OperatorId>
+     */
+    std::vector<OperatorId> dst_ops_from_tensor(TensorId tensor) const
+    {
+        return _adj_dst_ops.at(tensor);
+    }
+    /** Get all tensors
+     *
+     * @return std::vector<TensorId>
+     */
+    std::vector<TensorId> all_tensors() const
+    {
+        std::vector<TensorId> tensors{};
+        std::transform(std::begin(_adj_src_ops), std::end(_adj_src_ops), std::back_inserter(tensors), [](const auto &it)
+        {
+            return it.first;
+        });
+        return tensors;
+    }
+    /** Get source tensors of the whole graph, i.e. tensors produced by no op
+     *
+     * @return std::vector<TensorId>
+     */
+    std::vector<TensorId> global_src_tensors() const
+    {
+        std::vector<TensorId> tensors;
+        for(auto tensor_src_ops : _adj_src_ops)
+        {
+            if(tensor_src_ops.second.empty())
+            {
+                tensors.push_back(tensor_src_ops.first);
+            }
+        }
+        return tensors;
+    }
+    /** Get destination tensors of the whole graph, i.e. tensors consumed by no op
+     *
+     * @return std::vector<TensorId>
+     */
+    std::vector<TensorId> global_dst_tensors() const
+    {
+        std::vector<TensorId> tensors;
+        for(auto tensor_dst_ops : _adj_dst_ops)
+        {
+            if(tensor_dst_ops.second.empty())
+            {
+                tensors.push_back(tensor_dst_ops.first);
+            }
+        }
+        return tensors;
+    }
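+    /* An illustrative sketch (arbitrary ids): for the chain
+     * t0 -> op0 -> t1 -> op1 -> t2, global_src_tensors() returns { t0 }
+     * (consumed but never produced) and global_dst_tensors() returns { t2 }
+     * (produced but never consumed); t1 appears in neither, as it is both
+     * produced and consumed inside the graph.
+     */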
+    /** Get all root ops. Root ops can also be referred to as "src ops" of the whole graph
+     *
+     * @return std::vector<OperatorId>
+     */
+    std::vector<OperatorId> get_root_ops() const
+    {
+        std::vector<OperatorId> ops{};
+        const auto              op_list = all_ops();
+
+        for(auto op : op_list)
+        {
+            if(src_ops(op).empty())
+            {
+                ops.emplace_back(op);
+            }
+        }
+        return ops;
+    }
+
+private:
+    void link_input(OperatorId op, TensorId in_tensor)
+    {
+        ARM_COMPUTE_ERROR_ON(!operator_exists(op));
+        if(!tensor_exists(in_tensor))
+        {
+            insert_new_tensor(in_tensor);
+        }
+        ARM_COMPUTE_ERROR_ON(are_connected(op, in_tensor)); // Prevent repetitive linking
+        _adj_src_tensors[op].push_back(in_tensor);
+        _adj_dst_ops[in_tensor].push_back(op);
+    }
+    void link_output(OperatorId op, TensorId out_tensor)
+    {
+        ARM_COMPUTE_ERROR_ON(!operator_exists(op));
+        if(!tensor_exists(out_tensor))
+        {
+            insert_new_tensor(out_tensor);
+        }
+        ARM_COMPUTE_ERROR_ON(are_connected(op, out_tensor)); // Prevent repetitive linking
+        _adj_dst_tensors[op].push_back(out_tensor);
+        _adj_src_ops[out_tensor].push_back(op);
+    }
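+    /* A note on the representation (describing the existing invariant, not new
+     * behavior): every edge is recorded twice, once from the op side and once
+     * from the tensor side. For example, link_input(op, t) appends t to
+     * _adj_src_tensors[op] and op to _adj_dst_ops[t]. The four adjacency lists
+     * must therefore always stay pairwise consistent, which remove_operator()
+     * below relies on when unlinking an op.
+     */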
+    std::vector<OperatorId> src_ops(OperatorId op) const
+    {
+        ARM_COMPUTE_ERROR_ON(!operator_exists(op));
+        std::vector<OperatorId> ops{};
+        for(TensorId src_tensor : src_tensors(op))
+        {
+            ops.insert(ops.end(), std::begin(_adj_src_ops.at(src_tensor)), std::end(_adj_src_ops.at(src_tensor)));
+        }
+        return ops;
+    }
+    std::vector<OperatorId> dst_ops(OperatorId op) const
+    {
+        ARM_COMPUTE_ERROR_ON(!operator_exists(op));
+        std::vector<OperatorId> ops{};
+        for(TensorId dst_tensor : _adj_dst_tensors.at(op))
+        {
+            ops.insert(ops.end(), std::begin(_adj_dst_ops.at(dst_tensor)), std::end(_adj_dst_ops.at(dst_tensor)));
+        }
+        return ops;
+    }
+
+    /** Get source tensors of an operator
+     *
+     * @param[in] op
+     *
+     * @return std::vector<TensorId>
+     */
+    std::vector<TensorId> src_tensors(OperatorId op) const
+    {
+        ARM_COMPUTE_ERROR_ON(!operator_exists(op));
+        return _adj_src_tensors.at(op);
+    }
+    /** Get destination tensors of an operator
+     *
+     * @param[in] op
+     *
+     * @return std::vector<TensorId>
+     */
+    std::vector<TensorId> dst_tensors(OperatorId op) const
+    {
+        ARM_COMPUTE_ERROR_ON(!operator_exists(op));
+        return _adj_dst_tensors.at(op);
+    }
+    /** Get all operators
+     *
+     * @return std::vector<OperatorId>
+     */
+    std::vector<OperatorId> all_ops() const
+    {
+        std::vector<OperatorId> ops{};
+        std::transform(std::begin(_adj_src_tensors), std::end(_adj_src_tensors), std::back_inserter(ops), [](const auto &it)
+        {
+            return it.first;
+        });
+        return ops;
+    }
+    /** Remove an operator from the graph
+     *
+     * @param[in] op
+     */
+    void remove_operator(OperatorId op)
+    {
+        for(auto src_tensor : _adj_src_tensors.at(op))
+        {
+            auto &dst_ops = _adj_dst_ops.at(src_tensor);
+            dst_ops.erase(
+                std::remove(std::begin(dst_ops), std::end(dst_ops), op),
+                std::end(dst_ops));
+        }
+        for(auto dst_tensor : _adj_dst_tensors.at(op))
+        {
+            auto &src_ops = _adj_src_ops.at(dst_tensor);
+            src_ops.erase(
+                std::remove(std::begin(src_ops), std::end(src_ops), op),
+                std::end(src_ops));
+        }
+        // Remove any isolated tensors
+        // An isolated tensor is one whose _adj_src_ops and _adj_dst_ops are both empty
+        for(auto t : all_tensors())
+        {
+            if(_adj_src_ops.at(t).empty() && _adj_dst_ops.at(t).empty())
+            {
+                _adj_src_ops.erase(t);
+                _adj_dst_ops.erase(t);
+            }
+        }
+        _adj_src_tensors.erase(op);
+        _adj_dst_tensors.erase(op);
+    }
+    void insert_new_tensor(TensorId tensor)
+    {
+        _adj_src_ops[tensor] = {};
+        _adj_dst_ops[tensor] = {};
+    }
+    bool tensor_exists(TensorId tensor) const
+    {
+        return _adj_src_ops.find(tensor) != _adj_src_ops.end() && _adj_dst_ops.find(tensor) != _adj_dst_ops.end();
+    }
+    bool operator_exists(OperatorId op) const
+    {
+        return _adj_src_tensors.find(op) != _adj_src_tensors.end() && _adj_dst_tensors.find(op) != _adj_dst_tensors.end();
+    }
+    bool is_src_tensor_of(OperatorId op, TensorId tensor) const
+    {
+        if(!operator_exists(op) || !tensor_exists(tensor))
+        {
+            return false;
+        }
+        const auto op_inputs = src_tensors(op);
+        return std::find(op_inputs.begin(), op_inputs.end(), tensor) != op_inputs.end();
+    }
+    bool is_dst_tensor_of(OperatorId op, TensorId tensor) const
+    {
+        if(!operator_exists(op) || !tensor_exists(tensor))
+        {
+            return false;
+        }
+        const auto op_outputs = dst_tensors(op);
+        return std::find(op_outputs.begin(), op_outputs.end(), tensor) != op_outputs.end();
+    }
+    bool are_connected(OperatorId op, TensorId tensor) const
+    {
+        return is_src_tensor_of(op, tensor) || is_dst_tensor_of(op, tensor);
+    }
+    /** Check if op is a destination / leaf operator of the whole graph
+     *
+     * @param[in] op
+     *
+     * @return true  If op is a leaf operator
+     * @return false Otherwise
+     */
+    bool is_dst_op(OperatorId op) const
+    {
+        return dst_ops(op).empty();
+    }
+    std::vector<OperatorId> get_dst_ops() const
+    {
+        std::vector<OperatorId> ops{};
+        const auto              op_list = all_ops();
+
+        for(auto op : op_list)
+        {
+            if(is_dst_op(op))
+            {
+                ops.emplace_back(op);
+            }
+        }
+        return ops;
+    }
+    bool path_exists_from_tensor_to_op(TensorId src_tensor, OperatorId dst_op) const
+    {
+        if(!tensor_exists(src_tensor) || !operator_exists(dst_op))
+        {
+            return false;
+        }
+        for(auto child_op : dst_ops_from_tensor(src_tensor))
+        {
+            if(path_exists_from_op_to_op(child_op, dst_op))
+            {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    bool path_exists_from_op_to_op(OperatorId src_op, OperatorId dst_op) const
+    {
+        if(!operator_exists(src_op) || !operator_exists(dst_op))
+        {
+            return false;
+        }
+        if(src_op == dst_op)
+        {
+            return true;
+        }
+        if(is_in(src_op, get_dst_ops()))
+        {
+            return false;
+        }
+        for(auto child_tensor : dst_tensors(src_op))
+        {
+            if(path_exists_from_tensor_to_op(child_tensor, dst_op))
+            {
+                return true;
+            }
+        }
+        return false;
+    }
+
+private:
+    AdjList _adj_src_tensors{};
+    AdjList _adj_dst_tensors{};
+    AdjList _adj_src_ops{};
+    AdjList _adj_dst_ops{};
+};
+
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+#endif /* SRC_DYNAMIC_FUSION_SKETCH_UTILS_DEPENDENCYGRAPH */