/*
 * Copyright (c) 2022 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#ifndef SRC_DYNAMIC_FUSION_SKETCH_UTILS_DEPENDENCYGRAPH
#define SRC_DYNAMIC_FUSION_SKETCH_UTILS_DEPENDENCYGRAPH

#include "arm_compute/core/Error.h"

#include <algorithm>
#include <cstdint>
#include <deque>
#include <map>
#include <ostream>
#include <set>
#include <tuple>
#include <vector>

namespace arm_compute
{
namespace experimental
{
namespace dynamic_fusion
{
namespace
{
template <typename T>
bool is_in(const T &v, const std::vector<T> &vec)
{
    return std::find(std::begin(vec), std::end(vec), v) != std::end(vec);
}
} // namespace

/** A multi-input (tensors), multi-output (tensors) directed acyclic graph
 *  Represented as doubly-linked adjacency lists that differentiate between source and destination
 */
class DependencyGraph
{
public:
    using Id         = int32_t;
    using TensorId   = Id;
    using OperatorId = Id;
    /** Adjacency list */
    using AdjList = std::map<Id, std::vector<Id>>;

    /** A pack of an operator and its input and output tensors, used when traversing the graph in topological order */
    struct OpPack
    {
        OperatorId            op{};
        std::vector<TensorId> inputs{};
        std::vector<TensorId> outputs{};
        friend bool operator==(const OpPack &opp0, const OpPack &opp1)
        {
            return std::make_tuple(opp0.op, opp0.inputs, opp0.outputs) == std::make_tuple(opp1.op, opp1.inputs, opp1.outputs);
        }
    };

public:
    DependencyGraph() = default;
    friend std::ostream &operator<<(std::ostream &os, const DependencyGraph &);

    /** Try adding an operator (without actually adding it), while keeping the graph as a "linear sequence" / list
     * @note The list is expected to only grow from head to tail
     *
     * PRECONDITION: The current graph is already linear
     *
     * @return true  If the operator can be added while keeping the graph as a linear sequence
     * @return false Otherwise
     */
    bool try_add_operator_as_linear(OperatorId op, const std::vector<TensorId> &inputs, const std::vector<TensorId> &outputs) const
    {
        ARM_COMPUTE_UNUSED(op, outputs);
        if(all_ops().empty())
        {
            return true;
        }
        // The new operator must consume exactly one tensor that already exists in the graph...
        std::vector<TensorId> common_tensors{};
        auto                  existing_tensors = all_tensors();
        std::sort(existing_tensors.begin(), existing_tensors.end()); // To use std::set_intersection, both input ranges must be sorted
        std::vector<TensorId> sorted_inputs{ inputs };
        std::sort(sorted_inputs.begin(), sorted_inputs.end());
        std::set_intersection(existing_tensors.begin(), existing_tensors.end(),
                              sorted_inputs.begin(), sorted_inputs.end(), std::back_inserter(common_tensors));
        if(common_tensors.size() != 1U)
        {
            return false;
        }
        // ...and that tensor must be an output of the single tail operator
        const auto linked_tensor = common_tensors[0];
        const auto tail_ops      = get_dst_ops();
        ARM_COMPUTE_ERROR_ON(tail_ops.size() != 1U); // PRECONDITION: The current graph is already linear
        const auto tail = tail_ops[0];

        if(!is_in(linked_tensor, dst_tensors(tail)))
        {
            return false;
        }
        return true;
    }
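
    /* Usage sketch for the linear-growth API (illustrative only; the operator and
     * tensor ids below are hypothetical, any scheme works as long as ids are unique
     * per kind):
     *
     *   DependencyGraph g{};
     *   g.add_operator_as_linear(0, { 0 }, { 1 });        // Graph: t0 -> op0 -> t1
     *   if(g.try_add_operator_as_linear(1, { 1 }, { 2 })) // Ok: op1 consumes the tail's output t1
     *   {
     *       g.add_operator_as_linear(1, { 1 }, { 2 });    // Graph: t0 -> op0 -> t1 -> op1 -> t2
     *   }
     */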

    /** Add an operator, while keeping the graph as a "linear sequence"
     *
     * PRECONDITION: The current graph is already linear
     * INVARIANT: The list can only grow from head to tail
     * POSTCONDITION: The graph is still linear
     */
    void add_operator_as_linear(OperatorId op, const std::vector<TensorId> &inputs, const std::vector<TensorId> &outputs)
    {
        ARM_COMPUTE_ERROR_ON(!try_add_operator_as_linear(op, inputs, outputs));
        const auto success = add_operator(op, inputs, outputs);
        ARM_COMPUTE_UNUSED(success);
        ARM_COMPUTE_ERROR_ON(!success);
    }

    /** Add a new operator
     * Fails if the operator already exists or if adding it would violate the DAG invariant;
     * a failed insertion leaves the graph unchanged
     *
     * @param[in] op      Operator to add
     * @param[in] inputs  Input tensors to the operator
     * @param[in] outputs Output tensors to the operator
     *
     * @return true  If the operator was added
     * @return false Otherwise
     */
    bool add_operator(OperatorId op, const std::vector<TensorId> &inputs, const std::vector<TensorId> &outputs)
    {
        if(operator_exists(op))
        {
            return false;
        }
        _adj_src_tensors[op] = {};
        _adj_dst_tensors[op] = {};
        for(auto in_tensor : inputs)
        {
            // Linking an input tensor to the operator node can never create a cycle / loop, because we guarantee
            // each op is newly created, so every pair / edge is a new link
            link_input(op, in_tensor);
        }
        for(auto out_tensor : outputs)
        {
            // If there already exists a back path from op's output tensor to op, then linking the two would create a loop / cycle
            if(path_exists_from_tensor_to_op(out_tensor, op))
            {
                remove_operator(op);
                return false;
            }
            else
            {
                link_output(op, out_tensor);
            }
        }
        return true;
    }

    /** Sort the graph in a topological order (Kahn's algorithm)
     *
     * @return std::vector<OpPack> Operator packs in topological order
     */
    std::vector<OpPack> topological_sort() const
    {
        // Incident degree (number of source operators to an op)
        std::map<OperatorId, unsigned int> in_degree{};
        std::set<OperatorId>               visited_ops{};
        std::deque<OperatorId>             zero_in_degree_ops{};
        std::vector<OpPack>                sorted_op_packs{};
        for(auto op : all_ops())
        {
            const auto degree = src_ops(op).size();
            in_degree[op]     = degree;
            if(degree == 0)
            {
                zero_in_degree_ops.push_back(op);
                visited_ops.insert(op);
            }
        }

        while(!zero_in_degree_ops.empty())
        {
            const OperatorId op = zero_in_degree_ops.front();
            zero_in_degree_ops.pop_front();
            sorted_op_packs.push_back(OpPack{ op, src_tensors(op), dst_tensors(op) });

            for(const auto next_op : dst_ops(op))
            {
                if(in_degree[next_op] > 0)
                {
                    in_degree[next_op]--;
                }
                if(in_degree[next_op] == 0 && visited_ops.find(next_op) == visited_ops.end())
                {
                    zero_in_degree_ops.push_back(next_op);
                    visited_ops.insert(next_op); // Mark next_op (not op) as visited, so each op is scheduled exactly once
                }
            }
        }

        return sorted_op_packs;
    }
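
    /* Example (hypothetical ids): for the diamond-shaped graph
     *
     *   op0: { t0 }     -> { t1 }
     *   op1: { t1 }     -> { t2 }
     *   op2: { t1 }     -> { t3 }
     *   op3: { t2, t3 } -> { t4 }
     *
     * topological_sort() returns packs for op0, op1, op2, op3 in that order:
     * op0 first because it is the only zero-in-degree op, op3 last because it
     * is only scheduled once both of its producers, op1 and op2, are emitted.
     */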

    /** Recursive helper of find_independent_paths: walk from @p op towards the leaves, collecting ops into
     *  @p cur_path until hitting either a leaf op or an op that still has unresolved dependencies
     */
    void find_independent_paths_util(OperatorId op, std::vector<std::vector<OperatorId>> &paths, std::vector<OperatorId> cur_path, const std::map<OperatorId, unsigned int> &in_degree) const
    {
        // We have found an unresolved dependency
        if(in_degree.at(op) > 1)
        {
            paths.push_back(cur_path);
            return;
        }
        const auto child_ops = dst_ops(op);

        cur_path.push_back(op);
        // Hit a leaf op
        if(child_ops.empty())
        {
            paths.push_back(cur_path);
            return;
        }
        for(const auto child_op : child_ops)
        {
            find_independent_paths_util(child_op, paths, cur_path, in_degree);
        }
    }
    /** Find all independent linear paths from @p op, i.e. paths that do not depend on any other op
     *
     * @return std::vector<std::vector<OperatorId>>
     */
    std::vector<std::vector<OperatorId>> find_independent_paths(OperatorId op, const std::map<OperatorId, unsigned int> &in_degree) const
    {
        std::vector<std::vector<OperatorId>> paths;
        std::vector<OperatorId>              cur_path;
        find_independent_paths_util(op, paths, cur_path, in_degree);
        return paths;
    }
    /** Find a longest independent linear path from @p op, i.e. a path that does not depend on any other op
     *
     * @return std::vector<OperatorId>
     */
    std::vector<OperatorId> find_longest_independent_path(OperatorId op, const std::map<OperatorId, unsigned int> &in_degree) const
    {
        const auto &paths = find_independent_paths(op, in_degree);
        ARM_COMPUTE_ERROR_ON(paths.empty());

        size_t                         max_len  = 0;
        const std::vector<OperatorId> *max_path = nullptr;

        for(const auto &path : paths)
        {
            if(path.size() >= max_len)
            {
                max_path = &path;
                max_len  = path.size();
            }
        }
        return *max_path;
    }
    /** Among the remaining @p candidate_ops, pick the one with the longest independent path and remove it from the candidates
     *
     * @return std::vector<OperatorId> The chosen path; empty if there are no candidates left
     */
    std::vector<OperatorId> propose_next_path(std::set<OperatorId> &candidate_ops, const std::map<OperatorId, unsigned int> &in_degree) const
    {
        if(candidate_ops.empty())
        {
            return {};
        }
        size_t                  max_len = 0;
        std::vector<OperatorId> max_path;
        OperatorId              chosen_op{};
        for(auto op : candidate_ops)
        {
            const auto path = find_longest_independent_path(op, in_degree);
            if(path.size() >= max_len)
            {
                chosen_op = op;
                max_path  = path;
                max_len   = path.size();
            }
        }
        candidate_ops.erase(chosen_op);
        return max_path;
    }
    /** Partition the graph into a list of linear sub-"graphs", while preserving the topological order and trying to minimize
     *  the number of partitions
     *
     * @return std::vector<std::vector<OpPack>>
     */
    std::vector<std::vector<OpPack>> topological_partition() const
    {
        // Initialize in degrees and zero-in-degree (candidate) ops
        std::map<OperatorId, unsigned int> in_degree{};
        std::set<OperatorId>               candidate_ops{};
        for(auto op : all_ops())
        {
            const auto degree = src_ops(op).size();
            in_degree[op]     = degree;
            if(degree == 0)
            {
                candidate_ops.insert(op);
            }
        }

        std::vector<std::vector<OpPack>> sorted_partitions{};
        while(!candidate_ops.empty())
        {
            // Propose the longest independent path starting from a zero-in-degree op
            const auto path = propose_next_path(candidate_ops, in_degree);

            // Append to sorted_partitions
            std::vector<OpPack> path_op_pack{};
            for(auto op : path)
            {
                path_op_pack.push_back(OpPack{ op, src_tensors(op), dst_tensors(op) });
            }
            sorted_partitions.push_back(path_op_pack);
            // Remove the whole path (update in_degree and candidate_ops)
            for(auto op : path)
            {
                for(const auto next_op_child : dst_ops(op))
                {
                    if(in_degree[next_op_child] > 0)
                    {
                        in_degree[next_op_child]--;
                    }
                    if(in_degree[next_op_child] == 0 && !is_in(next_op_child, path)) // We do not want to put ops of the proposed path back into the candidates
                    {
                        candidate_ops.insert(next_op_child);
                    }
                }
            }
        }
        return sorted_partitions;
    }
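
    /* Example (hypothetical ids), continuing the diamond graph above:
     *
     *   op0: { t0 }     -> { t1 }
     *   op1: { t1 }     -> { t2 }
     *   op2: { t1 }     -> { t3 }
     *   op3: { t2, t3 } -> { t4 }
     *
     * topological_partition() can, e.g., first propose { op0, op2 } (growth towards
     * op3 stops because op3 still has an unresolved producer), then { op1, op3 }:
     * two linear partitions whose concatenation op0, op2, op1, op3 is still a valid
     * topological order. The exact split depends on the tie-breaking in
     * find_longest_independent_path and propose_next_path.
     */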

    /** Strict equality comparison (all internal ids and order of insertion matter).
     *  In the future this may be replaced with a topological comparison, allowing equivalent graphs
     *  with different internal ids to be equal
     *
     * @param[in] g0
     * @param[in] g1
     * @return true  If the same
     * @return false Otherwise
     */
    friend bool operator==(const DependencyGraph &g0, const DependencyGraph &g1)
    {
        // Do not compare id allocators
        return std::make_tuple(g0._adj_src_tensors, g0._adj_dst_tensors, g0._adj_src_ops, g0._adj_dst_ops)
               == std::make_tuple(g1._adj_src_tensors, g1._adj_dst_tensors, g1._adj_src_ops, g1._adj_dst_ops);
    }
    std::vector<OperatorId> src_ops_from_tensor(TensorId tensor) const
    {
        return _adj_src_ops.at(tensor);
    }
    std::vector<OperatorId> dst_ops_from_tensor(TensorId tensor) const
    {
        return _adj_dst_ops.at(tensor);
    }
    /** Get all tensors
     *
     * @return std::vector<TensorId>
     */
    std::vector<TensorId> all_tensors() const
    {
        std::vector<TensorId> tensors{};
        std::transform(std::begin(_adj_src_ops), std::end(_adj_src_ops), std::back_inserter(tensors), [](const auto & it)
        {
            return it.first;
        });
        return tensors;
    }
    /** Get source tensors of the whole graph
     *
     * @return std::vector<TensorId>
     */
    std::vector<TensorId> global_src_tensors() const
    {
        std::vector<TensorId> tensors;
        for(auto tensor_src_ops : _adj_src_ops)
        {
            if(tensor_src_ops.second.empty())
            {
                tensors.push_back(tensor_src_ops.first);
            }
        }
        return tensors;
    }
    /** Get destination tensors of the whole graph
     *
     * @return std::vector<TensorId>
     */
    std::vector<TensorId> global_dst_tensors() const
    {
        std::vector<TensorId> tensors;
        for(auto tensor_dst_ops : _adj_dst_ops)
        {
            if(tensor_dst_ops.second.empty())
            {
                tensors.push_back(tensor_dst_ops.first);
            }
        }
        return tensors;
    }
    /** Get all root ops. Root ops can also be referred to as "src ops" of the whole graph
     *
     * @return std::vector<OperatorId>
     */
    std::vector<OperatorId> get_root_ops() const
    {
        std::vector<OperatorId> ops{};
        const auto op_list = all_ops();

        for(auto op : op_list)
        {
            if(src_ops(op).empty())
            {
                ops.emplace_back(op);
            }
        }
        return ops;
    }

private:
    void link_input(OperatorId op, TensorId in_tensor)
    {
        ARM_COMPUTE_ERROR_ON(!operator_exists(op));
        if(!tensor_exists(in_tensor))
        {
            insert_new_tensor(in_tensor);
        }
        ARM_COMPUTE_ERROR_ON(are_connected(op, in_tensor)); // Prevent repetitive linking
        _adj_src_tensors[op].push_back(in_tensor);
        _adj_dst_ops[in_tensor].push_back(op);
    }
    void link_output(OperatorId op, TensorId out_tensor)
    {
        ARM_COMPUTE_ERROR_ON(!operator_exists(op));
        if(!tensor_exists(out_tensor))
        {
            insert_new_tensor(out_tensor);
        }
        ARM_COMPUTE_ERROR_ON(are_connected(op, out_tensor)); // Prevent repetitive linking
        _adj_dst_tensors[op].push_back(out_tensor);
        _adj_src_ops[out_tensor].push_back(op);
    }
    /** Get the operators that produce the source tensors of @p op */
    std::vector<OperatorId> src_ops(OperatorId op) const
    {
        ARM_COMPUTE_ERROR_ON(!operator_exists(op));
        std::vector<OperatorId> ops{};
        for(TensorId src_tensor : src_tensors(op))
        {
            ops.insert(ops.end(), std::begin(_adj_src_ops.at(src_tensor)), std::end(_adj_src_ops.at(src_tensor)));
        }
        return ops;
    }
    /** Get the operators that consume the destination tensors of @p op */
    std::vector<OperatorId> dst_ops(OperatorId op) const
    {
        ARM_COMPUTE_ERROR_ON(!operator_exists(op));
        std::vector<OperatorId> ops{};
        for(TensorId dst_tensor : _adj_dst_tensors.at(op))
        {
            ops.insert(ops.end(), std::begin(_adj_dst_ops.at(dst_tensor)), std::end(_adj_dst_ops.at(dst_tensor)));
        }
        return ops;
    }
    /** Get source tensors of an operator
     *
     * @param[in] op
     * @return std::vector<TensorId>
     */
    std::vector<TensorId> src_tensors(OperatorId op) const
    {
        ARM_COMPUTE_ERROR_ON(!operator_exists(op));
        return _adj_src_tensors.at(op);
    }
    /** Get destination tensors of an operator
     *
     * @param[in] op
     * @return std::vector<TensorId>
     */
    std::vector<TensorId> dst_tensors(OperatorId op) const
    {
        ARM_COMPUTE_ERROR_ON(!operator_exists(op));
        return _adj_dst_tensors.at(op);
    }
    /** Get all operators
     *
     * @return std::vector<OperatorId>
     */
    std::vector<OperatorId> all_ops() const
    {
        std::vector<OperatorId> ops{};
        std::transform(std::begin(_adj_src_tensors), std::end(_adj_src_tensors), std::back_inserter(ops), [](const auto & it)
        {
            return it.first;
        });
        return ops;
    }
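
    /* Illustration (hypothetical ids) of the four adjacency lists for t0 -> op0 -> t1:
     *
     *   _adj_src_tensors: { op0: [ t0 ] }          // op -> its input tensors
     *   _adj_dst_tensors: { op0: [ t1 ] }          // op -> its output tensors
     *   _adj_src_ops:     { t0: [], t1: [ op0 ] }  // tensor -> ops that write it
     *   _adj_dst_ops:     { t0: [ op0 ], t1: [] }  // tensor -> ops that read it
     *
     * Every edge is stored twice (once per direction), which is what the
     * link_input / link_output helpers above and remove_operator below keep consistent.
     */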

    /** Remove an operator from the graph, unlinking it from its tensors and erasing any tensors left isolated
     *
     * @param[in] op
     */
    void remove_operator(OperatorId op)
    {
        for(auto src_tensor : _adj_src_tensors.at(op))
        {
            auto &dst_ops = _adj_dst_ops.at(src_tensor);
            dst_ops.erase(std::remove(std::begin(dst_ops), std::end(dst_ops), op), std::end(dst_ops));
        }
        for(auto dst_tensor : _adj_dst_tensors.at(op))
        {
            auto &src_ops = _adj_src_ops.at(dst_tensor);
            src_ops.erase(std::remove(std::begin(src_ops), std::end(src_ops), op), std::end(src_ops));
        }
        // Remove any isolated tensors
        // An isolated tensor is one where both its _adj_src_ops and _adj_dst_ops are empty
        for(auto t : all_tensors())
        {
            if(_adj_src_ops.at(t).empty() && _adj_dst_ops.at(t).empty())
            {
                _adj_src_ops.erase(t);
                _adj_dst_ops.erase(t);
            }
        }
        _adj_src_tensors.erase(op);
        _adj_dst_tensors.erase(op);
    }
    void insert_new_tensor(TensorId tensor)
    {
        _adj_src_ops[tensor] = {};
        _adj_dst_ops[tensor] = {};
    }
    bool tensor_exists(TensorId tensor) const
    {
        return _adj_src_ops.find(tensor) != _adj_src_ops.end() && _adj_dst_ops.find(tensor) != _adj_dst_ops.end();
    }
    bool operator_exists(OperatorId op) const
    {
        return _adj_src_tensors.find(op) != _adj_src_tensors.end() && _adj_dst_tensors.find(op) != _adj_dst_tensors.end();
    }
    bool is_src_tensor_of(OperatorId op, TensorId tensor) const
    {
        if(!operator_exists(op) || !tensor_exists(tensor))
        {
            return false;
        }
        const auto op_inputs = src_tensors(op);
        return std::find(op_inputs.begin(), op_inputs.end(), tensor) != op_inputs.end();
    }
    bool is_dst_tensor_of(OperatorId op, TensorId tensor) const
    {
        if(!operator_exists(op) || !tensor_exists(tensor))
        {
            return false;
        }
        const auto op_outputs = dst_tensors(op);
        return std::find(op_outputs.begin(), op_outputs.end(), tensor) != op_outputs.end();
    }
    bool are_connected(OperatorId op, TensorId tensor) const
    {
        return is_src_tensor_of(op, tensor) || is_dst_tensor_of(op, tensor);
    }
    /** Whether @p op is a destination / leaf operator of the whole graph
     *
     * @param[in] op
     * @return true  If @p op has no consumers
     * @return false Otherwise
     */
    bool is_dst_op(OperatorId op) const
    {
        return dst_ops(op).empty();
    }
    std::vector<OperatorId> get_dst_ops() const
    {
        std::vector<OperatorId> ops{};
        const auto op_list = all_ops();
        for(auto op : op_list)
        {
            if(is_dst_op(op))
            {
                ops.emplace_back(op);
            }
        }
        return ops;
    }
    /** Whether there is a directed path from @p src_tensor to @p dst_op
     *  Mutually recursive with path_exists_from_op_to_op; terminates because the graph is kept acyclic
     */
    bool path_exists_from_tensor_to_op(TensorId src_tensor, OperatorId dst_op) const
    {
        if(!tensor_exists(src_tensor) || !operator_exists(dst_op))
        {
            return false;
        }
        for(auto child_op : dst_ops_from_tensor(src_tensor))
        {
            if(path_exists_from_op_to_op(child_op, dst_op))
            {
                return true;
            }
        }
        return false;
    }
    /** Whether there is a directed path from @p src_op to @p dst_op */
    bool path_exists_from_op_to_op(OperatorId src_op, OperatorId dst_op) const
    {
        if(!operator_exists(src_op) || !operator_exists(dst_op))
        {
            return false;
        }
        if(src_op == dst_op)
        {
            return true;
        }
        if(is_in(src_op, get_dst_ops()))
        {
            return false;
        }
        for(auto child_tensor : dst_tensors(src_op))
        {
            if(path_exists_from_tensor_to_op(child_tensor, dst_op))
            {
                return true;
            }
        }
        return false;
    }

private:
    AdjList _adj_src_tensors{};
    AdjList _adj_dst_tensors{};
    AdjList _adj_src_ops{};
    AdjList _adj_dst_ops{};
};

} // namespace dynamic_fusion
} // namespace experimental
} // namespace arm_compute

#endif /* SRC_DYNAMIC_FUSION_SKETCH_UTILS_DEPENDENCYGRAPH */