From 2a2db590fd179dcb8e1a575293cd2b887e2dc246 Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Wed, 15 Aug 2018 12:14:46 +0100 Subject: COMPMID-1505: Add native grouping support at graph level Change-Id: Iedc91b0aee743b59af5140c8acb8124548da3163 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/144362 Tested-by: Jenkins Reviewed-by: Giorgio Arena Reviewed-by: Michele DiGiorgio --- src/graph/GraphBuilder.cpp | 74 ++------ src/graph/GraphManager.cpp | 6 +- src/graph/INode.cpp | 5 + src/graph/Utils.cpp | 41 +++++ src/graph/algorithms/TopologicalSort.cpp | 188 +++++++++++++++++++++ src/graph/backends/GLES/GCNodeValidator.cpp | 6 +- src/graph/detail/ExecutionHelpers.cpp | 13 +- src/graph/mutators/DepthConcatSubTensorMutator.cpp | 19 ++- src/graph/mutators/GroupedConvolutionMutator.cpp | 186 ++++++++++++++++++++ src/graph/mutators/NodeExecutionMethodMutator.cpp | 97 +++++++++++ src/graph/mutators/NodeFusionMutator.cpp | 13 +- src/graph/mutators/SplitLayerSubTensorMutator.cpp | 17 +- src/graph/nodes/ConvolutionLayerNode.cpp | 13 +- 13 files changed, 581 insertions(+), 97 deletions(-) create mode 100644 src/graph/algorithms/TopologicalSort.cpp create mode 100644 src/graph/mutators/GroupedConvolutionMutator.cpp create mode 100644 src/graph/mutators/NodeExecutionMethodMutator.cpp (limited to 'src/graph') diff --git a/src/graph/GraphBuilder.cpp b/src/graph/GraphBuilder.cpp index fa78024e71..81a18c437f 100644 --- a/src/graph/GraphBuilder.cpp +++ b/src/graph/GraphBuilder.cpp @@ -25,7 +25,7 @@ #include "arm_compute/graph/Graph.h" #include "arm_compute/graph/Utils.h" -#include "arm_compute/graph/algorithms/BFS.h" +#include "arm_compute/graph/algorithms/TopologicalSort.h" #include "arm_compute/graph/nodes/Nodes.h" #include "support/ToolchainSupport.h" @@ -81,53 +81,6 @@ NodeID create_simple_single_input_output_node(Graph &g, NodeParams ¶ms, Node return nid; } - -NodeID create_grouped_convolution(Graph &g, const NodeParams ¶ms, NodeIdxPair input, NodeID weights, NodeID bias, - 
PadStrideInfo conv_info, ConvolutionMethod method, FastMathHint fast_math_hint, unsigned int num_groups) -{ - bool has_bias = (bias != EmptyNodeID); - - // Split input - const TensorDescriptor input_tensor_desc = get_tensor_descriptor(g, g.node(input.node_id)->outputs()[0]); - const unsigned int input_idx = get_dimension_idx(input_tensor_desc, DataLayoutDimension::CHANNEL); - NodeID input_split = GraphBuilder::add_split_node(g, params, input, num_groups, input_idx); - - // Split weights - const TensorDescriptor weights_tensor_desc = get_tensor_descriptor(g, g.node(weights)->outputs()[0]); - const unsigned int batch_idx = get_dimension_idx(weights_tensor_desc, DataLayoutDimension::BATCHES); - NodeID weights_split = GraphBuilder::add_split_node(g, params, { weights, 0 }, num_groups, batch_idx); - - // Split bias - NodeID bias_split = EmptyNodeID; - if(has_bias) - { - // Split bias - bias_split = GraphBuilder::add_split_node(g, params, { bias, 0 }, num_groups, 0); - } - - std::vector convolution_outputs; - for(unsigned int i = 0; i < num_groups; ++i) - { - NodeParams group_params = params; - NodeID conv_nid = g.add_node(conv_info, method, fast_math_hint); - g.add_connection(input_split, i, conv_nid, 0); - g.add_connection(weights_split, i, conv_nid, 1); - if(has_bias) - { - g.add_connection(bias_split, i, conv_nid, 2); - } - // Add group name - if(!group_params.name.empty()) - { - group_params.name.append("_g" + arm_compute::support::cpp11::to_string(i)); - } - set_node_params(g, conv_nid, group_params); - convolution_outputs.push_back({ conv_nid, 0 }); - } - - // Depth concatenate output - return GraphBuilder::add_concatenate_node(g, params, convolution_outputs, DataLayoutDimension::CHANNEL); -} } // namespace NodeID GraphBuilder::add_const_node(Graph &g, NodeParams params, TensorDescriptor desc, ITensorAccessorUPtr accessor) @@ -263,24 +216,17 @@ NodeID GraphBuilder::add_convolution_node(Graph &g, NodeParams params, NodeIdxPa b_nid = add_const_node_with_name(g, 
params, "Bias", b_desc, std::move(bias_accessor)); } - if(num_groups == 1) - { - // Create convolution node and connect - NodeID conv_nid = g.add_node(conv_info, method, fast_math_hint, out_quant_info); - g.add_connection(input.node_id, input.index, conv_nid, 0); - g.add_connection(w_nid, 0, conv_nid, 1); - if(has_bias) - { - g.add_connection(b_nid, 0, conv_nid, 2); - } - set_node_params(g, conv_nid, params); - - return conv_nid; - } - else + // Create convolution node and connect + NodeID conv_nid = g.add_node(conv_info, num_groups, method, fast_math_hint, out_quant_info); + g.add_connection(input.node_id, input.index, conv_nid, 0); + g.add_connection(w_nid, 0, conv_nid, 1); + if(has_bias) { - return create_grouped_convolution(g, params, input, w_nid, b_nid, conv_info, method, fast_math_hint, num_groups); + g.add_connection(b_nid, 0, conv_nid, 2); } + set_node_params(g, conv_nid, params); + + return conv_nid; } NodeID GraphBuilder::add_deconvolution_node(Graph &g, NodeParams params, NodeIdxPair input, diff --git a/src/graph/GraphManager.cpp b/src/graph/GraphManager.cpp index db6650cf69..5be4e7e2ba 100644 --- a/src/graph/GraphManager.cpp +++ b/src/graph/GraphManager.cpp @@ -32,6 +32,8 @@ #include "arm_compute/graph/detail/CrossLayerMemoryManagerHelpers.h" #include "arm_compute/graph/detail/ExecutionHelpers.h" +#include "arm_compute/graph/algorithms/TopologicalSort.h" + namespace arm_compute { namespace graph @@ -69,13 +71,13 @@ void GraphManager::finalize_graph(Graph &graph, GraphContext &ctx, PassManager & pm.run_all(graph); // Perform topological sort - // FIXME : Sort nodes and pass sorted indices in configure all nodes + std::vector topological_sorted_nodes = dfs(graph); // Validate all nodes detail::validate_all_nodes(graph); // Configure all nodes - auto workload = detail::configure_all_nodes(graph, ctx); + auto workload = detail::configure_all_nodes(graph, ctx, topological_sorted_nodes); ARM_COMPUTE_ERROR_ON_MSG(workload.tasks.empty(), "Could not configure 
all nodes!"); // Allocate const tensors and call accessors diff --git a/src/graph/INode.cpp b/src/graph/INode.cpp index cd9a46ac40..b0c31372aa 100644 --- a/src/graph/INode.cpp +++ b/src/graph/INode.cpp @@ -185,6 +185,11 @@ size_t INode::num_outputs() const return _outputs.size(); } +NodeParams INode::common_node_params() const +{ + return _common_params; +} + Target INode::requested_target() const { return _common_params.target; diff --git a/src/graph/Utils.cpp b/src/graph/Utils.cpp index 4715694f15..75644a8933 100644 --- a/src/graph/Utils.cpp +++ b/src/graph/Utils.cpp @@ -78,13 +78,21 @@ PassManager create_default_pass_manager(Target target) { PassManager pm; + // Passes that mutate graph IR + pm.append(support::cpp14::make_unique()); if(target != Target::GC) { pm.append(support::cpp14::make_unique()); pm.append(support::cpp14::make_unique()); + } + + // Passes that mutate backend information + if(target != Target::GC) + { pm.append(support::cpp14::make_unique()); pm.append(support::cpp14::make_unique()); } + pm.append(support::cpp14::make_unique()); return pm; } @@ -139,5 +147,38 @@ size_t get_dimension_idx(const TensorDescriptor &descriptor, const DataLayoutDim break; } } + +std::vector get_driving_nodes(const INode &node) +{ + std::vector driving_nodes; + + const Graph *g = node.graph(); + ARM_COMPUTE_ERROR_ON(g == nullptr); + + for(auto &output_edge_id : node.output_edges()) + { + auto output_edge = g->edge(output_edge_id); + if(output_edge != nullptr) + { + ARM_COMPUTE_ERROR_ON(output_edge->consumer() == nullptr); + driving_nodes.push_back({ output_edge->consumer_id(), output_edge->consumer_idx() }); + } + } + + return driving_nodes; +} + +void configure_tensor(Tensor *tensor) +{ + if(tensor != nullptr && tensor->handle() == nullptr) + { + Target target = tensor->desc().target; + auto backend = backends::BackendRegistry::get().find_backend(target); + ARM_COMPUTE_ERROR_ON_MSG(!backend, "Requested backend doesn't exist!"); + auto handle = 
backend->create_tensor(*tensor); + ARM_COMPUTE_ERROR_ON_MSG(!backend, "Couldn't create backend handle!"); + tensor->set_handle(std::move(handle)); + } +} } // namespace graph } // namespace arm_compute diff --git a/src/graph/algorithms/TopologicalSort.cpp b/src/graph/algorithms/TopologicalSort.cpp new file mode 100644 index 0000000000..0fbf6e32e8 --- /dev/null +++ b/src/graph/algorithms/TopologicalSort.cpp @@ -0,0 +1,188 @@ +/* + * Copyright (c) 2018 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/graph/algorithms/TopologicalSort.h" + +#include "arm_compute/graph/Graph.h" + +#include "arm_compute/core/utils/misc/Iterable.h" + +#include +#include + +namespace arm_compute +{ +namespace graph +{ +namespace detail +{ +/** Checks if all the input dependencies of a node have been visited + * + * @param[in] node Node to check + * @param[in] visited Vector that contains the visited information + * + * @return True if all inputs dependencies have been visited else false + */ +inline bool all_inputs_are_visited(const INode *node, const std::vector &visited) +{ + ARM_COMPUTE_ERROR_ON(node == nullptr); + const Graph *graph = node->graph(); + ARM_COMPUTE_ERROR_ON(graph == nullptr); + + bool are_all_visited = true; + for(const auto &input_edge_id : node->input_edges()) + { + if(input_edge_id != EmptyNodeID) + { + const Edge *input_edge = graph->edge(input_edge_id); + ARM_COMPUTE_ERROR_ON(input_edge == nullptr); + ARM_COMPUTE_ERROR_ON(input_edge->producer() == nullptr); + if(!visited[input_edge->producer_id()]) + { + are_all_visited = false; + break; + } + } + } + + return are_all_visited; +} +} // namespace detail + +std::vector bfs(Graph &g) +{ + std::vector bfs_order_vector; + + // Created visited vector + std::vector visited(g.nodes().size(), false); + + // Create BFS queue + std::list queue; + + // Push inputs and mark as visited + for(auto &input : g.nodes(NodeType::Input)) + { + if(input != EmptyNodeID) + { + visited[input] = true; + queue.push_back(input); + } + } + + // Push const nodes and mark as visited + for(auto &const_node : g.nodes(NodeType::Const)) + { + if(const_node != EmptyNodeID) + { + visited[const_node] = true; + queue.push_back(const_node); + } + } + + // Iterate over vector and edges + while(!queue.empty()) + { + // Dequeue a node from queue and process + NodeID n = queue.front(); + bfs_order_vector.push_back(n); + queue.pop_front(); + + const INode *node = g.node(n); + ARM_COMPUTE_ERROR_ON(node == nullptr); + for(const 
auto &eid : node->output_edges()) + { + const Edge *e = g.edge(eid); + ARM_COMPUTE_ERROR_ON(e == nullptr); + if(!visited[e->consumer_id()] && detail::all_inputs_are_visited(e->consumer(), visited)) + { + visited[e->consumer_id()] = true; + queue.push_back(e->consumer_id()); + } + } + } + + return bfs_order_vector; +} + +std::vector dfs(Graph &g) +{ + std::vector dfs_order_vector; + + // Created visited vector + std::vector visited(g.nodes().size(), false); + + // Create DFS stack + std::stack stack; + + // Push inputs and mark as visited + for(auto &input : g.nodes(NodeType::Input)) + { + if(input != EmptyNodeID) + { + visited[input] = true; + stack.push(input); + } + } + + // Push const nodes and mark as visited + for(auto &const_node : g.nodes(NodeType::Const)) + { + if(const_node != EmptyNodeID) + { + visited[const_node] = true; + stack.push(const_node); + } + } + + // Iterate over vector and edges + while(!stack.empty()) + { + // Pop a node from stack and process + NodeID n = stack.top(); + dfs_order_vector.push_back(n); + stack.pop(); + + // Mark node as visited + if(!visited[n]) + { + visited[n] = true; + } + + const INode *node = g.node(n); + ARM_COMPUTE_ERROR_ON(node == nullptr); + // Reverse iterate to push branches from right to left and pop on the opposite order + for(const auto &eid : arm_compute::utils::iterable::reverse_iterate(node->output_edges())) + { + const Edge *e = g.edge(eid); + ARM_COMPUTE_ERROR_ON(e == nullptr); + if(!visited[e->consumer_id()] && detail::all_inputs_are_visited(e->consumer(), visited)) + { + stack.push(e->consumer_id()); + } + } + } + + return dfs_order_vector; +} +} // namespace graph +} // namespace arm_compute \ No newline at end of file diff --git a/src/graph/backends/GLES/GCNodeValidator.cpp b/src/graph/backends/GLES/GCNodeValidator.cpp index 542cfdf973..31d1c8b456 100644 --- a/src/graph/backends/GLES/GCNodeValidator.cpp +++ b/src/graph/backends/GLES/GCNodeValidator.cpp @@ -80,15 +80,13 @@ Status 
validate_convolution_layer(ConvolutionLayerNode &node) const ConvolutionMethod conv_algorithm = node.convolution_method(); // Validate function + ARM_COMPUTE_RETURN_ERROR_ON_MSG(node.num_groups() != 1, "Grouping is not supported by ConvolutionLayer!"); if(conv_algorithm == ConvolutionMethod::Direct) { bool is_square = weights->tensor_shape().x() == weights->tensor_shape().y(); bool is_direct = (weights->tensor_shape().x() == 1) || (weights->tensor_shape().x() == 3) || (weights->tensor_shape().x() == 5); bool is_correct_stride = (conv_info.stride().first) <= 2 && (conv_info.stride().second <= 2); - if(!(is_square && is_direct && is_correct_stride)) - { - node.set_convolution_method(ConvolutionMethod::Default); - } + ARM_COMPUTE_RETURN_ERROR_ON_MSG(!(is_square && is_direct && is_correct_stride), "Direct convolution is not supported for given configuration"); } return Status{}; diff --git a/src/graph/detail/ExecutionHelpers.cpp b/src/graph/detail/ExecutionHelpers.cpp index 6df67fcfec..6157b7fecf 100644 --- a/src/graph/detail/ExecutionHelpers.cpp +++ b/src/graph/detail/ExecutionHelpers.cpp @@ -59,7 +59,7 @@ void configure_all_tensors(Graph &g) for(auto &tensor : tensors) { - if(tensor) + if(tensor && tensor->handle() == nullptr) { Target target = tensor->desc().target; auto backend = backends::BackendRegistry::get().find_backend(target); @@ -131,17 +131,16 @@ void allocate_all_tensors(Graph &g) } } -ExecutionWorkload configure_all_nodes(Graph &g, GraphContext &ctx) +ExecutionWorkload configure_all_nodes(Graph &g, GraphContext &ctx, const std::vector &node_order) { ExecutionWorkload workload; workload.graph = &g; workload.ctx = &ctx; - auto &nodes = g.nodes(); - // Create tasks - for(auto &node : nodes) + for(auto &node_id : node_order) { + auto node = g.node(node_id); if(node != nullptr) { Target assigned_target = node->assigned_target(); @@ -152,14 +151,14 @@ ExecutionWorkload configure_all_nodes(Graph &g, GraphContext &ctx) { ExecutionTask task; task.task = 
std::move(func); - task.node = node.get(); + task.node = node; workload.tasks.push_back(std::move(task)); } } } // Add inputs and outputs - for(auto &node : nodes) + for(auto &node : g.nodes()) { if(node != nullptr && node->type() == NodeType::Input) { diff --git a/src/graph/mutators/DepthConcatSubTensorMutator.cpp b/src/graph/mutators/DepthConcatSubTensorMutator.cpp index 241c07b367..937528d143 100644 --- a/src/graph/mutators/DepthConcatSubTensorMutator.cpp +++ b/src/graph/mutators/DepthConcatSubTensorMutator.cpp @@ -26,6 +26,7 @@ #include "arm_compute/graph/Graph.h" #include "arm_compute/graph/Logger.h" #include "arm_compute/graph/Utils.h" +#include "arm_compute/graph/algorithms/TopologicalSort.h" #include "arm_compute/graph/backends/BackendRegistry.h" #include "arm_compute/graph/nodes/ConcatenateLayerNode.h" @@ -43,16 +44,26 @@ const char *DepthConcatSubTensorMutator::name() void DepthConcatSubTensorMutator::mutate(Graph &g) { + // Early exit if no Concatenation layers exist in graph + if(g.nodes(NodeType::ConcatenateLayer).empty()) + { + return; + } + + // Perform topological sort + std::vector topological_sorted_node_ids = dfs(g); + // Should be in reverse order of execution - for(auto &node : arm_compute::utils::iterable::reverse_iterate(g.nodes())) + for(auto &node_id : arm_compute::utils::iterable::reverse_iterate(topological_sorted_node_ids)) { - if(node && node->type() == NodeType::ConcatenateLayer && node->output(0) != nullptr) + INode *node = g.node(node_id); + if(node != nullptr && node->type() == NodeType::ConcatenateLayer && node->output(0) != nullptr) { // Get output tensor auto output_tensor = node->output(0); // Check concatenation axis (Sub-tensor optimization is support for concatenation axis >=2) - auto *concat_node = arm_compute::utils::cast::polymorphic_downcast(node.get()); + auto *concat_node = arm_compute::utils::cast::polymorphic_downcast(node); if(output_tensor == nullptr || get_dimension_idx(output_tensor->desc(), 
concat_node->concatenation_axis()) < 2) { continue; @@ -84,7 +95,7 @@ void DepthConcatSubTensorMutator::mutate(Graph &g) depth += input_shape.z(); } - auto *dc_node = arm_compute::utils::cast::polymorphic_downcast(node.get()); + auto *dc_node = arm_compute::utils::cast::polymorphic_downcast(node); dc_node->set_enabled(false); } } diff --git a/src/graph/mutators/GroupedConvolutionMutator.cpp b/src/graph/mutators/GroupedConvolutionMutator.cpp new file mode 100644 index 0000000000..d2643d5428 --- /dev/null +++ b/src/graph/mutators/GroupedConvolutionMutator.cpp @@ -0,0 +1,186 @@ +/* + * Copyright (c) 2018 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/graph/mutators/GroupedConvolutionMutator.h" + +#include "arm_compute/graph/Graph.h" +#include "arm_compute/graph/GraphBuilder.h" +#include "arm_compute/graph/Logger.h" +#include "arm_compute/graph/Utils.h" +#include "arm_compute/graph/backends/BackendRegistry.h" +#include "arm_compute/graph/nodes/Nodes.h" + +#include "arm_compute/core/utils/misc/Cast.h" + +#include + +namespace arm_compute +{ +namespace graph +{ +namespace +{ +NodeID create_grouped_convolution(Graph &g, const NodeParams ¶ms, NodeIdxPair input, NodeID weights, NodeID bias, + PadStrideInfo conv_info, ConvolutionMethod method, FastMathHint fast_math_hint, unsigned int num_groups) +{ + bool has_bias = (bias != EmptyNodeID); + + // Split input + const TensorDescriptor input_tensor_desc = get_tensor_descriptor(g, g.node(input.node_id)->outputs()[0]); + const unsigned int input_idx = get_dimension_idx(input_tensor_desc, DataLayoutDimension::CHANNEL); + NodeID input_split = GraphBuilder::add_split_node(g, params, input, num_groups, input_idx); + + // Split weights + const TensorDescriptor weights_tensor_desc = get_tensor_descriptor(g, g.node(weights)->outputs()[0]); + const unsigned int batch_idx = get_dimension_idx(weights_tensor_desc, DataLayoutDimension::BATCHES); + NodeID weights_split = GraphBuilder::add_split_node(g, params, { weights, 0 }, num_groups, batch_idx); + + // Split bias + NodeID bias_split = EmptyNodeID; + if(has_bias) + { + // Split bias + bias_split = GraphBuilder::add_split_node(g, params, { bias, 0 }, num_groups, 0); + } + + std::vector convolution_outputs; + for(unsigned int i = 0; i < num_groups; ++i) + { + NodeParams group_params = params; + NodeID conv_nid = g.add_node(conv_info, 1, method, fast_math_hint); + g.add_connection(input_split, i, conv_nid, 0); + g.add_connection(weights_split, i, conv_nid, 1); + if(has_bias) + { + g.add_connection(bias_split, i, conv_nid, 2); + } + + // Add group name + if(!group_params.name.empty()) + { + 
group_params.name.append("_g" + arm_compute::support::cpp11::to_string(i)); + } + + // Set node parameters + INode *node = g.node(conv_nid); + ARM_COMPUTE_ERROR_ON(node == nullptr); + node->set_common_node_parameters(group_params); + + convolution_outputs.push_back({ conv_nid, 0 }); + } + + // Depth concatenate output + return GraphBuilder::add_concatenate_node(g, params, convolution_outputs, DataLayoutDimension::CHANNEL); +} +} // namespace + +const char *GroupedConvolutionMutator::name() +{ + return "GroupedConvolutionMutator"; +} + +void GroupedConvolutionMutator::mutate(Graph &g) +{ + // Early exit if no Convolution layers exist in graph + if(g.nodes(NodeType::ConvolutionLayer).empty()) + { + return; + } + + // Total nodes + size_t total_nodes = g.nodes().size(); + + // Iterate over convolution nodes + for(unsigned int i = 0; i < total_nodes; ++i) + { + INode *node = g.node(i); + if(node != nullptr && node->type() == NodeType::ConvolutionLayer && arm_compute::utils::cast::polymorphic_downcast(node)->num_groups() != 1) + { + // Validate node + backends::IDeviceBackend *backend = backends::BackendRegistry::get().find_backend(node->assigned_target()); + Status status = backend->validate_node(*node); + + // If grouped convolution is not supported + if(!bool(status)) + { + // Down-cast node + auto *conv_node = arm_compute::utils::cast::polymorphic_downcast(node); + + // Get internal convolution info + // TODO (geopin01) : Create a descriptor + const PadStrideInfo conv_info = conv_node->convolution_info(); + const ConvolutionMethod conv_method = conv_node->convolution_method(); + const FastMathHint fast_math_hint = conv_node->fast_math_hint(); + const unsigned int num_groups = conv_node->num_groups(); + const NodeParams params = conv_node->common_node_params(); + const Target assigned_target = conv_node->assigned_target(); + + // Extract node ids + const NodeID input_id = conv_node->input_id(0); + const NodeID weights_id = conv_node->input_id(1); + const NodeID 
bias_id = conv_node->input_id(2); + + // Get driving nodes + std::vector driving_nodes = get_driving_nodes(*node); + + // Extract activation node accessor if any + auto node_accessor = conv_node->output(0)->extract_accessor(); + + // Current max tensor and node id + TensorID latest_tid = g.tensors().size(); + NodeID latest_nid = g.nodes().size(); + + // Create grouped convolution node + NodeID grouped_conv_id = create_grouped_convolution(g, params, { input_id, 0 }, weights_id, bias_id, + conv_info, conv_method, fast_math_hint, num_groups); + + // Remove convolution node + g.remove_node(node->id()); + + // Reconnect the driving nodes to the grouped convolution output + for(auto &driving_node : driving_nodes) + { + g.add_connection(grouped_conv_id, 0, driving_node.node_id, driving_node.index); + } + + // Restore the output accessor on the grouped convolution node + g.node(grouped_conv_id)->output(0)->set_accessor(std::move(node_accessor)); + + // Configure new tensors and nodes + std::for_each(g.tensors().begin() + latest_tid, g.tensors().end(), [](std::unique_ptr &t) + { + configure_tensor(t.get()); + }); + std::for_each(g.nodes().begin() + latest_nid, g.nodes().end(), [&assigned_target](std::unique_ptr &n) + { + if(n != nullptr) + { + n->set_assigned_target(assigned_target); + } + }); + } + } + } +} +} // namespace graph +} // namespace arm_compute diff --git a/src/graph/mutators/NodeExecutionMethodMutator.cpp b/src/graph/mutators/NodeExecutionMethodMutator.cpp new file mode 100644 index 0000000000..896bf0742c --- /dev/null +++ b/src/graph/mutators/NodeExecutionMethodMutator.cpp @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2018 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/graph/mutators/NodeExecutionMethodMutator.h" + +#include "arm_compute/graph/Graph.h" +#include "arm_compute/graph/Logger.h" +#include "arm_compute/graph/Utils.h" +#include "arm_compute/graph/backends/BackendRegistry.h" +#include "arm_compute/graph/nodes/Nodes.h" + +#include "arm_compute/core/utils/misc/Cast.h" + +namespace arm_compute +{ +namespace graph +{ +namespace +{ +/** Runs a default setter function on a given types of nodes + * + * @tparam Setter Setter function to run + * + * @param[in, out] g Graph to extract the nodes from + * @param[in] node_type Node type + * @param[in] setter Setter function + */ +template +void set_default_on_invalid_method(Graph &g, NodeType node_type, Setter &&setter) +{ + const std::vector &node_ids = g.nodes(node_type); + for(auto &node_id : node_ids) + { + INode *node = g.node(node_id); + if(node != nullptr) + { + // Validate node + backends::IDeviceBackend *backend = backends::BackendRegistry::get().find_backend(node->assigned_target()); + Status status = backend->validate_node(*node); + + // Set default execution method in case of failure + if(!bool(status)) + { + setter(node); + } + } + } +} +} // namespace + +const char *NodeExecutionMethodMutator::name() +{ + return "NodeExecutionMethodMutator"; +} + +void NodeExecutionMethodMutator::mutate(Graph &g) +{ + // Convolution Layer + set_default_on_invalid_method(g, NodeType::ConvolutionLayer, [](INode * n) + { + ARM_COMPUTE_LOG_GRAPH_INFO("Switched ConvolutionLayer method of node with ID : " + << n->id() << " and Name: " << n->name() << std::endl); + auto *casted_node = arm_compute::utils::cast::polymorphic_downcast(n); + casted_node->set_convolution_method(ConvolutionMethod::Default); + }); + + // Depthwise Convolution Layer + set_default_on_invalid_method(g, NodeType::DepthwiseConvolutionLayer, [](INode * n) + { + ARM_COMPUTE_LOG_GRAPH_INFO("Switched Depthwise ConvolutionLayer method of node with ID : " + << n->id() << " and Name: " << n->name() << 
std::endl); + auto *casted_node = arm_compute::utils::cast::polymorphic_downcast(n); + casted_node->set_depthwise_convolution_method(DepthwiseConvolutionMethod::Default); + }); +} +} // namespace graph +} // namespace arm_compute diff --git a/src/graph/mutators/NodeFusionMutator.cpp b/src/graph/mutators/NodeFusionMutator.cpp index 6677330cec..82bfe25a3e 100644 --- a/src/graph/mutators/NodeFusionMutator.cpp +++ b/src/graph/mutators/NodeFusionMutator.cpp @@ -25,6 +25,7 @@ #include "arm_compute/graph/Graph.h" #include "arm_compute/graph/Logger.h" +#include "arm_compute/graph/Utils.h" #include "arm_compute/graph/nodes/Nodes.h" #include "arm_compute/core/utils/misc/Cast.h" @@ -71,17 +72,7 @@ void fuse_batch_norm_with_activation(Graph &g) if(bn_node->output(0)->accessor() == nullptr) { // Get driving nodes of activation node - std::vector act_driving_nodes; - for(auto &act_output_edge_id : act_node->output_edges()) - { - auto act_output_edge = g.edge(act_output_edge_id); - if(act_output_edge != nullptr) - { - ARM_COMPUTE_ERROR_ON(act_output_edge->consumer() == nullptr); - act_driving_nodes.push_back( - { act_output_edge->consumer_id(), act_output_edge->consumer_idx() }); - } - } + std::vector act_driving_nodes = get_driving_nodes(*act_node); // Set activation info to batch normalization bn_node->set_fused_activation(act_node->activation_info()); diff --git a/src/graph/mutators/SplitLayerSubTensorMutator.cpp b/src/graph/mutators/SplitLayerSubTensorMutator.cpp index 2a8c029843..5f1c9c3186 100644 --- a/src/graph/mutators/SplitLayerSubTensorMutator.cpp +++ b/src/graph/mutators/SplitLayerSubTensorMutator.cpp @@ -25,6 +25,7 @@ #include "arm_compute/graph/Graph.h" #include "arm_compute/graph/Logger.h" +#include "arm_compute/graph/algorithms/TopologicalSort.h" #include "arm_compute/graph/backends/BackendRegistry.h" #include "arm_compute/graph/nodes/SplitLayerNode.h" @@ -42,10 +43,20 @@ const char *SplitLayerSubTensorMutator::name() void SplitLayerSubTensorMutator::mutate(Graph 
&g) { + // Early exit if no Split layers exist in graph + if(g.nodes(NodeType::SplitLayer).empty()) + { + return; + } + + // Perform topological sort + std::vector topological_sorted_node_ids = dfs(g); + // Should be in reverse order of execution - for(auto &node : arm_compute::utils::iterable::reverse_iterate(g.nodes())) + for(auto &node_id : arm_compute::utils::iterable::reverse_iterate(topological_sorted_node_ids)) { - if(node && node->type() == NodeType::SplitLayer && node->input(0) != nullptr) + INode *node = g.node(node_id); + if(node != nullptr && node->type() == NodeType::SplitLayer && node->input(0) != nullptr) { // Get output tensor Tensor *input_tensor = node->input(0); @@ -63,7 +74,7 @@ void SplitLayerSubTensorMutator::mutate(Graph &g) ARM_COMPUTE_LOG_GRAPH_VERBOSE("Using sub-tensors for the node with ID : " << node->id() << " and name : " << node->name() << std::endl); - auto *split_node = arm_compute::utils::cast::polymorphic_downcast(node.get()); + auto *split_node = arm_compute::utils::cast::polymorphic_downcast(node); const unsigned int axis = split_node->axis(); const unsigned int num_splits = split_node->num_splits(); diff --git a/src/graph/nodes/ConvolutionLayerNode.cpp b/src/graph/nodes/ConvolutionLayerNode.cpp index 81cb2c2e6c..e9cb0396eb 100644 --- a/src/graph/nodes/ConvolutionLayerNode.cpp +++ b/src/graph/nodes/ConvolutionLayerNode.cpp @@ -32,8 +32,12 @@ namespace arm_compute { namespace graph { -ConvolutionLayerNode::ConvolutionLayerNode(PadStrideInfo info, ConvolutionMethod method, FastMathHint fast_math_hint, QuantizationInfo out_quant_info) - : _info(std::move(info)), _method(method), _fast_math_hint(fast_math_hint), _out_quant_info(out_quant_info) +ConvolutionLayerNode::ConvolutionLayerNode(PadStrideInfo info, + unsigned int num_groups, + ConvolutionMethod method, + FastMathHint fast_math_hint, + QuantizationInfo out_quant_info) + : _info(std::move(info)), _num_groups(num_groups), _method(method), _fast_math_hint(fast_math_hint), 
_out_quant_info(out_quant_info) { _input_edges.resize(3, EmptyEdgeID); _outputs.resize(1, NullTensorID); @@ -64,6 +68,11 @@ PadStrideInfo ConvolutionLayerNode::convolution_info() const return _info; } +unsigned int ConvolutionLayerNode::num_groups() const +{ + return _num_groups; +} + TensorDescriptor ConvolutionLayerNode::compute_output_descriptor(const TensorDescriptor &input_descriptor, const TensorDescriptor &weights_descriptor, const PadStrideInfo &info) -- cgit v1.2.1