aboutsummaryrefslogtreecommitdiff
path: root/src/graph
diff options
context:
space:
mode:
authorGeorgios Pinitas <georgios.pinitas@arm.com>2018-08-15 12:14:46 +0100
committerAnthony Barbier <anthony.barbier@arm.com>2018-11-02 16:54:54 +0000
commit2a2db590fd179dcb8e1a575293cd2b887e2dc246 (patch)
tree5e10da7cb6777f3020b84a2389b279ceef2be5ee /src/graph
parentc1961b51df2e15a01a5950139e81bbd47fbfa627 (diff)
downloadComputeLibrary-2a2db590fd179dcb8e1a575293cd2b887e2dc246.tar.gz
COMPMID-1505: Add native grouping support at graph level
Change-Id: Iedc91b0aee743b59af5140c8acb8124548da3163 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/144362 Tested-by: Jenkins <bsgcomp@arm.com> Reviewed-by: Giorgio Arena <giorgio.arena@arm.com> Reviewed-by: Michele DiGiorgio <michele.digiorgio@arm.com>
Diffstat (limited to 'src/graph')
-rw-r--r--src/graph/GraphBuilder.cpp74
-rw-r--r--src/graph/GraphManager.cpp6
-rw-r--r--src/graph/INode.cpp5
-rw-r--r--src/graph/Utils.cpp41
-rw-r--r--src/graph/algorithms/TopologicalSort.cpp188
-rw-r--r--src/graph/backends/GLES/GCNodeValidator.cpp6
-rw-r--r--src/graph/detail/ExecutionHelpers.cpp13
-rw-r--r--src/graph/mutators/DepthConcatSubTensorMutator.cpp19
-rw-r--r--src/graph/mutators/GroupedConvolutionMutator.cpp186
-rw-r--r--src/graph/mutators/NodeExecutionMethodMutator.cpp97
-rw-r--r--src/graph/mutators/NodeFusionMutator.cpp13
-rw-r--r--src/graph/mutators/SplitLayerSubTensorMutator.cpp17
-rw-r--r--src/graph/nodes/ConvolutionLayerNode.cpp13
13 files changed, 581 insertions, 97 deletions
diff --git a/src/graph/GraphBuilder.cpp b/src/graph/GraphBuilder.cpp
index fa78024e71..81a18c437f 100644
--- a/src/graph/GraphBuilder.cpp
+++ b/src/graph/GraphBuilder.cpp
@@ -25,7 +25,7 @@
#include "arm_compute/graph/Graph.h"
#include "arm_compute/graph/Utils.h"
-#include "arm_compute/graph/algorithms/BFS.h"
+#include "arm_compute/graph/algorithms/TopologicalSort.h"
#include "arm_compute/graph/nodes/Nodes.h"
#include "support/ToolchainSupport.h"
@@ -81,53 +81,6 @@ NodeID create_simple_single_input_output_node(Graph &g, NodeParams &params, Node
return nid;
}
-
-NodeID create_grouped_convolution(Graph &g, const NodeParams &params, NodeIdxPair input, NodeID weights, NodeID bias,
- PadStrideInfo conv_info, ConvolutionMethod method, FastMathHint fast_math_hint, unsigned int num_groups)
-{
- bool has_bias = (bias != EmptyNodeID);
-
- // Split input
- const TensorDescriptor input_tensor_desc = get_tensor_descriptor(g, g.node(input.node_id)->outputs()[0]);
- const unsigned int input_idx = get_dimension_idx(input_tensor_desc, DataLayoutDimension::CHANNEL);
- NodeID input_split = GraphBuilder::add_split_node(g, params, input, num_groups, input_idx);
-
- // Split weights
- const TensorDescriptor weights_tensor_desc = get_tensor_descriptor(g, g.node(weights)->outputs()[0]);
- const unsigned int batch_idx = get_dimension_idx(weights_tensor_desc, DataLayoutDimension::BATCHES);
- NodeID weights_split = GraphBuilder::add_split_node(g, params, { weights, 0 }, num_groups, batch_idx);
-
- // Split bias
- NodeID bias_split = EmptyNodeID;
- if(has_bias)
- {
- // Split bias
- bias_split = GraphBuilder::add_split_node(g, params, { bias, 0 }, num_groups, 0);
- }
-
- std::vector<NodeIdxPair> convolution_outputs;
- for(unsigned int i = 0; i < num_groups; ++i)
- {
- NodeParams group_params = params;
- NodeID conv_nid = g.add_node<ConvolutionLayerNode>(conv_info, method, fast_math_hint);
- g.add_connection(input_split, i, conv_nid, 0);
- g.add_connection(weights_split, i, conv_nid, 1);
- if(has_bias)
- {
- g.add_connection(bias_split, i, conv_nid, 2);
- }
- // Add group name
- if(!group_params.name.empty())
- {
- group_params.name.append("_g" + arm_compute::support::cpp11::to_string(i));
- }
- set_node_params(g, conv_nid, group_params);
- convolution_outputs.push_back({ conv_nid, 0 });
- }
-
- // Depth concatenate output
- return GraphBuilder::add_concatenate_node(g, params, convolution_outputs, DataLayoutDimension::CHANNEL);
-}
} // namespace
NodeID GraphBuilder::add_const_node(Graph &g, NodeParams params, TensorDescriptor desc, ITensorAccessorUPtr accessor)
@@ -263,24 +216,17 @@ NodeID GraphBuilder::add_convolution_node(Graph &g, NodeParams params, NodeIdxPa
b_nid = add_const_node_with_name(g, params, "Bias", b_desc, std::move(bias_accessor));
}
- if(num_groups == 1)
- {
- // Create convolution node and connect
- NodeID conv_nid = g.add_node<ConvolutionLayerNode>(conv_info, method, fast_math_hint, out_quant_info);
- g.add_connection(input.node_id, input.index, conv_nid, 0);
- g.add_connection(w_nid, 0, conv_nid, 1);
- if(has_bias)
- {
- g.add_connection(b_nid, 0, conv_nid, 2);
- }
- set_node_params(g, conv_nid, params);
-
- return conv_nid;
- }
- else
+ // Create convolution node and connect
+ NodeID conv_nid = g.add_node<ConvolutionLayerNode>(conv_info, num_groups, method, fast_math_hint, out_quant_info);
+ g.add_connection(input.node_id, input.index, conv_nid, 0);
+ g.add_connection(w_nid, 0, conv_nid, 1);
+ if(has_bias)
{
- return create_grouped_convolution(g, params, input, w_nid, b_nid, conv_info, method, fast_math_hint, num_groups);
+ g.add_connection(b_nid, 0, conv_nid, 2);
}
+ set_node_params(g, conv_nid, params);
+
+ return conv_nid;
}
NodeID GraphBuilder::add_deconvolution_node(Graph &g, NodeParams params, NodeIdxPair input,
diff --git a/src/graph/GraphManager.cpp b/src/graph/GraphManager.cpp
index db6650cf69..5be4e7e2ba 100644
--- a/src/graph/GraphManager.cpp
+++ b/src/graph/GraphManager.cpp
@@ -32,6 +32,8 @@
#include "arm_compute/graph/detail/CrossLayerMemoryManagerHelpers.h"
#include "arm_compute/graph/detail/ExecutionHelpers.h"
+#include "arm_compute/graph/algorithms/TopologicalSort.h"
+
namespace arm_compute
{
namespace graph
@@ -69,13 +71,13 @@ void GraphManager::finalize_graph(Graph &graph, GraphContext &ctx, PassManager &
pm.run_all(graph);
// Perform topological sort
- // FIXME : Sort nodes and pass sorted indices in configure all nodes
+ std::vector<NodeID> topological_sorted_nodes = dfs(graph);
// Validate all nodes
detail::validate_all_nodes(graph);
// Configure all nodes
- auto workload = detail::configure_all_nodes(graph, ctx);
+ auto workload = detail::configure_all_nodes(graph, ctx, topological_sorted_nodes);
ARM_COMPUTE_ERROR_ON_MSG(workload.tasks.empty(), "Could not configure all nodes!");
// Allocate const tensors and call accessors
diff --git a/src/graph/INode.cpp b/src/graph/INode.cpp
index cd9a46ac40..b0c31372aa 100644
--- a/src/graph/INode.cpp
+++ b/src/graph/INode.cpp
@@ -185,6 +185,11 @@ size_t INode::num_outputs() const
return _outputs.size();
}
+NodeParams INode::common_node_params() const
+{
+ return _common_params;
+}
+
Target INode::requested_target() const
{
return _common_params.target;
diff --git a/src/graph/Utils.cpp b/src/graph/Utils.cpp
index 4715694f15..75644a8933 100644
--- a/src/graph/Utils.cpp
+++ b/src/graph/Utils.cpp
@@ -78,13 +78,21 @@ PassManager create_default_pass_manager(Target target)
{
PassManager pm;
+ // Passes that mutate graph IR
+ pm.append(support::cpp14::make_unique<GroupedConvolutionMutator>());
if(target != Target::GC)
{
pm.append(support::cpp14::make_unique<NodeFusionMutator>());
pm.append(support::cpp14::make_unique<InPlaceOperationMutator>());
+ }
+
+ // Passes that mutate backend information
+ if(target != Target::GC)
+ {
pm.append(support::cpp14::make_unique<DepthConcatSubTensorMutator>());
pm.append(support::cpp14::make_unique<SplitLayerSubTensorMutator>());
}
+ pm.append(support::cpp14::make_unique<NodeExecutionMethodMutator>());
return pm;
}
@@ -139,5 +147,38 @@ size_t get_dimension_idx(const TensorDescriptor &descriptor, const DataLayoutDim
break;
}
}
+
+std::vector<NodeIdxPair> get_driving_nodes(const INode &node)
+{
+ std::vector<NodeIdxPair> driving_nodes;
+
+ const Graph *g = node.graph();
+ ARM_COMPUTE_ERROR_ON(g == nullptr);
+
+ for(auto &output_edge_id : node.output_edges())
+ {
+ auto output_edge = g->edge(output_edge_id);
+ if(output_edge != nullptr)
+ {
+ ARM_COMPUTE_ERROR_ON(output_edge->consumer() == nullptr);
+ driving_nodes.push_back({ output_edge->consumer_id(), output_edge->consumer_idx() });
+ }
+ }
+
+ return driving_nodes;
+}
+
+void configure_tensor(Tensor *tensor)
+{
+ if(tensor != nullptr && tensor->handle() == nullptr)
+ {
+ Target target = tensor->desc().target;
+ auto backend = backends::BackendRegistry::get().find_backend(target);
+ ARM_COMPUTE_ERROR_ON_MSG(!backend, "Requested backend doesn't exist!");
+ auto handle = backend->create_tensor(*tensor);
+ ARM_COMPUTE_ERROR_ON_MSG(!backend, "Couldn't create backend handle!");
+ tensor->set_handle(std::move(handle));
+ }
+}
} // namespace graph
} // namespace arm_compute
diff --git a/src/graph/algorithms/TopologicalSort.cpp b/src/graph/algorithms/TopologicalSort.cpp
new file mode 100644
index 0000000000..0fbf6e32e8
--- /dev/null
+++ b/src/graph/algorithms/TopologicalSort.cpp
@@ -0,0 +1,188 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/graph/algorithms/TopologicalSort.h"
+
+#include "arm_compute/graph/Graph.h"
+
+#include "arm_compute/core/utils/misc/Iterable.h"
+
+#include <list>
+#include <stack>
+
+namespace arm_compute
+{
+namespace graph
+{
+namespace detail
+{
+/** Checks if all the input dependencies of a node have been visited
+ *
+ * @param[in] node Node to check
+ * @param[in] visited Vector that contains the visited information
+ *
+ * @return True if all inputs dependencies have been visited else false
+ */
+inline bool all_inputs_are_visited(const INode *node, const std::vector<bool> &visited)
+{
+ ARM_COMPUTE_ERROR_ON(node == nullptr);
+ const Graph *graph = node->graph();
+ ARM_COMPUTE_ERROR_ON(graph == nullptr);
+
+ bool are_all_visited = true;
+ for(const auto &input_edge_id : node->input_edges())
+ {
+ if(input_edge_id != EmptyNodeID)
+ {
+ const Edge *input_edge = graph->edge(input_edge_id);
+ ARM_COMPUTE_ERROR_ON(input_edge == nullptr);
+ ARM_COMPUTE_ERROR_ON(input_edge->producer() == nullptr);
+ if(!visited[input_edge->producer_id()])
+ {
+ are_all_visited = false;
+ break;
+ }
+ }
+ }
+
+ return are_all_visited;
+}
+} // namespace detail
+
+std::vector<NodeID> bfs(Graph &g)
+{
+ std::vector<NodeID> bfs_order_vector;
+
+ // Created visited vector
+ std::vector<bool> visited(g.nodes().size(), false);
+
+ // Create BFS queue
+ std::list<NodeID> queue;
+
+ // Push inputs and mark as visited
+ for(auto &input : g.nodes(NodeType::Input))
+ {
+ if(input != EmptyNodeID)
+ {
+ visited[input] = true;
+ queue.push_back(input);
+ }
+ }
+
+ // Push const nodes and mark as visited
+ for(auto &const_node : g.nodes(NodeType::Const))
+ {
+ if(const_node != EmptyNodeID)
+ {
+ visited[const_node] = true;
+ queue.push_back(const_node);
+ }
+ }
+
+ // Iterate over vector and edges
+ while(!queue.empty())
+ {
+ // Dequeue a node from queue and process
+ NodeID n = queue.front();
+ bfs_order_vector.push_back(n);
+ queue.pop_front();
+
+ const INode *node = g.node(n);
+ ARM_COMPUTE_ERROR_ON(node == nullptr);
+ for(const auto &eid : node->output_edges())
+ {
+ const Edge *e = g.edge(eid);
+ ARM_COMPUTE_ERROR_ON(e == nullptr);
+ if(!visited[e->consumer_id()] && detail::all_inputs_are_visited(e->consumer(), visited))
+ {
+ visited[e->consumer_id()] = true;
+ queue.push_back(e->consumer_id());
+ }
+ }
+ }
+
+ return bfs_order_vector;
+}
+
+std::vector<NodeID> dfs(Graph &g)
+{
+ std::vector<NodeID> dfs_order_vector;
+
+ // Created visited vector
+ std::vector<bool> visited(g.nodes().size(), false);
+
+ // Create DFS stack
+ std::stack<NodeID> stack;
+
+ // Push inputs and mark as visited
+ for(auto &input : g.nodes(NodeType::Input))
+ {
+ if(input != EmptyNodeID)
+ {
+ visited[input] = true;
+ stack.push(input);
+ }
+ }
+
+ // Push const nodes and mark as visited
+ for(auto &const_node : g.nodes(NodeType::Const))
+ {
+ if(const_node != EmptyNodeID)
+ {
+ visited[const_node] = true;
+ stack.push(const_node);
+ }
+ }
+
+ // Iterate over vector and edges
+ while(!stack.empty())
+ {
+ // Pop a node from stack and process
+ NodeID n = stack.top();
+ dfs_order_vector.push_back(n);
+ stack.pop();
+
+ // Mark node as visited
+ if(!visited[n])
+ {
+ visited[n] = true;
+ }
+
+ const INode *node = g.node(n);
+ ARM_COMPUTE_ERROR_ON(node == nullptr);
+ // Reverse iterate to push branches from right to left and pop on the opposite order
+ for(const auto &eid : arm_compute::utils::iterable::reverse_iterate(node->output_edges()))
+ {
+ const Edge *e = g.edge(eid);
+ ARM_COMPUTE_ERROR_ON(e == nullptr);
+ if(!visited[e->consumer_id()] && detail::all_inputs_are_visited(e->consumer(), visited))
+ {
+ stack.push(e->consumer_id());
+ }
+ }
+ }
+
+ return dfs_order_vector;
+}
+} // namespace graph
+} // namespace arm_compute \ No newline at end of file
diff --git a/src/graph/backends/GLES/GCNodeValidator.cpp b/src/graph/backends/GLES/GCNodeValidator.cpp
index 542cfdf973..31d1c8b456 100644
--- a/src/graph/backends/GLES/GCNodeValidator.cpp
+++ b/src/graph/backends/GLES/GCNodeValidator.cpp
@@ -80,15 +80,13 @@ Status validate_convolution_layer(ConvolutionLayerNode &node)
const ConvolutionMethod conv_algorithm = node.convolution_method();
// Validate function
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(node.num_groups() != 1, "Grouping is not supported by ConvolutionLayer!");
if(conv_algorithm == ConvolutionMethod::Direct)
{
bool is_square = weights->tensor_shape().x() == weights->tensor_shape().y();
bool is_direct = (weights->tensor_shape().x() == 1) || (weights->tensor_shape().x() == 3) || (weights->tensor_shape().x() == 5);
bool is_correct_stride = (conv_info.stride().first) <= 2 && (conv_info.stride().second <= 2);
- if(!(is_square && is_direct && is_correct_stride))
- {
- node.set_convolution_method(ConvolutionMethod::Default);
- }
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(!(is_square && is_direct && is_correct_stride), "Direct convolution is not supported for given configuration");
}
return Status{};
diff --git a/src/graph/detail/ExecutionHelpers.cpp b/src/graph/detail/ExecutionHelpers.cpp
index 6df67fcfec..6157b7fecf 100644
--- a/src/graph/detail/ExecutionHelpers.cpp
+++ b/src/graph/detail/ExecutionHelpers.cpp
@@ -59,7 +59,7 @@ void configure_all_tensors(Graph &g)
for(auto &tensor : tensors)
{
- if(tensor)
+ if(tensor && tensor->handle() == nullptr)
{
Target target = tensor->desc().target;
auto backend = backends::BackendRegistry::get().find_backend(target);
@@ -131,17 +131,16 @@ void allocate_all_tensors(Graph &g)
}
}
-ExecutionWorkload configure_all_nodes(Graph &g, GraphContext &ctx)
+ExecutionWorkload configure_all_nodes(Graph &g, GraphContext &ctx, const std::vector<NodeID> &node_order)
{
ExecutionWorkload workload;
workload.graph = &g;
workload.ctx = &ctx;
- auto &nodes = g.nodes();
-
// Create tasks
- for(auto &node : nodes)
+ for(auto &node_id : node_order)
{
+ auto node = g.node(node_id);
if(node != nullptr)
{
Target assigned_target = node->assigned_target();
@@ -152,14 +151,14 @@ ExecutionWorkload configure_all_nodes(Graph &g, GraphContext &ctx)
{
ExecutionTask task;
task.task = std::move(func);
- task.node = node.get();
+ task.node = node;
workload.tasks.push_back(std::move(task));
}
}
}
// Add inputs and outputs
- for(auto &node : nodes)
+ for(auto &node : g.nodes())
{
if(node != nullptr && node->type() == NodeType::Input)
{
diff --git a/src/graph/mutators/DepthConcatSubTensorMutator.cpp b/src/graph/mutators/DepthConcatSubTensorMutator.cpp
index 241c07b367..937528d143 100644
--- a/src/graph/mutators/DepthConcatSubTensorMutator.cpp
+++ b/src/graph/mutators/DepthConcatSubTensorMutator.cpp
@@ -26,6 +26,7 @@
#include "arm_compute/graph/Graph.h"
#include "arm_compute/graph/Logger.h"
#include "arm_compute/graph/Utils.h"
+#include "arm_compute/graph/algorithms/TopologicalSort.h"
#include "arm_compute/graph/backends/BackendRegistry.h"
#include "arm_compute/graph/nodes/ConcatenateLayerNode.h"
@@ -43,16 +44,26 @@ const char *DepthConcatSubTensorMutator::name()
void DepthConcatSubTensorMutator::mutate(Graph &g)
{
+ // Early exit if no Concatenation layers exist in graph
+ if(g.nodes(NodeType::ConcatenateLayer).empty())
+ {
+ return;
+ }
+
+ // Perform topological sort
+ std::vector<NodeID> topological_sorted_node_ids = dfs(g);
+
// Should be in reverse order of execution
- for(auto &node : arm_compute::utils::iterable::reverse_iterate(g.nodes()))
+ for(auto &node_id : arm_compute::utils::iterable::reverse_iterate(topological_sorted_node_ids))
{
- if(node && node->type() == NodeType::ConcatenateLayer && node->output(0) != nullptr)
+ INode *node = g.node(node_id);
+ if(node != nullptr && node->type() == NodeType::ConcatenateLayer && node->output(0) != nullptr)
{
// Get output tensor
auto output_tensor = node->output(0);
// Check concatenation axis (Sub-tensor optimization is support for concatenation axis >=2)
- auto *concat_node = arm_compute::utils::cast::polymorphic_downcast<ConcatenateLayerNode *>(node.get());
+ auto *concat_node = arm_compute::utils::cast::polymorphic_downcast<ConcatenateLayerNode *>(node);
if(output_tensor == nullptr || get_dimension_idx(output_tensor->desc(), concat_node->concatenation_axis()) < 2)
{
continue;
@@ -84,7 +95,7 @@ void DepthConcatSubTensorMutator::mutate(Graph &g)
depth += input_shape.z();
}
- auto *dc_node = arm_compute::utils::cast::polymorphic_downcast<ConcatenateLayerNode *>(node.get());
+ auto *dc_node = arm_compute::utils::cast::polymorphic_downcast<ConcatenateLayerNode *>(node);
dc_node->set_enabled(false);
}
}
diff --git a/src/graph/mutators/GroupedConvolutionMutator.cpp b/src/graph/mutators/GroupedConvolutionMutator.cpp
new file mode 100644
index 0000000000..d2643d5428
--- /dev/null
+++ b/src/graph/mutators/GroupedConvolutionMutator.cpp
@@ -0,0 +1,186 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/graph/mutators/GroupedConvolutionMutator.h"
+
+#include "arm_compute/graph/Graph.h"
+#include "arm_compute/graph/GraphBuilder.h"
+#include "arm_compute/graph/Logger.h"
+#include "arm_compute/graph/Utils.h"
+#include "arm_compute/graph/backends/BackendRegistry.h"
+#include "arm_compute/graph/nodes/Nodes.h"
+
+#include "arm_compute/core/utils/misc/Cast.h"
+
+#include <set>
+
+namespace arm_compute
+{
+namespace graph
+{
+namespace
+{
+NodeID create_grouped_convolution(Graph &g, const NodeParams &params, NodeIdxPair input, NodeID weights, NodeID bias,
+ PadStrideInfo conv_info, ConvolutionMethod method, FastMathHint fast_math_hint, unsigned int num_groups)
+{
+ bool has_bias = (bias != EmptyNodeID);
+
+ // Split input
+ const TensorDescriptor input_tensor_desc = get_tensor_descriptor(g, g.node(input.node_id)->outputs()[0]);
+ const unsigned int input_idx = get_dimension_idx(input_tensor_desc, DataLayoutDimension::CHANNEL);
+ NodeID input_split = GraphBuilder::add_split_node(g, params, input, num_groups, input_idx);
+
+ // Split weights
+ const TensorDescriptor weights_tensor_desc = get_tensor_descriptor(g, g.node(weights)->outputs()[0]);
+ const unsigned int batch_idx = get_dimension_idx(weights_tensor_desc, DataLayoutDimension::BATCHES);
+ NodeID weights_split = GraphBuilder::add_split_node(g, params, { weights, 0 }, num_groups, batch_idx);
+
+ // Split bias
+ NodeID bias_split = EmptyNodeID;
+ if(has_bias)
+ {
+ // Split bias
+ bias_split = GraphBuilder::add_split_node(g, params, { bias, 0 }, num_groups, 0);
+ }
+
+ std::vector<NodeIdxPair> convolution_outputs;
+ for(unsigned int i = 0; i < num_groups; ++i)
+ {
+ NodeParams group_params = params;
+ NodeID conv_nid = g.add_node<ConvolutionLayerNode>(conv_info, 1, method, fast_math_hint);
+ g.add_connection(input_split, i, conv_nid, 0);
+ g.add_connection(weights_split, i, conv_nid, 1);
+ if(has_bias)
+ {
+ g.add_connection(bias_split, i, conv_nid, 2);
+ }
+
+ // Add group name
+ if(!group_params.name.empty())
+ {
+ group_params.name.append("_g" + arm_compute::support::cpp11::to_string(i));
+ }
+
+ // Set node parameters
+ INode *node = g.node(conv_nid);
+ ARM_COMPUTE_ERROR_ON(node == nullptr);
+ node->set_common_node_parameters(group_params);
+
+ convolution_outputs.push_back({ conv_nid, 0 });
+ }
+
+ // Depth concatenate output
+ return GraphBuilder::add_concatenate_node(g, params, convolution_outputs, DataLayoutDimension::CHANNEL);
+}
+} // namespace
+
+const char *GroupedConvolutionMutator::name()
+{
+ return "GroupedConvolutionMutator";
+}
+
+void GroupedConvolutionMutator::mutate(Graph &g)
+{
+ // Early exit if no Convolution layers exist in graph
+ if(g.nodes(NodeType::ConvolutionLayer).empty())
+ {
+ return;
+ }
+
+ // Total nodes
+ size_t total_nodes = g.nodes().size();
+
+ // Iterate over convolution nodes
+ for(unsigned int i = 0; i < total_nodes; ++i)
+ {
+ INode *node = g.node(i);
+ if(node != nullptr && node->type() == NodeType::ConvolutionLayer && arm_compute::utils::cast::polymorphic_downcast<ConvolutionLayerNode *>(node)->num_groups() != 1)
+ {
+ // Validate node
+ backends::IDeviceBackend *backend = backends::BackendRegistry::get().find_backend(node->assigned_target());
+ Status status = backend->validate_node(*node);
+
+ // If grouped convolution is not supported
+ if(!bool(status))
+ {
+ // Down-cast node
+ auto *conv_node = arm_compute::utils::cast::polymorphic_downcast<ConvolutionLayerNode *>(node);
+
+ // Get internal convolution info
+ // TODO (geopin01) : Create a descriptor
+ const PadStrideInfo conv_info = conv_node->convolution_info();
+ const ConvolutionMethod conv_method = conv_node->convolution_method();
+ const FastMathHint fast_math_hint = conv_node->fast_math_hint();
+ const unsigned int num_groups = conv_node->num_groups();
+ const NodeParams params = conv_node->common_node_params();
+ const Target assigned_target = conv_node->assigned_target();
+
+ // Extract node ids
+ const NodeID input_id = conv_node->input_id(0);
+ const NodeID weights_id = conv_node->input_id(1);
+ const NodeID bias_id = conv_node->input_id(2);
+
+ // Get driving nodes
+ std::vector<NodeIdxPair> driving_nodes = get_driving_nodes(*node);
+
+ // Extract activation node accessor if any
+ auto node_accessor = conv_node->output(0)->extract_accessor();
+
+ // Current max tensor and node id
+ TensorID latest_tid = g.tensors().size();
+ NodeID latest_nid = g.nodes().size();
+
+ // Create grouped convolution node
+ NodeID grouped_conv_id = create_grouped_convolution(g, params, { input_id, 0 }, weights_id, bias_id,
+ conv_info, conv_method, fast_math_hint, num_groups);
+
+ // Remove convolution node
+ g.remove_node(node->id());
+
+ // Update batch normalization node outputs
+ for(auto &driving_node : driving_nodes)
+ {
+ g.add_connection(grouped_conv_id, 0, driving_node.node_id, driving_node.index);
+ }
+
+ // Update accessor to batch normalization node
+ g.node(grouped_conv_id)->output(0)->set_accessor(std::move(node_accessor));
+
+ // Configure new tensors and nodes
+ std::for_each(g.tensors().begin() + latest_tid, g.tensors().end(), [](std::unique_ptr<Tensor> &t)
+ {
+ configure_tensor(t.get());
+ });
+ std::for_each(g.nodes().begin() + latest_nid, g.nodes().end(), [&assigned_target](std::unique_ptr<INode> &n)
+ {
+ if(n != nullptr)
+ {
+ n->set_assigned_target(assigned_target);
+ }
+ });
+ }
+ }
+ }
+}
+} // namespace graph
+} // namespace arm_compute
diff --git a/src/graph/mutators/NodeExecutionMethodMutator.cpp b/src/graph/mutators/NodeExecutionMethodMutator.cpp
new file mode 100644
index 0000000000..896bf0742c
--- /dev/null
+++ b/src/graph/mutators/NodeExecutionMethodMutator.cpp
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/graph/mutators/NodeExecutionMethodMutator.h"
+
+#include "arm_compute/graph/Graph.h"
+#include "arm_compute/graph/Logger.h"
+#include "arm_compute/graph/Utils.h"
+#include "arm_compute/graph/backends/BackendRegistry.h"
+#include "arm_compute/graph/nodes/Nodes.h"
+
+#include "arm_compute/core/utils/misc/Cast.h"
+
+namespace arm_compute
+{
+namespace graph
+{
+namespace
+{
+/** Runs a default setter function on a given types of nodes
+ *
+ * @tparam Setter Setter function to run
+ *
+ * @param[in, out] g Graph to extract the nodes from
+ * @param[in] node_type Node type
+ * @param[in] setter Setter function
+ */
+template <typename Setter>
+void set_default_on_invalid_method(Graph &g, NodeType node_type, Setter &&setter)
+{
+ const std::vector<NodeID> &node_ids = g.nodes(node_type);
+ for(auto &node_id : node_ids)
+ {
+ INode *node = g.node(node_id);
+ if(node != nullptr)
+ {
+ // Validate node
+ backends::IDeviceBackend *backend = backends::BackendRegistry::get().find_backend(node->assigned_target());
+ Status status = backend->validate_node(*node);
+
+ // Set default execution method in case of failure
+ if(!bool(status))
+ {
+ setter(node);
+ }
+ }
+ }
+}
+} // namespace
+
+const char *NodeExecutionMethodMutator::name()
+{
+ return "NodeExecutionMethodMutator";
+}
+
+void NodeExecutionMethodMutator::mutate(Graph &g)
+{
+ // Convolution Layer
+ set_default_on_invalid_method(g, NodeType::ConvolutionLayer, [](INode * n)
+ {
+ ARM_COMPUTE_LOG_GRAPH_INFO("Switched ConvolutionLayer method of node with ID : "
+ << n->id() << " and Name: " << n->name() << std::endl);
+ auto *casted_node = arm_compute::utils::cast::polymorphic_downcast<ConvolutionLayerNode *>(n);
+ casted_node->set_convolution_method(ConvolutionMethod::Default);
+ });
+
+ // Depthwise Convolution Layer
+ set_default_on_invalid_method(g, NodeType::DepthwiseConvolutionLayer, [](INode * n)
+ {
+ ARM_COMPUTE_LOG_GRAPH_INFO("Switched Depthwise ConvolutionLayer method of node with ID : "
+ << n->id() << " and Name: " << n->name() << std::endl);
+ auto *casted_node = arm_compute::utils::cast::polymorphic_downcast<DepthwiseConvolutionLayerNode *>(n);
+ casted_node->set_depthwise_convolution_method(DepthwiseConvolutionMethod::Default);
+ });
+}
+} // namespace graph
+} // namespace arm_compute
diff --git a/src/graph/mutators/NodeFusionMutator.cpp b/src/graph/mutators/NodeFusionMutator.cpp
index 6677330cec..82bfe25a3e 100644
--- a/src/graph/mutators/NodeFusionMutator.cpp
+++ b/src/graph/mutators/NodeFusionMutator.cpp
@@ -25,6 +25,7 @@
#include "arm_compute/graph/Graph.h"
#include "arm_compute/graph/Logger.h"
+#include "arm_compute/graph/Utils.h"
#include "arm_compute/graph/nodes/Nodes.h"
#include "arm_compute/core/utils/misc/Cast.h"
@@ -71,17 +72,7 @@ void fuse_batch_norm_with_activation(Graph &g)
if(bn_node->output(0)->accessor() == nullptr)
{
// Get driving nodes of activation node
- std::vector<NodeIdxPair> act_driving_nodes;
- for(auto &act_output_edge_id : act_node->output_edges())
- {
- auto act_output_edge = g.edge(act_output_edge_id);
- if(act_output_edge != nullptr)
- {
- ARM_COMPUTE_ERROR_ON(act_output_edge->consumer() == nullptr);
- act_driving_nodes.push_back(
- { act_output_edge->consumer_id(), act_output_edge->consumer_idx() });
- }
- }
+ std::vector<NodeIdxPair> act_driving_nodes = get_driving_nodes(*act_node);
// Set activation info to batch normalization
bn_node->set_fused_activation(act_node->activation_info());
diff --git a/src/graph/mutators/SplitLayerSubTensorMutator.cpp b/src/graph/mutators/SplitLayerSubTensorMutator.cpp
index 2a8c029843..5f1c9c3186 100644
--- a/src/graph/mutators/SplitLayerSubTensorMutator.cpp
+++ b/src/graph/mutators/SplitLayerSubTensorMutator.cpp
@@ -25,6 +25,7 @@
#include "arm_compute/graph/Graph.h"
#include "arm_compute/graph/Logger.h"
+#include "arm_compute/graph/algorithms/TopologicalSort.h"
#include "arm_compute/graph/backends/BackendRegistry.h"
#include "arm_compute/graph/nodes/SplitLayerNode.h"
@@ -42,10 +43,20 @@ const char *SplitLayerSubTensorMutator::name()
void SplitLayerSubTensorMutator::mutate(Graph &g)
{
+ // Early exit if no Split layers exist in graph
+ if(g.nodes(NodeType::SplitLayer).empty())
+ {
+ return;
+ }
+
+ // Perform topological sort
+ std::vector<NodeID> topological_sorted_node_ids = dfs(g);
+
// Should be in reverse order of execution
- for(auto &node : arm_compute::utils::iterable::reverse_iterate(g.nodes()))
+ for(auto &node_id : arm_compute::utils::iterable::reverse_iterate(topological_sorted_node_ids))
{
- if(node && node->type() == NodeType::SplitLayer && node->input(0) != nullptr)
+ INode *node = g.node(node_id);
+ if(node != nullptr && node->type() == NodeType::SplitLayer && node->input(0) != nullptr)
{
// Get output tensor
Tensor *input_tensor = node->input(0);
@@ -63,7 +74,7 @@ void SplitLayerSubTensorMutator::mutate(Graph &g)
ARM_COMPUTE_LOG_GRAPH_VERBOSE("Using sub-tensors for the node with ID : "
<< node->id() << " and name : " << node->name() << std::endl);
- auto *split_node = arm_compute::utils::cast::polymorphic_downcast<SplitLayerNode *>(node.get());
+ auto *split_node = arm_compute::utils::cast::polymorphic_downcast<SplitLayerNode *>(node);
const unsigned int axis = split_node->axis();
const unsigned int num_splits = split_node->num_splits();
diff --git a/src/graph/nodes/ConvolutionLayerNode.cpp b/src/graph/nodes/ConvolutionLayerNode.cpp
index 81cb2c2e6c..e9cb0396eb 100644
--- a/src/graph/nodes/ConvolutionLayerNode.cpp
+++ b/src/graph/nodes/ConvolutionLayerNode.cpp
@@ -32,8 +32,12 @@ namespace arm_compute
{
namespace graph
{
-ConvolutionLayerNode::ConvolutionLayerNode(PadStrideInfo info, ConvolutionMethod method, FastMathHint fast_math_hint, QuantizationInfo out_quant_info)
- : _info(std::move(info)), _method(method), _fast_math_hint(fast_math_hint), _out_quant_info(out_quant_info)
+ConvolutionLayerNode::ConvolutionLayerNode(PadStrideInfo info,
+ unsigned int num_groups,
+ ConvolutionMethod method,
+ FastMathHint fast_math_hint,
+ QuantizationInfo out_quant_info)
+ : _info(std::move(info)), _num_groups(num_groups), _method(method), _fast_math_hint(fast_math_hint), _out_quant_info(out_quant_info)
{
_input_edges.resize(3, EmptyEdgeID);
_outputs.resize(1, NullTensorID);
@@ -64,6 +68,11 @@ PadStrideInfo ConvolutionLayerNode::convolution_info() const
return _info;
}
+unsigned int ConvolutionLayerNode::num_groups() const
+{
+ return _num_groups;
+}
+
TensorDescriptor ConvolutionLayerNode::compute_output_descriptor(const TensorDescriptor &input_descriptor,
const TensorDescriptor &weights_descriptor,
const PadStrideInfo &info)