authorGeorgios Pinitas <georgios.pinitas@arm.com>2018-08-15 12:14:46 +0100
committerAnthony Barbier <anthony.barbier@arm.com>2018-11-02 16:54:54 +0000
commit2a2db590fd179dcb8e1a575293cd2b887e2dc246 (patch)
tree5e10da7cb6777f3020b84a2389b279ceef2be5ee
parentc1961b51df2e15a01a5950139e81bbd47fbfa627 (diff)
downloadComputeLibrary-2a2db590fd179dcb8e1a575293cd2b887e2dc246.tar.gz
COMPMID-1505: Add native grouping support at graph level
Change-Id: Iedc91b0aee743b59af5140c8acb8124548da3163
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/144362
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Giorgio Arena <giorgio.arena@arm.com>
Reviewed-by: Michele DiGiorgio <michele.digiorgio@arm.com>
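Grouped convolution splits the input channels and the filters into num_groups independent groups, convolves each group separately, and concatenates the per-group outputs along the channel axis. Before this patch, GraphBuilder eagerly expanded every grouped convolution into a split/convolve/concatenate subgraph; after it, ConvolutionLayerNode carries num_groups natively and the new GroupedConvolutionMutator only performs that expansion when backend validation of the grouped node fails. A minimal sketch of building such a node (the exact add_convolution_node parameter order is an assumption; see arm_compute/graph/GraphBuilder.h for the authoritative declaration):

using namespace arm_compute;
using namespace arm_compute::graph;

Graph      g;
NodeParams params{ "conv1", Target::NEON };

// Hypothetical shapes; num_groups is the argument threaded through this patch
NodeID in_nid   = GraphBuilder::add_input_node(g, params, TensorDescriptor(TensorShape(56U, 56U, 64U, 1U), DataType::F32), nullptr);
NodeID conv_nid = GraphBuilder::add_convolution_node(g, params, { in_nid, 0 },
                                                     Size2D(3, 3), 128U, PadStrideInfo(1, 1, 1, 1),
                                                     2 /* num_groups */);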
-rw-r--r-- arm_compute/graph/INode.h | 5
-rw-r--r-- arm_compute/graph/PassManager.h | 6
-rw-r--r-- arm_compute/graph/Utils.h | 12
-rw-r--r-- arm_compute/graph/algorithms/Algorithms.h | 2
-rw-r--r-- arm_compute/graph/algorithms/TopologicalSort.h | 54
-rw-r--r-- arm_compute/graph/backends/FunctionHelpers.h | 10
-rw-r--r-- arm_compute/graph/backends/ValidateHelpers.h | 49
-rw-r--r-- arm_compute/graph/detail/ExecutionHelpers.h | 7
-rw-r--r-- arm_compute/graph/mutators/GraphMutators.h | 2
-rw-r--r-- arm_compute/graph/mutators/GroupedConvolutionMutator.h | 46
-rw-r--r-- arm_compute/graph/mutators/NodeExecutionMethodMutator.h | 48
-rw-r--r-- arm_compute/graph/nodes/ConvolutionLayerNode.h | 8
-rw-r--r-- arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h | 2
-rw-r--r-- src/graph/GraphBuilder.cpp | 74
-rw-r--r-- src/graph/GraphManager.cpp | 6
-rw-r--r-- src/graph/INode.cpp | 5
-rw-r--r-- src/graph/Utils.cpp | 41
-rw-r--r-- src/graph/algorithms/TopologicalSort.cpp (renamed from arm_compute/graph/algorithms/BFS.h) | 89
-rw-r--r-- src/graph/backends/GLES/GCNodeValidator.cpp | 6
-rw-r--r-- src/graph/detail/ExecutionHelpers.cpp | 13
-rw-r--r-- src/graph/mutators/DepthConcatSubTensorMutator.cpp | 19
-rw-r--r-- src/graph/mutators/GroupedConvolutionMutator.cpp | 186
-rw-r--r-- src/graph/mutators/NodeExecutionMethodMutator.cpp | 97
-rw-r--r-- src/graph/mutators/NodeFusionMutator.cpp | 13
-rw-r--r-- src/graph/mutators/SplitLayerSubTensorMutator.cpp | 17
-rw-r--r-- src/graph/nodes/ConvolutionLayerNode.cpp | 13
26 files changed, 688 insertions, 142 deletions
diff --git a/arm_compute/graph/INode.h b/arm_compute/graph/INode.h
index f8101d7df2..4219150f58 100644
--- a/arm_compute/graph/INode.h
+++ b/arm_compute/graph/INode.h
@@ -224,6 +224,11 @@ public:
* @return Number of outputs
*/
size_t num_outputs() const;
+ /** Returns common node parameters
+ *
+ * @return Common node parameters
+ */
+ NodeParams common_node_params() const;
/** Returns requested target for this node
*
* @return Requested execution target
diff --git a/arm_compute/graph/PassManager.h b/arm_compute/graph/PassManager.h
index 9f32a458d0..27f7794c8a 100644
--- a/arm_compute/graph/PassManager.h
+++ b/arm_compute/graph/PassManager.h
@@ -74,13 +74,13 @@ public:
void clear();
/** Runs all the mutation passes on a given graph
*
- * @param[in] g Graph to run the mutations on
+ * @param[in, out] g Graph to run the mutations on
*/
void run_all(Graph &g);
/** Runs a specific mutation pass on a given graph
*
- * @param[in] g Graph to run the mutation on
- * @param[in] index Index of the mutation to execute
+ * @param[in, out] g Graph to run the mutation on
+ * @param[in] index Index of the mutation to execute
*/
void run(Graph &g, size_t index);
diff --git a/arm_compute/graph/Utils.h b/arm_compute/graph/Utils.h
index 3604bad4af..1a0509b6fa 100644
--- a/arm_compute/graph/Utils.h
+++ b/arm_compute/graph/Utils.h
@@ -115,6 +115,18 @@ size_t get_dimension_size(const TensorDescriptor &descriptor, const DataLayoutDi
* @return Idx of given dimension
*/
size_t get_dimension_idx(const TensorDescriptor &descriptor, const DataLayoutDimension data_layout_dimension);
+/** Get the list of driving nodes of a given node
+ *
+ * @param[in] node Node to find the driving nodes of
+ *
+ * @return A list with the driving nodes of the given node
+ */
+std::vector<NodeIdxPair> get_driving_nodes(const INode &node);
+/** Configures a tensor
+ *
+ * @param[in, out] tensor Tensor to configure
+ */
+void configure_tensor(Tensor *tensor);
} // namespace graph
} // namespace arm_compute
#endif /* __ARM_COMPUTE_GRAPH_UTILS_H__ */
diff --git a/arm_compute/graph/algorithms/Algorithms.h b/arm_compute/graph/algorithms/Algorithms.h
index f89856f327..7af2455e93 100644
--- a/arm_compute/graph/algorithms/Algorithms.h
+++ b/arm_compute/graph/algorithms/Algorithms.h
@@ -24,6 +24,6 @@
#ifndef __ARM_COMPUTE_GRAPH_ALGORITHMS_H__
#define __ARM_COMPUTE_GRAPH_ALGORITHMS_H__
-#include "arm_compute/graph/algorithms/BFS.h"
+#include "arm_compute/graph/algorithms/TopologicalSort.h"
#endif /* __ARM_COMPUTE_GRAPH_ALGORITHMS_H__ */
diff --git a/arm_compute/graph/algorithms/TopologicalSort.h b/arm_compute/graph/algorithms/TopologicalSort.h
new file mode 100644
index 0000000000..6684844790
--- /dev/null
+++ b/arm_compute/graph/algorithms/TopologicalSort.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_GRAPH_ALGORITHM_TOPOLOGICAL_SORT_H__
+#define __ARM_COMPUTE_GRAPH_ALGORITHM_TOPOLOGICAL_SORT_H__
+
+#include "arm_compute/graph/Types.h"
+
+#include <vector>
+
+namespace arm_compute
+{
+namespace graph
+{
+// Forward declarations
+class Graph;
+
+/** Breadth first search traversal
+ *
+ * @param g Graph to traverse
+ *
+ * @return A vector with the node id traversal order
+ */
+std::vector<NodeID> bfs(Graph &g);
+/** Depth first search traversal
+ *
+ * @param g Graph to traverse
+ *
+ * @return A vector with the node id traversal order
+ */
+std::vector<NodeID> dfs(Graph &g);
+} // namespace graph
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_GRAPH_ALGORITHM_TOPOLOGICAL_SORT_H__ */
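Both functions return an order in which every node appears only after all of its inputs; dfs() is the one GraphManager uses below. A usage sketch, assuming an already-built Graph named graph:

std::vector<arm_compute::graph::NodeID> order = arm_compute::graph::dfs(graph);
for(arm_compute::graph::NodeID nid : order)
{
    arm_compute::graph::INode *node = graph.node(nid);
    if(node != nullptr)
    {
        // Process node: all of its producers have already been processed
    }
}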
diff --git a/arm_compute/graph/backends/FunctionHelpers.h b/arm_compute/graph/backends/FunctionHelpers.h
index 5739773dfc..4a3f001671 100644
--- a/arm_compute/graph/backends/FunctionHelpers.h
+++ b/arm_compute/graph/backends/FunctionHelpers.h
@@ -265,6 +265,7 @@ std::unique_ptr<IFunction> create_convolution_layer(ConvolutionLayerNode &node,
}
const PadStrideInfo conv_info = node.convolution_info();
+ const unsigned int num_groups = node.num_groups();
const ConvolutionMethod conv_algorithm = node.convolution_method();
const bool fast_math = node.fast_math_hint() == FastMathHint::Enabled;
@@ -275,12 +276,14 @@ std::unique_ptr<IFunction> create_convolution_layer(ConvolutionLayerNode &node,
if(conv_algorithm == ConvolutionMethod::Winograd)
{
+ ARM_COMPUTE_ERROR_ON_MSG(num_groups != 1, "WinogradConvolutionLayer does not support grouping!");
std::tie(func, func_name) = create_named_memory_managed_function<typename ConvolutionLayerFunctions::WinogradConvolutionLayer>(
std::string("WinogradConvolutionLayer"), mm,
input, weights, biases, output, conv_info, ActivationLayerInfo(), fast_math);
}
else if(conv_algorithm == ConvolutionMethod::Direct)
{
+ ARM_COMPUTE_ERROR_ON_MSG(num_groups != 1, "DirectConvolutionLayer does not support grouping!");
std::tie(func, func_name) = create_named_function<typename ConvolutionLayerFunctions::DirectConvolutionLayer>(
std::string("DirectConvolutionLayer"),
input, weights, biases, output, conv_info);
@@ -289,19 +292,22 @@ std::unique_ptr<IFunction> create_convolution_layer(ConvolutionLayerNode &node,
{
std::tie(func, func_name) = create_named_memory_managed_function<typename ConvolutionLayerFunctions::GEMMConvolutionLayer>(
std::string("GEMMConvolutionLayer"), mm,
- input, weights, biases, output, conv_info);
+ input, weights, biases, output, conv_info,
+ WeightsInfo(), Size2D(1U, 1U), ActivationLayerInfo(), num_groups);
}
else
{
std::tie(func, func_name) = create_named_memory_managed_function<typename ConvolutionLayerFunctions::GenericConvolutionLayer>(
std::string("GenericConvolutionLayer"), mm,
- input, weights, biases, output, conv_info, WeightsInfo(), Size2D(1U, 1U), ActivationLayerInfo(), fast_math);
+ input, weights, biases, output, conv_info,
+ WeightsInfo(), Size2D(1U, 1U), ActivationLayerInfo(), fast_math, num_groups);
}
// Log info
ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated " << func_name
<< " Target " << TargetInfo::TargetType
<< " Data Type: " << input->info()->data_type()
+ << " Groups: " << num_groups
<< " Input QuantInfo: " << input->info()->quantization_info()
<< " Weights QuantInfo: " << weights->info()->quantization_info()
<< " Input shape: " << input->info()->tensor_shape()
diff --git a/arm_compute/graph/backends/ValidateHelpers.h b/arm_compute/graph/backends/ValidateHelpers.h
index ec84399ac6..3064db20c3 100644
--- a/arm_compute/graph/backends/ValidateHelpers.h
+++ b/arm_compute/graph/backends/ValidateHelpers.h
@@ -107,37 +107,30 @@ Status validate_convolution_layer(ConvolutionLayerNode &node)
const PadStrideInfo conv_info = node.convolution_info();
const ConvolutionMethod conv_algorithm = node.convolution_method();
const bool fast_math = node.fast_math_hint() == FastMathHint::Enabled;
+ const unsigned int num_groups = node.num_groups();
// Validate function
Status status{};
switch(conv_algorithm)
{
case ConvolutionMethod::Direct:
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(num_groups != 1, "DirectConvolutionLayer does not support grouping!");
status = DirectConvolutionLayer::validate(input, weights, biases, output, conv_info);
break;
case ConvolutionMethod::GEMM:
- status = GEMMConvolutionLayer::validate(input, weights, biases, output, conv_info);
+ status = GEMMConvolutionLayer::validate(input, weights, biases, output, conv_info,
+ WeightsInfo(), Size2D(1, 1), ActivationLayerInfo(), num_groups);
break;
case ConvolutionMethod::Winograd:
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(num_groups != 1, "WinogradConvolutionLayer does not support grouping!");
status = WinogradConvolutionLayer::validate(input, weights, biases, output, conv_info, ActivationLayerInfo(), fast_math);
break;
case ConvolutionMethod::Default:
- status = ConvolutionLayer::validate(input, weights, biases, output, conv_info);
+ status = ConvolutionLayer::validate(input, weights, biases, output, conv_info,
+ WeightsInfo(), Size2D(1, 1), ActivationLayerInfo(), fast_math, num_groups);
break;
default:
- break;
- }
-
- // If validation fails try the Default approach
- if(!bool(status))
- {
- status = ConvolutionLayer::validate(input, weights, biases, output, conv_info /*, fast_math*/);
- if(bool(status))
- {
- ARM_COMPUTE_LOG_GRAPH_INFO("Switched ConvolutionLayer method of node with ID : "
- << node.id() << " and Name: " << node.name() << std::endl);
- node.set_convolution_method(ConvolutionMethod::Default);
- }
+ ARM_COMPUTE_RETURN_ERROR_MSG("Unsupported convolution method");
}
return status;
@@ -160,20 +153,30 @@ Status validate_depthwise_convolution_layer(DepthwiseConvolutionLayerNode &node)
ARM_COMPUTE_RETURN_ERROR_ON(node.num_outputs() != 1);
// Extract IO and info
- arm_compute::ITensorInfo *weights = detail::get_backing_tensor_info(node.input(1));
+ arm_compute::ITensorInfo *input = detail::get_backing_tensor_info(node.input(0));
+ arm_compute::ITensorInfo *weights = detail::get_backing_tensor_info(node.input(1));
+ arm_compute::ITensorInfo *biases = get_backing_tensor_info(node.input(2));
+ arm_compute::ITensorInfo *output = get_backing_tensor_info(node.output(0));
+
+ const PadStrideInfo conv_info = node.convolution_info();
const DepthwiseConvolutionMethod dwc_algorithm = node.depthwise_convolution_method();
- ARM_COMPUTE_ERROR_ON(weights == nullptr);
- // TODO (geopin01) : Switch when validation is implemented
// Validate function
- if((dwc_algorithm == DepthwiseConvolutionMethod::Optimized3x3) && (weights->tensor_shape()[get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::WIDTH)] != 3))
+ Status status{};
+ switch(dwc_algorithm)
{
- ARM_COMPUTE_LOG_GRAPH_INFO("Switched DepthwiseConvolutionLayer method of node with ID : "
- << node.id() << " and Name: " << node.name() << std::endl);
- node.set_depthwise_convolution_method(DepthwiseConvolutionMethod::Default);
+ case DepthwiseConvolutionMethod::Default:
+ case DepthwiseConvolutionMethod::GEMV:
+ status = DepthwiseConvolutionLayer::validate(input, weights, biases, output, conv_info);
+ break;
+ case DepthwiseConvolutionMethod::Optimized3x3:
+ status = DepthwiseConvolutionLayer3x3::validate(input, weights, biases, output, conv_info);
+ break;
+ default:
+ ARM_COMPUTE_RETURN_ERROR_MSG("Unsupported depthwise convolution method");
}
- return Status{};
+ return status;
}
/** Validates a permute layer node
diff --git a/arm_compute/graph/detail/ExecutionHelpers.h b/arm_compute/graph/detail/ExecutionHelpers.h
index 3a357776e4..fd8d082770 100644
--- a/arm_compute/graph/detail/ExecutionHelpers.h
+++ b/arm_compute/graph/detail/ExecutionHelpers.h
@@ -71,12 +71,13 @@ void allocate_const_tensors(Graph &g);
void allocate_all_tensors(Graph &g);
/** Configures all nodes of graph
*
- * @param[in] g Graph to configure the nodes
- * @param[in] ctx Graph context to use
+ * @param[in, out] g Graph to configure the nodes
+ * @param[in] ctx Graph context to use
+ * @param[in] node_order The order to configure the nodes
*
* @return The execution workload
*/
-ExecutionWorkload configure_all_nodes(Graph &g, GraphContext &ctx);
+ExecutionWorkload configure_all_nodes(Graph &g, GraphContext &ctx, const std::vector<NodeID> &node_order);
/** Release the memory of all unused const nodes
*
* @param[in] g Graph to release the memory from
diff --git a/arm_compute/graph/mutators/GraphMutators.h b/arm_compute/graph/mutators/GraphMutators.h
index a91bc91a12..67c4d7a2ee 100644
--- a/arm_compute/graph/mutators/GraphMutators.h
+++ b/arm_compute/graph/mutators/GraphMutators.h
@@ -25,7 +25,9 @@
#define __ARM_COMPUTE_GRAPH_GRAPH_MUTATORS_H__
#include "arm_compute/graph/mutators/DepthConcatSubTensorMutator.h"
+#include "arm_compute/graph/mutators/GroupedConvolutionMutator.h"
#include "arm_compute/graph/mutators/InPlaceOperationMutator.h"
+#include "arm_compute/graph/mutators/NodeExecutionMethodMutator.h"
#include "arm_compute/graph/mutators/NodeFusionMutator.h"
#include "arm_compute/graph/mutators/SplitLayerSubTensorMutator.h"
diff --git a/arm_compute/graph/mutators/GroupedConvolutionMutator.h b/arm_compute/graph/mutators/GroupedConvolutionMutator.h
new file mode 100644
index 0000000000..8ce2554de1
--- /dev/null
+++ b/arm_compute/graph/mutators/GroupedConvolutionMutator.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_GRAPH_GROUPED_CONVOLUTION_MUTATOR_H__
+#define __ARM_COMPUTE_GRAPH_GROUPED_CONVOLUTION_MUTATOR_H__
+
+#include "arm_compute/graph/IGraphMutator.h"
+
+namespace arm_compute
+{
+namespace graph
+{
+/** Mutation pass to implement/optimize grouped convolutions
+ *
+ * @warning Running this pass is compulsory whenever the graph contains grouped convolutions
+ **/
+class GroupedConvolutionMutator final : public IGraphMutator
+{
+public:
+ // Inherited methods overridden
+ virtual void mutate(Graph &g) override;
+ const char *name() override;
+};
+} // namespace graph
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_GRAPH_GROUPED_CONVOLUTION_MUTATOR_H__ */
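As a worked shape example (illustrative numbers, not from this patch): a convolution with input [W, H, 256], weights [Kx, Ky, 128, 384] and num_groups = 2 is expanded into a channel-wise split of the input into two [W, H, 128] tensors, a batch-wise split of the weights into two [Kx, Ky, 128, 192] tensors, two ordinary num_groups = 1 convolutions each producing [W', H', 192], and a channel-wise concatenation back to [W', H', 384]. This is exactly the sequence built by create_grouped_convolution() in src/graph/mutators/GroupedConvolutionMutator.cpp further down.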
diff --git a/arm_compute/graph/mutators/NodeExecutionMethodMutator.h b/arm_compute/graph/mutators/NodeExecutionMethodMutator.h
new file mode 100644
index 0000000000..f961f5d420
--- /dev/null
+++ b/arm_compute/graph/mutators/NodeExecutionMethodMutator.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_GRAPH_NODE_EXECUTION_METHOD_MUTATOR_H__
+#define __ARM_COMPUTE_GRAPH_NODE_EXECUTION_METHOD_MUTATOR_H__
+
+#include "arm_compute/graph/IGraphMutator.h"
+
+namespace arm_compute
+{
+namespace graph
+{
+/** Mutation pass that falls back to the default execution method
+ *
+ * @note This operates on nodes that support multiple execution methods (e.g. ConvolutionLayerNode).
+ * When the requested execution method is not supported for a given configuration, the node is
+ * switched to its default method, ensuring the graph remains executable.
+ */
+class NodeExecutionMethodMutator final : public IGraphMutator
+{
+public:
+ // Inherited methods overridden
+ virtual void mutate(Graph &g) override;
+ const char *name() override;
+};
+} // namespace graph
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_GRAPH_NODE_EXECUTION_METHOD_MUTATOR_H__ */
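A usage sketch of registering the pass, mirroring how create_default_pass_manager() appends it in src/graph/Utils.cpp later in this patch (graph is assumed to be an already-built Graph):

arm_compute::graph::PassManager pm;
pm.append(arm_compute::support::cpp14::make_unique<arm_compute::graph::NodeExecutionMethodMutator>());
pm.run_all(graph); // nodes whose requested method fails validation fall back to Default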
diff --git a/arm_compute/graph/nodes/ConvolutionLayerNode.h b/arm_compute/graph/nodes/ConvolutionLayerNode.h
index 4299be6bb5..0698ac1360 100644
--- a/arm_compute/graph/nodes/ConvolutionLayerNode.h
+++ b/arm_compute/graph/nodes/ConvolutionLayerNode.h
@@ -37,11 +37,13 @@ public:
/** Constructor
*
* @param[in] info Convolution layer attributes
+ * @param[in] num_groups (Optional) Number of groups (Defaults to 1)
* @param[in] method (Optional) Convolution method to use
* @param[in] fast_math_hint (Optional) Fast math hint
* @param[in] out_quant_info (Optional) Output quantization info
*/
ConvolutionLayerNode(PadStrideInfo info,
+ unsigned int num_groups = 1,
ConvolutionMethod method = ConvolutionMethod::Default,
FastMathHint fast_math_hint = FastMathHint::Disabled,
QuantizationInfo out_quant_info = QuantizationInfo());
@@ -73,6 +75,11 @@ public:
* @return Convolution information
*/
PadStrideInfo convolution_info() const;
+ /** Number of groups in convolution accessor
+ *
+ * @return Number of groups in convolution
+ */
+ unsigned int num_groups() const;
/** Computes convolution output descriptor
*
* @param[in] input_descriptor Input descriptor
@@ -93,6 +100,7 @@ public:
private:
PadStrideInfo _info;
+ unsigned int _num_groups;
ConvolutionMethod _method;
FastMathHint _fast_math_hint;
QuantizationInfo _out_quant_info;
diff --git a/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h
index 229fb24010..0547a6a6a8 100644
--- a/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h
@@ -136,7 +136,7 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier);
+ static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1);
// Inherited methods overriden:
void run() override;
diff --git a/src/graph/GraphBuilder.cpp b/src/graph/GraphBuilder.cpp
index fa78024e71..81a18c437f 100644
--- a/src/graph/GraphBuilder.cpp
+++ b/src/graph/GraphBuilder.cpp
@@ -25,7 +25,7 @@
#include "arm_compute/graph/Graph.h"
#include "arm_compute/graph/Utils.h"
-#include "arm_compute/graph/algorithms/BFS.h"
+#include "arm_compute/graph/algorithms/TopologicalSort.h"
#include "arm_compute/graph/nodes/Nodes.h"
#include "support/ToolchainSupport.h"
@@ -81,53 +81,6 @@ NodeID create_simple_single_input_output_node(Graph &g, NodeParams &params, Node
return nid;
}
-
-NodeID create_grouped_convolution(Graph &g, const NodeParams &params, NodeIdxPair input, NodeID weights, NodeID bias,
- PadStrideInfo conv_info, ConvolutionMethod method, FastMathHint fast_math_hint, unsigned int num_groups)
-{
- bool has_bias = (bias != EmptyNodeID);
-
- // Split input
- const TensorDescriptor input_tensor_desc = get_tensor_descriptor(g, g.node(input.node_id)->outputs()[0]);
- const unsigned int input_idx = get_dimension_idx(input_tensor_desc, DataLayoutDimension::CHANNEL);
- NodeID input_split = GraphBuilder::add_split_node(g, params, input, num_groups, input_idx);
-
- // Split weights
- const TensorDescriptor weights_tensor_desc = get_tensor_descriptor(g, g.node(weights)->outputs()[0]);
- const unsigned int batch_idx = get_dimension_idx(weights_tensor_desc, DataLayoutDimension::BATCHES);
- NodeID weights_split = GraphBuilder::add_split_node(g, params, { weights, 0 }, num_groups, batch_idx);
-
- // Split bias
- NodeID bias_split = EmptyNodeID;
- if(has_bias)
- {
- // Split bias
- bias_split = GraphBuilder::add_split_node(g, params, { bias, 0 }, num_groups, 0);
- }
-
- std::vector<NodeIdxPair> convolution_outputs;
- for(unsigned int i = 0; i < num_groups; ++i)
- {
- NodeParams group_params = params;
- NodeID conv_nid = g.add_node<ConvolutionLayerNode>(conv_info, method, fast_math_hint);
- g.add_connection(input_split, i, conv_nid, 0);
- g.add_connection(weights_split, i, conv_nid, 1);
- if(has_bias)
- {
- g.add_connection(bias_split, i, conv_nid, 2);
- }
- // Add group name
- if(!group_params.name.empty())
- {
- group_params.name.append("_g" + arm_compute::support::cpp11::to_string(i));
- }
- set_node_params(g, conv_nid, group_params);
- convolution_outputs.push_back({ conv_nid, 0 });
- }
-
- // Depth concatenate output
- return GraphBuilder::add_concatenate_node(g, params, convolution_outputs, DataLayoutDimension::CHANNEL);
-}
} // namespace
NodeID GraphBuilder::add_const_node(Graph &g, NodeParams params, TensorDescriptor desc, ITensorAccessorUPtr accessor)
@@ -263,24 +216,17 @@ NodeID GraphBuilder::add_convolution_node(Graph &g, NodeParams params, NodeIdxPa
b_nid = add_const_node_with_name(g, params, "Bias", b_desc, std::move(bias_accessor));
}
- if(num_groups == 1)
- {
- // Create convolution node and connect
- NodeID conv_nid = g.add_node<ConvolutionLayerNode>(conv_info, method, fast_math_hint, out_quant_info);
- g.add_connection(input.node_id, input.index, conv_nid, 0);
- g.add_connection(w_nid, 0, conv_nid, 1);
- if(has_bias)
- {
- g.add_connection(b_nid, 0, conv_nid, 2);
- }
- set_node_params(g, conv_nid, params);
-
- return conv_nid;
- }
- else
+ // Create convolution node and connect
+ NodeID conv_nid = g.add_node<ConvolutionLayerNode>(conv_info, num_groups, method, fast_math_hint, out_quant_info);
+ g.add_connection(input.node_id, input.index, conv_nid, 0);
+ g.add_connection(w_nid, 0, conv_nid, 1);
+ if(has_bias)
{
- return create_grouped_convolution(g, params, input, w_nid, b_nid, conv_info, method, fast_math_hint, num_groups);
+ g.add_connection(b_nid, 0, conv_nid, 2);
}
+ set_node_params(g, conv_nid, params);
+
+ return conv_nid;
}
NodeID GraphBuilder::add_deconvolution_node(Graph &g, NodeParams params, NodeIdxPair input,
diff --git a/src/graph/GraphManager.cpp b/src/graph/GraphManager.cpp
index db6650cf69..5be4e7e2ba 100644
--- a/src/graph/GraphManager.cpp
+++ b/src/graph/GraphManager.cpp
@@ -32,6 +32,8 @@
#include "arm_compute/graph/detail/CrossLayerMemoryManagerHelpers.h"
#include "arm_compute/graph/detail/ExecutionHelpers.h"
+#include "arm_compute/graph/algorithms/TopologicalSort.h"
+
namespace arm_compute
{
namespace graph
@@ -69,13 +71,13 @@ void GraphManager::finalize_graph(Graph &graph, GraphContext &ctx, PassManager &
pm.run_all(graph);
// Perform topological sort
- // FIXME : Sort nodes and pass sorted indices in configure all nodes
+ std::vector<NodeID> topological_sorted_nodes = dfs(graph);
// Validate all nodes
detail::validate_all_nodes(graph);
// Configure all nodes
- auto workload = detail::configure_all_nodes(graph, ctx);
+ auto workload = detail::configure_all_nodes(graph, ctx, topological_sorted_nodes);
ARM_COMPUTE_ERROR_ON_MSG(workload.tasks.empty(), "Could not configure all nodes!");
// Allocate const tensors and call accessors
diff --git a/src/graph/INode.cpp b/src/graph/INode.cpp
index cd9a46ac40..b0c31372aa 100644
--- a/src/graph/INode.cpp
+++ b/src/graph/INode.cpp
@@ -185,6 +185,11 @@ size_t INode::num_outputs() const
return _outputs.size();
}
+NodeParams INode::common_node_params() const
+{
+ return _common_params;
+}
+
Target INode::requested_target() const
{
return _common_params.target;
diff --git a/src/graph/Utils.cpp b/src/graph/Utils.cpp
index 4715694f15..75644a8933 100644
--- a/src/graph/Utils.cpp
+++ b/src/graph/Utils.cpp
@@ -78,13 +78,21 @@ PassManager create_default_pass_manager(Target target)
{
PassManager pm;
+ // Passes that mutate graph IR
+ pm.append(support::cpp14::make_unique<GroupedConvolutionMutator>());
if(target != Target::GC)
{
pm.append(support::cpp14::make_unique<NodeFusionMutator>());
pm.append(support::cpp14::make_unique<InPlaceOperationMutator>());
+ }
+
+ // Passes that mutate backend information
+ if(target != Target::GC)
+ {
pm.append(support::cpp14::make_unique<DepthConcatSubTensorMutator>());
pm.append(support::cpp14::make_unique<SplitLayerSubTensorMutator>());
}
+ pm.append(support::cpp14::make_unique<NodeExecutionMethodMutator>());
return pm;
}
@@ -139,5 +147,38 @@ size_t get_dimension_idx(const TensorDescriptor &descriptor, const DataLayoutDim
break;
}
}
+
+std::vector<NodeIdxPair> get_driving_nodes(const INode &node)
+{
+ std::vector<NodeIdxPair> driving_nodes;
+
+ const Graph *g = node.graph();
+ ARM_COMPUTE_ERROR_ON(g == nullptr);
+
+ for(auto &output_edge_id : node.output_edges())
+ {
+ auto output_edge = g->edge(output_edge_id);
+ if(output_edge != nullptr)
+ {
+ ARM_COMPUTE_ERROR_ON(output_edge->consumer() == nullptr);
+ driving_nodes.push_back({ output_edge->consumer_id(), output_edge->consumer_idx() });
+ }
+ }
+
+ return driving_nodes;
+}
+
+void configure_tensor(Tensor *tensor)
+{
+ if(tensor != nullptr && tensor->handle() == nullptr)
+ {
+ Target target = tensor->desc().target;
+ auto backend = backends::BackendRegistry::get().find_backend(target);
+ ARM_COMPUTE_ERROR_ON_MSG(!backend, "Requested backend doesn't exist!");
+ auto handle = backend->create_tensor(*tensor);
+ ARM_COMPUTE_ERROR_ON_MSG(!handle, "Couldn't create backend handle!");
+ tensor->set_handle(std::move(handle));
+ }
+}
} // namespace graph
} // namespace arm_compute
diff --git a/arm_compute/graph/algorithms/BFS.h b/src/graph/algorithms/TopologicalSort.cpp
index 97292d733b..0fbf6e32e8 100644
--- a/arm_compute/graph/algorithms/BFS.h
+++ b/src/graph/algorithms/TopologicalSort.cpp
@@ -21,13 +21,14 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef __ARM_COMPUTE_GRAPH_ALGORITHM_BFS_H__
-#define __ARM_COMPUTE_GRAPH_ALGORITHM_BFS_H__
+#include "arm_compute/graph/algorithms/TopologicalSort.h"
#include "arm_compute/graph/Graph.h"
+#include "arm_compute/core/utils/misc/Iterable.h"
+
#include <list>
-#include <vector>
+#include <stack>
namespace arm_compute
{
@@ -68,13 +69,7 @@ inline bool all_inputs_are_visited(const INode *node, const std::vector<bool> &v
}
} // namespace detail
-/** Breadth first search traversal
- *
- * @param g Graph to traverse
- *
- * @return A vector with the node id traversal order
- */
-inline std::vector<NodeID> bfs(Graph &g)
+std::vector<NodeID> bfs(Graph &g)
{
std::vector<NodeID> bfs_order_vector;
@@ -94,6 +89,16 @@ inline std::vector<NodeID> bfs(Graph &g)
}
}
+ // Push const nodes and mark as visited
+ for(auto &const_node : g.nodes(NodeType::Const))
+ {
+ if(const_node != EmptyNodeID)
+ {
+ visited[const_node] = true;
+ queue.push_back(const_node);
+ }
+ }
+
// Iterate over vector and edges
while(!queue.empty())
{
@@ -118,6 +123,66 @@ inline std::vector<NodeID> bfs(Graph &g)
return bfs_order_vector;
}
+
+std::vector<NodeID> dfs(Graph &g)
+{
+ std::vector<NodeID> dfs_order_vector;
+
+ // Create visited vector
+ std::vector<bool> visited(g.nodes().size(), false);
+
+ // Create DFS stack
+ std::stack<NodeID> stack;
+
+ // Push inputs and mark as visited
+ for(auto &input : g.nodes(NodeType::Input))
+ {
+ if(input != EmptyNodeID)
+ {
+ visited[input] = true;
+ stack.push(input);
+ }
+ }
+
+ // Push const nodes and mark as visited
+ for(auto &const_node : g.nodes(NodeType::Const))
+ {
+ if(const_node != EmptyNodeID)
+ {
+ visited[const_node] = true;
+ stack.push(const_node);
+ }
+ }
+
+ // Iterate over stack and edges
+ while(!stack.empty())
+ {
+ // Pop a node from stack and process
+ NodeID n = stack.top();
+ dfs_order_vector.push_back(n);
+ stack.pop();
+
+ // Mark node as visited
+ visited[n] = true;
+
+ const INode *node = g.node(n);
+ ARM_COMPUTE_ERROR_ON(node == nullptr);
+ // Reverse iterate to push branches from right to left and pop on the opposite order
+ for(const auto &eid : arm_compute::utils::iterable::reverse_iterate(node->output_edges()))
+ {
+ const Edge *e = g.edge(eid);
+ ARM_COMPUTE_ERROR_ON(e == nullptr);
+ if(!visited[e->consumer_id()] && detail::all_inputs_are_visited(e->consumer(), visited))
+ {
+ stack.push(e->consumer_id());
+ }
+ }
+ }
+
+ return dfs_order_vector;
+}
} // namespace graph
-} // namespace arm_compute
-#endif /* __ARM_COMPUTE_GRAPH_ALGORITHM_BFS_H__ */
+} // namespace arm_compute
\ No newline at end of file
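The detail::all_inputs_are_visited() guard is what turns the plain stack traversal above into a topological sort: a consumer is only scheduled once every one of its producers has been emitted. A standalone sketch of the same pattern (not library code; it tracks scheduled and emitted states separately, which also avoids emitting a node twice when duplicate edges exist):

#include <stack>
#include <vector>

std::vector<int> topo_dfs(const std::vector<std::vector<int>> &consumers,
                          const std::vector<std::vector<int>> &producers,
                          const std::vector<int>              &entry_nodes)
{
    std::vector<int>  order;
    std::vector<bool> scheduled(consumers.size(), false); // pushed on the stack
    std::vector<bool> emitted(consumers.size(), false);   // appended to the order
    std::stack<int>   stack;

    for(int n : entry_nodes) // nodes with no producers (inputs / consts)
    {
        scheduled[n] = true;
        stack.push(n);
    }
    while(!stack.empty())
    {
        const int n = stack.top();
        stack.pop();
        order.push_back(n);
        emitted[n] = true;

        for(int c : consumers[n])
        {
            // Schedule a consumer only once all of its producers are emitted
            bool ready = !scheduled[c];
            for(int p : producers[c])
            {
                ready = ready && emitted[p];
            }
            if(ready)
            {
                scheduled[c] = true;
                stack.push(c);
            }
        }
    }
    return order;
}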
diff --git a/src/graph/backends/GLES/GCNodeValidator.cpp b/src/graph/backends/GLES/GCNodeValidator.cpp
index 542cfdf973..31d1c8b456 100644
--- a/src/graph/backends/GLES/GCNodeValidator.cpp
+++ b/src/graph/backends/GLES/GCNodeValidator.cpp
@@ -80,15 +80,13 @@ Status validate_convolution_layer(ConvolutionLayerNode &node)
const ConvolutionMethod conv_algorithm = node.convolution_method();
// Validate function
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(node.num_groups() != 1, "Grouping is not supported by ConvolutionLayer!");
if(conv_algorithm == ConvolutionMethod::Direct)
{
bool is_square = weights->tensor_shape().x() == weights->tensor_shape().y();
bool is_direct = (weights->tensor_shape().x() == 1) || (weights->tensor_shape().x() == 3) || (weights->tensor_shape().x() == 5);
bool is_correct_stride = (conv_info.stride().first) <= 2 && (conv_info.stride().second <= 2);
- if(!(is_square && is_direct && is_correct_stride))
- {
- node.set_convolution_method(ConvolutionMethod::Default);
- }
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(!(is_square && is_direct && is_correct_stride), "Direct convolution is not supported for given configuration");
}
return Status{};
diff --git a/src/graph/detail/ExecutionHelpers.cpp b/src/graph/detail/ExecutionHelpers.cpp
index 6df67fcfec..6157b7fecf 100644
--- a/src/graph/detail/ExecutionHelpers.cpp
+++ b/src/graph/detail/ExecutionHelpers.cpp
@@ -59,7 +59,7 @@ void configure_all_tensors(Graph &g)
for(auto &tensor : tensors)
{
- if(tensor)
+ if(tensor && tensor->handle() == nullptr)
{
Target target = tensor->desc().target;
auto backend = backends::BackendRegistry::get().find_backend(target);
@@ -131,17 +131,16 @@ void allocate_all_tensors(Graph &g)
}
}
-ExecutionWorkload configure_all_nodes(Graph &g, GraphContext &ctx)
+ExecutionWorkload configure_all_nodes(Graph &g, GraphContext &ctx, const std::vector<NodeID> &node_order)
{
ExecutionWorkload workload;
workload.graph = &g;
workload.ctx = &ctx;
- auto &nodes = g.nodes();
-
// Create tasks
- for(auto &node : nodes)
+ for(auto &node_id : node_order)
{
+ auto node = g.node(node_id);
if(node != nullptr)
{
Target assigned_target = node->assigned_target();
@@ -152,14 +151,14 @@ ExecutionWorkload configure_all_nodes(Graph &g, GraphContext &ctx)
{
ExecutionTask task;
task.task = std::move(func);
- task.node = node.get();
+ task.node = node;
workload.tasks.push_back(std::move(task));
}
}
}
// Add inputs and outputs
- for(auto &node : nodes)
+ for(auto &node : g.nodes())
{
if(node != nullptr && node->type() == NodeType::Input)
{
diff --git a/src/graph/mutators/DepthConcatSubTensorMutator.cpp b/src/graph/mutators/DepthConcatSubTensorMutator.cpp
index 241c07b367..937528d143 100644
--- a/src/graph/mutators/DepthConcatSubTensorMutator.cpp
+++ b/src/graph/mutators/DepthConcatSubTensorMutator.cpp
@@ -26,6 +26,7 @@
#include "arm_compute/graph/Graph.h"
#include "arm_compute/graph/Logger.h"
#include "arm_compute/graph/Utils.h"
+#include "arm_compute/graph/algorithms/TopologicalSort.h"
#include "arm_compute/graph/backends/BackendRegistry.h"
#include "arm_compute/graph/nodes/ConcatenateLayerNode.h"
@@ -43,16 +44,26 @@ const char *DepthConcatSubTensorMutator::name()
void DepthConcatSubTensorMutator::mutate(Graph &g)
{
+ // Early exit if no Concatenation layers exist in graph
+ if(g.nodes(NodeType::ConcatenateLayer).empty())
+ {
+ return;
+ }
+
+ // Perform topological sort
+ std::vector<NodeID> topological_sorted_node_ids = dfs(g);
+
// Should be in reverse order of execution
- for(auto &node : arm_compute::utils::iterable::reverse_iterate(g.nodes()))
+ for(auto &node_id : arm_compute::utils::iterable::reverse_iterate(topological_sorted_node_ids))
{
- if(node && node->type() == NodeType::ConcatenateLayer && node->output(0) != nullptr)
+ INode *node = g.node(node_id);
+ if(node != nullptr && node->type() == NodeType::ConcatenateLayer && node->output(0) != nullptr)
{
// Get output tensor
auto output_tensor = node->output(0);
// Check concatenation axis (Sub-tensor optimization is supported for concatenation axis >= 2)
- auto *concat_node = arm_compute::utils::cast::polymorphic_downcast<ConcatenateLayerNode *>(node.get());
+ auto *concat_node = arm_compute::utils::cast::polymorphic_downcast<ConcatenateLayerNode *>(node);
if(output_tensor == nullptr || get_dimension_idx(output_tensor->desc(), concat_node->concatenation_axis()) < 2)
{
continue;
@@ -84,7 +95,7 @@ void DepthConcatSubTensorMutator::mutate(Graph &g)
depth += input_shape.z();
}
- auto *dc_node = arm_compute::utils::cast::polymorphic_downcast<ConcatenateLayerNode *>(node.get());
+ auto *dc_node = arm_compute::utils::cast::polymorphic_downcast<ConcatenateLayerNode *>(node);
dc_node->set_enabled(false);
}
}
diff --git a/src/graph/mutators/GroupedConvolutionMutator.cpp b/src/graph/mutators/GroupedConvolutionMutator.cpp
new file mode 100644
index 0000000000..d2643d5428
--- /dev/null
+++ b/src/graph/mutators/GroupedConvolutionMutator.cpp
@@ -0,0 +1,186 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/graph/mutators/GroupedConvolutionMutator.h"
+
+#include "arm_compute/graph/Graph.h"
+#include "arm_compute/graph/GraphBuilder.h"
+#include "arm_compute/graph/Logger.h"
+#include "arm_compute/graph/Utils.h"
+#include "arm_compute/graph/backends/BackendRegistry.h"
+#include "arm_compute/graph/nodes/Nodes.h"
+
+#include "arm_compute/core/utils/misc/Cast.h"
+
+#include <set>
+
+namespace arm_compute
+{
+namespace graph
+{
+namespace
+{
+NodeID create_grouped_convolution(Graph &g, const NodeParams &params, NodeIdxPair input, NodeID weights, NodeID bias,
+ PadStrideInfo conv_info, ConvolutionMethod method, FastMathHint fast_math_hint, unsigned int num_groups)
+{
+ bool has_bias = (bias != EmptyNodeID);
+
+ // Split input
+ const TensorDescriptor input_tensor_desc = get_tensor_descriptor(g, g.node(input.node_id)->outputs()[0]);
+ const unsigned int input_idx = get_dimension_idx(input_tensor_desc, DataLayoutDimension::CHANNEL);
+ NodeID input_split = GraphBuilder::add_split_node(g, params, input, num_groups, input_idx);
+
+ // Split weights
+ const TensorDescriptor weights_tensor_desc = get_tensor_descriptor(g, g.node(weights)->outputs()[0]);
+ const unsigned int batch_idx = get_dimension_idx(weights_tensor_desc, DataLayoutDimension::BATCHES);
+ NodeID weights_split = GraphBuilder::add_split_node(g, params, { weights, 0 }, num_groups, batch_idx);
+
+ // Split bias
+ NodeID bias_split = EmptyNodeID;
+ if(has_bias)
+ {
+ // Split bias
+ bias_split = GraphBuilder::add_split_node(g, params, { bias, 0 }, num_groups, 0);
+ }
+
+ std::vector<NodeIdxPair> convolution_outputs;
+ for(unsigned int i = 0; i < num_groups; ++i)
+ {
+ NodeParams group_params = params;
+ NodeID conv_nid = g.add_node<ConvolutionLayerNode>(conv_info, 1, method, fast_math_hint);
+ g.add_connection(input_split, i, conv_nid, 0);
+ g.add_connection(weights_split, i, conv_nid, 1);
+ if(has_bias)
+ {
+ g.add_connection(bias_split, i, conv_nid, 2);
+ }
+
+ // Add group name
+ if(!group_params.name.empty())
+ {
+ group_params.name.append("_g" + arm_compute::support::cpp11::to_string(i));
+ }
+
+ // Set node parameters
+ INode *node = g.node(conv_nid);
+ ARM_COMPUTE_ERROR_ON(node == nullptr);
+ node->set_common_node_parameters(group_params);
+
+ convolution_outputs.push_back({ conv_nid, 0 });
+ }
+
+ // Depth concatenate output
+ return GraphBuilder::add_concatenate_node(g, params, convolution_outputs, DataLayoutDimension::CHANNEL);
+}
+} // namespace
+
+const char *GroupedConvolutionMutator::name()
+{
+ return "GroupedConvolutionMutator";
+}
+
+void GroupedConvolutionMutator::mutate(Graph &g)
+{
+ // Early exit if no Convolution layers exist in graph
+ if(g.nodes(NodeType::ConvolutionLayer).empty())
+ {
+ return;
+ }
+
+ // Total nodes
+ size_t total_nodes = g.nodes().size();
+
+ // Iterate over convolution nodes
+ for(unsigned int i = 0; i < total_nodes; ++i)
+ {
+ INode *node = g.node(i);
+ if(node != nullptr && node->type() == NodeType::ConvolutionLayer && arm_compute::utils::cast::polymorphic_downcast<ConvolutionLayerNode *>(node)->num_groups() != 1)
+ {
+ // Validate node
+ backends::IDeviceBackend *backend = backends::BackendRegistry::get().find_backend(node->assigned_target());
+ Status status = backend->validate_node(*node);
+
+ // If grouped convolution is not supported
+ if(!bool(status))
+ {
+ // Down-cast node
+ auto *conv_node = arm_compute::utils::cast::polymorphic_downcast<ConvolutionLayerNode *>(node);
+
+ // Get internal convolution info
+ // TODO (geopin01) : Create a descriptor
+ const PadStrideInfo conv_info = conv_node->convolution_info();
+ const ConvolutionMethod conv_method = conv_node->convolution_method();
+ const FastMathHint fast_math_hint = conv_node->fast_math_hint();
+ const unsigned int num_groups = conv_node->num_groups();
+ const NodeParams params = conv_node->common_node_params();
+ const Target assigned_target = conv_node->assigned_target();
+
+ // Extract node ids
+ const NodeID input_id = conv_node->input_id(0);
+ const NodeID weights_id = conv_node->input_id(1);
+ const NodeID bias_id = conv_node->input_id(2);
+
+ // Get driving nodes
+ std::vector<NodeIdxPair> driving_nodes = get_driving_nodes(*node);
+
+ // Extract the convolution node's output accessor, if any
+ auto node_accessor = conv_node->output(0)->extract_accessor();
+
+ // Current max tensor and node id
+ TensorID latest_tid = g.tensors().size();
+ NodeID latest_nid = g.nodes().size();
+
+ // Create grouped convolution node
+ NodeID grouped_conv_id = create_grouped_convolution(g, params, { input_id, 0 }, weights_id, bias_id,
+ conv_info, conv_method, fast_math_hint, num_groups);
+
+ // Remove convolution node
+ g.remove_node(node->id());
+
+ // Reconnect driving nodes to the grouped convolution output
+ for(auto &driving_node : driving_nodes)
+ {
+ g.add_connection(grouped_conv_id, 0, driving_node.node_id, driving_node.index);
+ }
+
+ // Transfer the original output accessor to the grouped convolution output
+ g.node(grouped_conv_id)->output(0)->set_accessor(std::move(node_accessor));
+
+ // Configure new tensors and nodes
+ std::for_each(g.tensors().begin() + latest_tid, g.tensors().end(), [](std::unique_ptr<Tensor> &t)
+ {
+ configure_tensor(t.get());
+ });
+ std::for_each(g.nodes().begin() + latest_nid, g.nodes().end(), [&assigned_target](std::unique_ptr<INode> &n)
+ {
+ if(n != nullptr)
+ {
+ n->set_assigned_target(assigned_target);
+ }
+ });
+ }
+ }
+ }
+}
+} // namespace graph
+} // namespace arm_compute
diff --git a/src/graph/mutators/NodeExecutionMethodMutator.cpp b/src/graph/mutators/NodeExecutionMethodMutator.cpp
new file mode 100644
index 0000000000..896bf0742c
--- /dev/null
+++ b/src/graph/mutators/NodeExecutionMethodMutator.cpp
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/graph/mutators/NodeExecutionMethodMutator.h"
+
+#include "arm_compute/graph/Graph.h"
+#include "arm_compute/graph/Logger.h"
+#include "arm_compute/graph/Utils.h"
+#include "arm_compute/graph/backends/BackendRegistry.h"
+#include "arm_compute/graph/nodes/Nodes.h"
+
+#include "arm_compute/core/utils/misc/Cast.h"
+
+namespace arm_compute
+{
+namespace graph
+{
+namespace
+{
+/** Runs a setter function on all nodes of a given type that fail backend validation
+ *
+ * @tparam Setter Type of the setter function
+ *
+ * @param[in, out] g Graph to extract the nodes from
+ * @param[in] node_type Node type
+ * @param[in] setter Setter function to run on each invalid node
+ */
+template <typename Setter>
+void set_default_on_invalid_method(Graph &g, NodeType node_type, Setter &&setter)
+{
+ const std::vector<NodeID> &node_ids = g.nodes(node_type);
+ for(auto &node_id : node_ids)
+ {
+ INode *node = g.node(node_id);
+ if(node != nullptr)
+ {
+ // Validate node
+ backends::IDeviceBackend *backend = backends::BackendRegistry::get().find_backend(node->assigned_target());
+ Status status = backend->validate_node(*node);
+
+ // Set default execution method in case of failure
+ if(!bool(status))
+ {
+ setter(node);
+ }
+ }
+ }
+}
+} // namespace
+
+const char *NodeExecutionMethodMutator::name()
+{
+ return "NodeExecutionMethodMutator";
+}
+
+void NodeExecutionMethodMutator::mutate(Graph &g)
+{
+ // Convolution Layer
+ set_default_on_invalid_method(g, NodeType::ConvolutionLayer, [](INode * n)
+ {
+ ARM_COMPUTE_LOG_GRAPH_INFO("Switched ConvolutionLayer method of node with ID : "
+ << n->id() << " and Name: " << n->name() << std::endl);
+ auto *casted_node = arm_compute::utils::cast::polymorphic_downcast<ConvolutionLayerNode *>(n);
+ casted_node->set_convolution_method(ConvolutionMethod::Default);
+ });
+
+ // Depthwise Convolution Layer
+ set_default_on_invalid_method(g, NodeType::DepthwiseConvolutionLayer, [](INode * n)
+ {
+ ARM_COMPUTE_LOG_GRAPH_INFO("Switched Depthwise ConvolutionLayer method of node with ID : "
+ << n->id() << " and Name: " << n->name() << std::endl);
+ auto *casted_node = arm_compute::utils::cast::polymorphic_downcast<DepthwiseConvolutionLayerNode *>(n);
+ casted_node->set_depthwise_convolution_method(DepthwiseConvolutionMethod::Default);
+ });
+}
+} // namespace graph
+} // namespace arm_compute
diff --git a/src/graph/mutators/NodeFusionMutator.cpp b/src/graph/mutators/NodeFusionMutator.cpp
index 6677330cec..82bfe25a3e 100644
--- a/src/graph/mutators/NodeFusionMutator.cpp
+++ b/src/graph/mutators/NodeFusionMutator.cpp
@@ -25,6 +25,7 @@
#include "arm_compute/graph/Graph.h"
#include "arm_compute/graph/Logger.h"
+#include "arm_compute/graph/Utils.h"
#include "arm_compute/graph/nodes/Nodes.h"
#include "arm_compute/core/utils/misc/Cast.h"
@@ -71,17 +72,7 @@ void fuse_batch_norm_with_activation(Graph &g)
if(bn_node->output(0)->accessor() == nullptr)
{
// Get driving nodes of activation node
- std::vector<NodeIdxPair> act_driving_nodes;
- for(auto &act_output_edge_id : act_node->output_edges())
- {
- auto act_output_edge = g.edge(act_output_edge_id);
- if(act_output_edge != nullptr)
- {
- ARM_COMPUTE_ERROR_ON(act_output_edge->consumer() == nullptr);
- act_driving_nodes.push_back(
- { act_output_edge->consumer_id(), act_output_edge->consumer_idx() });
- }
- }
+ std::vector<NodeIdxPair> act_driving_nodes = get_driving_nodes(*act_node);
// Set activation info to batch normalization
bn_node->set_fused_activation(act_node->activation_info());
diff --git a/src/graph/mutators/SplitLayerSubTensorMutator.cpp b/src/graph/mutators/SplitLayerSubTensorMutator.cpp
index 2a8c029843..5f1c9c3186 100644
--- a/src/graph/mutators/SplitLayerSubTensorMutator.cpp
+++ b/src/graph/mutators/SplitLayerSubTensorMutator.cpp
@@ -25,6 +25,7 @@
#include "arm_compute/graph/Graph.h"
#include "arm_compute/graph/Logger.h"
+#include "arm_compute/graph/algorithms/TopologicalSort.h"
#include "arm_compute/graph/backends/BackendRegistry.h"
#include "arm_compute/graph/nodes/SplitLayerNode.h"
@@ -42,10 +43,20 @@ const char *SplitLayerSubTensorMutator::name()
void SplitLayerSubTensorMutator::mutate(Graph &g)
{
+ // Early exit if no Split layers exist in graph
+ if(g.nodes(NodeType::SplitLayer).empty())
+ {
+ return;
+ }
+
+ // Perform topological sort
+ std::vector<NodeID> topological_sorted_node_ids = dfs(g);
+
// Should be in reverse order of execution
- for(auto &node : arm_compute::utils::iterable::reverse_iterate(g.nodes()))
+ for(auto &node_id : arm_compute::utils::iterable::reverse_iterate(topological_sorted_node_ids))
{
- if(node && node->type() == NodeType::SplitLayer && node->input(0) != nullptr)
+ INode *node = g.node(node_id);
+ if(node != nullptr && node->type() == NodeType::SplitLayer && node->input(0) != nullptr)
{
// Get output tensor
Tensor *input_tensor = node->input(0);
@@ -63,7 +74,7 @@ void SplitLayerSubTensorMutator::mutate(Graph &g)
ARM_COMPUTE_LOG_GRAPH_VERBOSE("Using sub-tensors for the node with ID : "
<< node->id() << " and name : " << node->name() << std::endl);
- auto *split_node = arm_compute::utils::cast::polymorphic_downcast<SplitLayerNode *>(node.get());
+ auto *split_node = arm_compute::utils::cast::polymorphic_downcast<SplitLayerNode *>(node);
const unsigned int axis = split_node->axis();
const unsigned int num_splits = split_node->num_splits();
diff --git a/src/graph/nodes/ConvolutionLayerNode.cpp b/src/graph/nodes/ConvolutionLayerNode.cpp
index 81cb2c2e6c..e9cb0396eb 100644
--- a/src/graph/nodes/ConvolutionLayerNode.cpp
+++ b/src/graph/nodes/ConvolutionLayerNode.cpp
@@ -32,8 +32,12 @@ namespace arm_compute
{
namespace graph
{
-ConvolutionLayerNode::ConvolutionLayerNode(PadStrideInfo info, ConvolutionMethod method, FastMathHint fast_math_hint, QuantizationInfo out_quant_info)
- : _info(std::move(info)), _method(method), _fast_math_hint(fast_math_hint), _out_quant_info(out_quant_info)
+ConvolutionLayerNode::ConvolutionLayerNode(PadStrideInfo info,
+ unsigned int num_groups,
+ ConvolutionMethod method,
+ FastMathHint fast_math_hint,
+ QuantizationInfo out_quant_info)
+ : _info(std::move(info)), _num_groups(num_groups), _method(method), _fast_math_hint(fast_math_hint), _out_quant_info(out_quant_info)
{
_input_edges.resize(3, EmptyEdgeID);
_outputs.resize(1, NullTensorID);
@@ -64,6 +68,11 @@ PadStrideInfo ConvolutionLayerNode::convolution_info() const
return _info;
}
+unsigned int ConvolutionLayerNode::num_groups() const
+{
+ return _num_groups;
+}
+
TensorDescriptor ConvolutionLayerNode::compute_output_descriptor(const TensorDescriptor &input_descriptor,
const TensorDescriptor &weights_descriptor,
const PadStrideInfo &info)