-rw-r--r--  arm_compute/core/SubTensorInfo.h               1
-rw-r--r--  arm_compute/graph/Nodes.h                      1
-rw-r--r--  arm_compute/graph/SubTensor.h                104
-rw-r--r--  arm_compute/graph/Types.h                     13
-rw-r--r--  arm_compute/graph/nodes/ConvolutionLayer.h    68
-rw-r--r--  arm_compute/graph/nodes/NormalizationLayer.h  53
-rw-r--r--  examples/graph_alexnet.cpp                   182
-rwxr-xr-x  scripts/clang_tidy_rules.py                    2
-rw-r--r--  src/graph/SubTensor.cpp                      105
-rw-r--r--  src/graph/nodes/ConvolutionLayer.cpp         263
-rw-r--r--  src/graph/nodes/FullyConnectedLayer.cpp       24
-rw-r--r--  src/graph/nodes/NormalizationLayer.cpp       105
-rw-r--r--  utils/GraphTypePrinter.h                      63
-rw-r--r--  utils/TypePrinter.h                            7
14 files changed, 949 insertions, 42 deletions
diff --git a/arm_compute/core/SubTensorInfo.h b/arm_compute/core/SubTensorInfo.h
index e2532fd487..54fb66a573 100644
--- a/arm_compute/core/SubTensorInfo.h
+++ b/arm_compute/core/SubTensorInfo.h
@@ -27,6 +27,7 @@
#include "arm_compute/core/ITensorInfo.h"
#include "arm_compute/core/Coordinates.h"
+#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/Strides.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/TensorShape.h"
diff --git a/arm_compute/graph/Nodes.h b/arm_compute/graph/Nodes.h
index 5e995ac8d1..b879aa1d09 100644
--- a/arm_compute/graph/Nodes.h
+++ b/arm_compute/graph/Nodes.h
@@ -27,6 +27,7 @@
#include "arm_compute/graph/nodes/ActivationLayer.h"
#include "arm_compute/graph/nodes/ConvolutionLayer.h"
#include "arm_compute/graph/nodes/FullyConnectedLayer.h"
+#include "arm_compute/graph/nodes/NormalizationLayer.h"
#include "arm_compute/graph/nodes/PoolingLayer.h"
#include "arm_compute/graph/nodes/SoftmaxLayer.h"
diff --git a/arm_compute/graph/SubTensor.h b/arm_compute/graph/SubTensor.h
new file mode 100644
index 0000000000..a73b0d6b0e
--- /dev/null
+++ b/arm_compute/graph/SubTensor.h
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_GRAPH_SUBTENSOR_H__
+#define __ARM_COMPUTE_GRAPH_SUBTENSOR_H__
+
+#include "arm_compute/graph/ITensorAccessor.h"
+#include "arm_compute/graph/Tensor.h"
+#include "arm_compute/graph/Types.h"
+#include "support/ToolchainSupport.h"
+
+#include <memory>
+
+namespace arm_compute
+{
+namespace graph
+{
+/** SubTensor class */
+class SubTensor final
+{
+public:
+ /** Default Constructor */
+ SubTensor();
+ /** Constructor
+ *
+ * @param[in] parent Parent to create sub-tensor from
+ * @param[in] tensor_shape Sub-tensor shape
+ * @param[in] coords Starting coordinates of the sub-tensor in the parent tensor
+ */
+ SubTensor(Tensor &parent, TensorShape tensor_shape, Coordinates coords);
+ /** Constructor
+ *
+ * @param[in] parent Parent to create sub-tensor from
+ * @param[in] tensor_shape Sub-tensor shape
+ * @param[in] coords Starting coordinates of the sub-tensor in the parent tensor
+ * @param[in] target Execution target
+ */
+ SubTensor(ITensor *parent, TensorShape tensor_shape, Coordinates coords, Hint target);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ SubTensor(const SubTensor &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ SubTensor &operator=(const SubTensor &) = delete;
+ /** Allow instances of this class to be moved */
+ SubTensor(SubTensor &&) = default;
+ /** Allow instances of this class to be moved */
+ SubTensor &operator=(SubTensor &&) = default;
+ /** Default Destructor */
+ ~SubTensor() = default;
+
+    /** Sets the given SubTensorInfo to the sub-tensor
+     *
+     * @param[in] info SubTensorInfo to set
+     */
+    void set_info(SubTensorInfo &&info);
+    /** Returns the sub-tensor's SubTensorInfo
+     *
+     * @return SubTensorInfo of the sub-tensor
+     */
+ const SubTensorInfo &info() const;
+ /** Returns a pointer to the internal tensor
+ *
+ * @return Tensor
+ */
+ ITensor *tensor();
+ /** Return the target that this tensor is pinned on
+ *
+ * @return Target of the tensor
+ */
+ Hint target() const;
+
+private:
+ /** Instantiates a sub-tensor */
+ void instantiate_subtensor();
+
+private:
+ Hint _target; /**< Target that this tensor is pinned on */
+ Coordinates _coords; /**< SubTensor Coordinates */
+ SubTensorInfo _info; /**< SubTensor metadata */
+ ITensor *_parent; /**< Parent tensor */
+ std::unique_ptr<ITensor> _subtensor; /**< SubTensor */
+};
+} // namespace graph
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_GRAPH_SUBTENSOR_H__ */
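Note: the snippet below is a minimal usage sketch, not part of this patch, showing how the SubTensor constructor declared above can carve channel-wise views out of a parent tensor — the same mechanism the grouped convolution further down relies on. The shapes, coordinates and the NEON target are illustrative assumptions.

    #include "arm_compute/graph/SubTensor.h"

    using namespace arm_compute;
    using namespace arm_compute::graph;

    // Split a 27x27x256 parent tensor into two 27x27x128 channel groups.
    void make_channel_groups(ITensor *parent)
    {
        const TensorShape group_shape(27U, 27U, 128U);
        SubTensor group0(parent, group_shape, Coordinates(0, 0, 0), Hint::NEON);
        SubTensor group1(parent, group_shape, Coordinates(0, 0, 128), Hint::NEON);

        // group0.tensor() and group1.tensor() can now be passed to two
        // independent convolution functions, one per channel group.
    }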
diff --git a/arm_compute/graph/Types.h b/arm_compute/graph/Types.h
index 0b9596d589..538d64e9bb 100644
--- a/arm_compute/graph/Types.h
+++ b/arm_compute/graph/Types.h
@@ -25,19 +25,24 @@
#define __ARM_COMPUTE_GRAPH_TYPES_H__
#include "arm_compute/core/ITensor.h"
+#include "arm_compute/core/SubTensorInfo.h"
#include "arm_compute/core/TensorInfo.h"
namespace arm_compute
{
namespace graph
{
-using arm_compute::ActivationLayerInfo;
using arm_compute::ITensor;
using arm_compute::TensorInfo;
+using arm_compute::SubTensorInfo;
using arm_compute::DataType;
+using arm_compute::Coordinates;
using arm_compute::TensorShape;
using arm_compute::PadStrideInfo;
using arm_compute::WeightsInfo;
+using arm_compute::ActivationLayerInfo;
+using arm_compute::NormType;
+using arm_compute::NormalizationLayerInfo;
using arm_compute::PoolingLayerInfo;
using arm_compute::PoolingType;
@@ -49,6 +54,12 @@ enum class Hint
NEON /**< Run node on a NEON capable device */
};
+/**< Convolution method hint to the graph executor */
+enum class ConvolutionMethodHint
+{
+ GEMM, /**< Convolution using GEMM */
+ DIRECT /**< Direct convolution */
+};
} // namespace graph
} // namespace arm_compute
#endif /*__ARM_COMPUTE_GRAPH_TYPES_H__*/
diff --git a/arm_compute/graph/nodes/ConvolutionLayer.h b/arm_compute/graph/nodes/ConvolutionLayer.h
index c0e257bf6a..fcd097bdaa 100644
--- a/arm_compute/graph/nodes/ConvolutionLayer.h
+++ b/arm_compute/graph/nodes/ConvolutionLayer.h
@@ -25,15 +25,19 @@
#define __ARM_COMPUTE_GRAPH_CONVOLUTION_LAYER_H__
#include "arm_compute/graph/INode.h"
+#include "arm_compute/graph/SubTensor.h"
#include "arm_compute/graph/Tensor.h"
#include "arm_compute/graph/Types.h"
+#include "arm_compute/runtime/IFunction.h"
+
+#include <memory>
namespace arm_compute
{
namespace graph
{
/** Convolution layer node */
-class ConvolutionLayer : public INode
+class ConvolutionLayer final : public INode
{
public:
/** Default Constructor
@@ -44,12 +48,30 @@ public:
* @param[in] weights Weights of the convolution layer
* @param[in] biases Bias of the convolution layer
* @param[in] conv_info Convolution information
- * @param[in] weights_info Weights information
+ * @param[in] num_groups (Optional) Number of groups, default = 1
+ * @param[in] weights_info (Optional) Weights information
*/
template <typename AccessorTypeWeights, typename AccessorTypeBiases>
- ConvolutionLayer(unsigned int conv_width, unsigned int conv_height, unsigned int ofm, AccessorTypeWeights &&weights,
- AccessorTypeBiases &&biases, const PadStrideInfo &conv_info, const WeightsInfo &weights_info = WeightsInfo())
- : _conv_width(conv_width), _conv_height(conv_height), _ofm(ofm), _weights(std::move(weights)), _biases(std::move(biases)), _conv_info(conv_info), _weights_info(weights_info)
+ ConvolutionLayer(unsigned int conv_width,
+ unsigned int conv_height,
+ unsigned int ofm,
+ AccessorTypeWeights &&weights,
+ AccessorTypeBiases &&biases,
+ const PadStrideInfo conv_info,
+ unsigned int num_groups = 1,
+ const WeightsInfo weights_info = WeightsInfo())
+ : _conv_width(conv_width),
+ _conv_height(conv_height),
+ _ofm(ofm),
+ _weights(std::move(weights)),
+ _biases(std::move(biases)),
+ _conv_info(std::move(conv_info)),
+ _num_groups(num_groups),
+ _weights_info(std::move(weights_info)),
+ _is(nullptr),
+ _os(nullptr),
+ _ws(nullptr),
+ _bs(nullptr)
{
}
@@ -58,13 +80,35 @@ public:
void print_info() override;
private:
- unsigned int _conv_width; /**< Convolution width */
- unsigned int _conv_height; /**< Convolution height */
- unsigned int _ofm; /**< Output feature maps */
- Tensor _weights; /**< Weights tensor */
- Tensor _biases; /**< Biases tensor */
- const PadStrideInfo &_conv_info; /**< Convolution layer information */
- const WeightsInfo &_weights_info; /**< Convolution layer weights information */
+ /** Instantiates a non-grouped convolution
+ *
+ * @param[in] conv_method_hint Hint that specifies which convolution layer method to use
+ *
+ * @return Convolution function
+ */
+ std::unique_ptr<arm_compute::IFunction> instantiate_convolution(ConvolutionMethodHint conv_method_hint);
+ /** Instantiates a grouped convolution
+ *
+ * @param[in] conv_method_hint Hint that specifies which convolution layer method to use
+ *
+ * @return Grouped Convolution function
+ */
+ std::unique_ptr<arm_compute::IFunction> instantiate_grouped_convolution(ConvolutionMethodHint conv_method_hint);
+
+private:
+ unsigned int _conv_width; /**< Convolution width */
+ unsigned int _conv_height; /**< Convolution height */
+ unsigned int _ofm; /**< Output feature maps */
+ Tensor _weights; /**< Weights tensor */
+ Tensor _biases; /**< Biases tensor */
+ const PadStrideInfo _conv_info; /**< Convolution layer information */
+ unsigned int _num_groups; /**< Number of groups */
+ const WeightsInfo _weights_info; /**< Convolution layer weights information */
+
+ std::unique_ptr<SubTensor[]> _is; /**< Input tensor sub-tensors used for grouped convolution */
+ std::unique_ptr<SubTensor[]> _os; /**< Output tensor sub-tensors used for grouped convolution */
+ std::unique_ptr<SubTensor[]> _ws; /**< Weights tensor sub-tensors used for grouped convolution */
+ std::unique_ptr<SubTensor[]> _bs; /**< Biases tensor sub-tensors used for grouped convolution */
};
} // namespace graph
} // namespace arm_compute
diff --git a/arm_compute/graph/nodes/NormalizationLayer.h b/arm_compute/graph/nodes/NormalizationLayer.h
new file mode 100644
index 0000000000..40b9c2b467
--- /dev/null
+++ b/arm_compute/graph/nodes/NormalizationLayer.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_GRAPH_NORMALIZATION_LAYER_H__
+#define __ARM_COMPUTE_GRAPH_NORMALIZATION_LAYER_H__
+
+#include "arm_compute/graph/INode.h"
+#include "arm_compute/graph/Types.h"
+
+namespace arm_compute
+{
+namespace graph
+{
+/** Normalization layer node */
+class NormalizationLayer final : public INode
+{
+public:
+ /** Default Constructor
+ *
+ * @param[in] norm_info Normalization layer information
+ */
+ explicit NormalizationLayer(const NormalizationLayerInfo norm_info);
+
+    // Inherited methods overridden:
+ std::unique_ptr<arm_compute::IFunction> instantiate_node(Hint hint, ITensor *input, ITensor *output) override;
+ void print_info() override;
+
+private:
+ const NormalizationLayerInfo _norm_info; /**< Normalization layer information */
+};
+} // namespace graph
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_GRAPH_NORMALIZATION_LAYER_H__ */
diff --git a/examples/graph_alexnet.cpp b/examples/graph_alexnet.cpp
new file mode 100644
index 0000000000..cf5f635d33
--- /dev/null
+++ b/examples/graph_alexnet.cpp
@@ -0,0 +1,182 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CL /* Needed by Utils.cpp to handle OpenCL exceptions properly */
+#error "This example needs to be built with -DARM_COMPUTE_CL"
+#endif /* ARM_COMPUTE_CL */
+
+#include "arm_compute/graph/Graph.h"
+#include "arm_compute/graph/Nodes.h"
+#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "arm_compute/runtime/CPP/CPPScheduler.h"
+#include "arm_compute/runtime/Scheduler.h"
+#include "support/ToolchainSupport.h"
+#include "utils/GraphUtils.h"
+#include "utils/Utils.h"
+
+#include <cstdlib>
+#include <iostream>
+#include <memory>
+
+using namespace arm_compute::graph;
+using namespace arm_compute::graph_utils;
+
+/** Generates appropriate accessor according to the specified path
+ *
+ * @note If path is empty, a DummyAccessor is generated; otherwise a NumPyBinLoader is generated
+ *
+ * @param[in] path Path to the data files
+ * @param[in] data_file Relative path to the data files from path
+ *
+ * @return An appropriate tensor accessor
+ */
+std::unique_ptr<ITensorAccessor> get_accessor(const std::string &path, const std::string &data_file)
+{
+ if(path.empty())
+ {
+ return arm_compute::support::cpp14::make_unique<DummyAccessor>();
+ }
+ else
+ {
+ return arm_compute::support::cpp14::make_unique<NumPyBinLoader>(path + data_file);
+ }
+}
+
+/** Example demonstrating how to implement AlexNet's network using the Compute Library's graph API
+ *
+ * @param[in] argc Number of arguments
+ * @param[in] argv Arguments ( [optional] Path to the weights folder, [optional] batches )
+ */
+void main_graph_alexnet(int argc, const char **argv)
+{
+ std::string data_path; /** Path to the trainable data */
+ unsigned int batches = 4; /** Number of batches */
+
+ // Parse arguments
+ if(argc < 2)
+ {
+ // Print help
+ std::cout << "Usage: " << argv[0] << " [path_to_data] [batches]\n\n";
+ std::cout << "No data folder provided: using random values\n\n";
+ }
+ else if(argc == 2)
+ {
+        // Use argv[1] as the path to the data folder
+        data_path = argv[1];
+        std::cout << "Usage: " << argv[0] << " [path_to_data] [batches]\n\n";
+        std::cout << "Number of batches not specified: using the default of " << batches << "\n\n";
+ }
+ else
+ {
+        // Use argv[1] as the path to the data folder and argv[2] as the number of batches
+ data_path = argv[1];
+ batches = std::strtol(argv[2], nullptr, 0);
+ }
+
+ // Check if OpenCL is available and initialize the scheduler
+ Hint hint = Hint::NEON;
+ if(arm_compute::opencl_is_available())
+ {
+ arm_compute::CLScheduler::get().default_init();
+ hint = Hint::OPENCL;
+ }
+
+ Graph graph;
+ graph.set_info_enablement(true);
+
+ graph << hint
+ << Tensor(TensorInfo(TensorShape(227U, 227U, 3U, batches), 1, DataType::F32), DummyAccessor())
+ // Layer 1
+ << ConvolutionLayer(
+ 11U, 11U, 96U,
+ get_accessor(data_path, "/cnn_data/alexnet_model/conv1_w.npy"),
+ get_accessor(data_path, "/cnn_data/alexnet_model/conv1_b.npy"),
+ PadStrideInfo(4, 4, 0, 0))
+ << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
+ << NormalizationLayer(NormalizationLayerInfo(NormType::CROSS_MAP, 5, 0.0001f, 0.75f))
+ << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0)))
+ // Layer 2
+ << ConvolutionLayer(
+ 5U, 5U, 256U,
+ get_accessor(data_path, "/cnn_data/alexnet_model/conv2_w.npy"),
+ get_accessor(data_path, "/cnn_data/alexnet_model/conv2_b.npy"),
+ PadStrideInfo(1, 1, 2, 2), 2)
+ << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
+ << NormalizationLayer(NormalizationLayerInfo(NormType::CROSS_MAP, 5, 0.0001f, 0.75f))
+ << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0)))
+ // Layer 3
+ << ConvolutionLayer(
+ 3U, 3U, 384U,
+ get_accessor(data_path, "/cnn_data/alexnet_model/conv3_w.npy"),
+ get_accessor(data_path, "/cnn_data/alexnet_model/conv3_b.npy"),
+ PadStrideInfo(1, 1, 1, 1))
+ << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
+ // Layer 4
+ << ConvolutionLayer(
+ 3U, 3U, 384U,
+ get_accessor(data_path, "/cnn_data/alexnet_model/conv4_w.npy"),
+ get_accessor(data_path, "/cnn_data/alexnet_model/conv4_b.npy"),
+ PadStrideInfo(1, 1, 1, 1), 2)
+ << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
+ // Layer 5
+ << ConvolutionLayer(
+ 3U, 3U, 256U,
+ get_accessor(data_path, "/cnn_data/alexnet_model/conv5_w.npy"),
+ get_accessor(data_path, "/cnn_data/alexnet_model/conv5_b.npy"),
+ PadStrideInfo(1, 1, 1, 1), 2)
+ << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
+ << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0)))
+ // Layer 6
+ << FullyConnectedLayer(
+ 4096U,
+ get_accessor(data_path, "/cnn_data/alexnet_model/fc6_w.npy"),
+ get_accessor(data_path, "/cnn_data/alexnet_model/fc6_b.npy"))
+ << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
+ // Layer 7
+ << FullyConnectedLayer(
+ 4096U,
+ get_accessor(data_path, "/cnn_data/alexnet_model/fc7_w.npy"),
+ get_accessor(data_path, "/cnn_data/alexnet_model/fc7_b.npy"))
+ << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
+ // Layer 8
+ << FullyConnectedLayer(
+ 1000U,
+ get_accessor(data_path, "/cnn_data/alexnet_model/fc8_w.npy"),
+ get_accessor(data_path, "/cnn_data/alexnet_model/fc8_b.npy"))
+ // Softmax
+ << SoftmaxLayer()
+ << Tensor(DummyAccessor());
+
+ // Run graph
+ graph.run();
+}
+
+/** Main program for AlexNet
+ *
+ * @param[in] argc Number of arguments
+ * @param[in] argv Arguments ( [optional] Path to the weights folder, [optional] batches )
+ */
+int main(int argc, const char **argv)
+{
+ return arm_compute::utils::run_example(argc, argv, main_graph_alexnet);
+}
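For orientation, the stream operators used above compose the graph node by node. The following is a stripped-down sketch of the same pattern (not part of this patch), using only types already present in the example — an input Tensor, a single node and an output Tensor:

    #include "arm_compute/graph/Graph.h"
    #include "arm_compute/graph/Nodes.h"
    #include "utils/GraphUtils.h"

    using namespace arm_compute::graph;
    using namespace arm_compute::graph_utils;

    void minimal_graph_sketch()
    {
        Graph graph;
        graph << Hint::NEON
              << Tensor(TensorInfo(TensorShape(224U, 224U, 3U, 1U), 1, DataType::F32), DummyAccessor())
              << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
              << Tensor(DummyAccessor());

        // Executes all nodes in the order they were streamed in.
        graph.run();
    }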
diff --git a/scripts/clang_tidy_rules.py b/scripts/clang_tidy_rules.py
index 378e18eeff..72eae6f417 100755
--- a/scripts/clang_tidy_rules.py
+++ b/scripts/clang_tidy_rules.py
@@ -85,6 +85,8 @@ def filter_clang_tidy_lines( lines ):
("NESoftmaxLayerKernel.cpp" in line and "do not use C-style cast to convert between unrelated types" in line) or
("GraphUtils.cpp" in line and "consider replacing 'unsigned long' with 'uint32'" in line) or
("GraphUtils.cpp" in line and "consider replacing 'unsigned long' with 'uint64'" in line) or
+ ("ConvolutionLayer.cpp" in line and "move assignment operators should be marked noexcept" in line) or
+ ("ConvolutionLayer.cpp" in line and "move constructors should be marked noexcept" in line) or
("parameter 'memory_manager' is unused" in line) or
("parameter 'memory_manager' is copied for each invocation but only used as a const reference" in line) or
"3rdparty" in line):
diff --git a/src/graph/SubTensor.cpp b/src/graph/SubTensor.cpp
new file mode 100644
index 0000000000..a70f32927b
--- /dev/null
+++ b/src/graph/SubTensor.cpp
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "arm_compute/graph/SubTensor.h"
+
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/runtime/CL/CLSubTensor.h"
+#include "arm_compute/runtime/SubTensor.h"
+#include "utils/TypePrinter.h"
+
+using namespace arm_compute::graph;
+
+namespace
+{
+template <typename SubTensorType, typename ParentTensorType>
+std::unique_ptr<ITensor> initialise_subtensor(ITensor *parent, TensorShape shape, Coordinates coords)
+{
+ auto ptensor = dynamic_cast<ParentTensorType *>(parent);
+ auto subtensor = arm_compute::support::cpp14::make_unique<SubTensorType>(ptensor, shape, coords);
+ return std::move(subtensor);
+}
+} // namespace
+
+SubTensor::SubTensor()
+ : _target(Hint::DONT_CARE), _coords(), _info(), _parent(nullptr), _subtensor(nullptr)
+{
+}
+
+SubTensor::SubTensor(Tensor &parent, TensorShape tensor_shape, Coordinates coords)
+ : _target(Hint::DONT_CARE), _coords(coords), _info(), _parent(nullptr), _subtensor(nullptr)
+{
+ ARM_COMPUTE_ERROR_ON(parent.tensor() == nullptr);
+ _parent = parent.tensor();
+ _info = SubTensorInfo(parent.tensor()->info(), tensor_shape, coords);
+ _target = parent.target();
+
+ instantiate_subtensor();
+}
+
+SubTensor::SubTensor(ITensor *parent, TensorShape tensor_shape, Coordinates coords, Hint target)
+ : _target(target), _coords(coords), _info(), _parent(parent), _subtensor(nullptr)
+{
+ ARM_COMPUTE_ERROR_ON(parent == nullptr);
+ _info = SubTensorInfo(parent->info(), tensor_shape, coords);
+
+ instantiate_subtensor();
+}
+
+void SubTensor::set_info(SubTensorInfo &&info)
+{
+ _info = info;
+}
+
+const SubTensorInfo &SubTensor::info() const
+{
+ return _info;
+}
+
+ITensor *SubTensor::tensor()
+{
+ return _subtensor.get();
+}
+
+Hint SubTensor::target() const
+{
+ return _target;
+}
+
+void SubTensor::instantiate_subtensor()
+{
+ switch(_target)
+ {
+ case Hint::OPENCL:
+ _subtensor = initialise_subtensor<arm_compute::CLSubTensor, arm_compute::ICLTensor>(_parent, _info.tensor_shape(), _coords);
+ break;
+ case Hint::NEON:
+ _subtensor = initialise_subtensor<arm_compute::SubTensor, arm_compute::ITensor>(_parent, _info.tensor_shape(), _coords);
+ break;
+ default:
+ ARM_COMPUTE_ERROR("Invalid Hint");
+ }
+}
diff --git a/src/graph/nodes/ConvolutionLayer.cpp b/src/graph/nodes/ConvolutionLayer.cpp
index b80bf93eff..ce9f096719 100644
--- a/src/graph/nodes/ConvolutionLayer.cpp
+++ b/src/graph/nodes/ConvolutionLayer.cpp
@@ -24,60 +24,155 @@
#include "arm_compute/graph/nodes/ConvolutionLayer.h"
#include "arm_compute/runtime/CL/functions/CLConvolutionLayer.h"
+#include "arm_compute/runtime/CL/functions/CLDirectConvolutionLayer.h"
+#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/NEON/functions/NEConvolutionLayer.h"
+#include "arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h"
#include "support/ToolchainSupport.h"
+#include "utils/GraphTypePrinter.h"
#include "utils/TypePrinter.h"
+#include <tuple>
+#include <vector>
+
using namespace arm_compute::graph;
namespace
{
-template <typename ConvolutionType, typename TensorType, Hint hint>
-std::unique_ptr<arm_compute::IFunction> instantiate_function(ITensor *input, Tensor &weights, Tensor &biases, ITensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info)
+/** Calculates the output shape of the convolution layer
+ *
+ * @param[in] input_shape Input tensor shape
+ * @param[in] weights_shape Weights shape
+ * @param[in] conv_info Convolution information (padding, stride, etc.)
+ *
+ * @return The expected output tensor shape
+ */
+TensorShape calculate_convolution_layer_output_shape(const TensorShape &input_shape, const TensorShape &weights_shape, const PadStrideInfo &conv_info)
{
- bool weights_are_loaded = weights.tensor() != nullptr;
- bool biases_are_loaded = biases.tensor() != nullptr;
+ unsigned int output_width = 0;
+ unsigned int output_height = 0;
+
+ // Get output width and height
+ std::tie(output_width, output_height) = arm_compute::scaled_dimensions(input_shape.x(), input_shape.y(), weights_shape.x(), weights_shape.y(), conv_info);
+ // Create output shape
+ TensorShape output_shape = input_shape;
+ output_shape.set(0, output_width);
+ output_shape.set(1, output_height);
+ output_shape.set(2, weights_shape[3]);
+
+ return output_shape;
+}
+
+// Instantiate GEMM based convolution layer
+template <typename ConvolutionType, typename TensorType, Hint hint>
+std::unique_ptr<arm_compute::IFunction> instantiate_function(ITensor *input, ITensor *weights, ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info)
+{
auto conv = arm_compute::support::cpp14::make_unique<ConvolutionType>();
conv->configure(
dynamic_cast<TensorType *>(input),
- dynamic_cast<TensorType *>(weights.set_target(hint)),
- dynamic_cast<TensorType *>(biases.set_target(hint)),
+ dynamic_cast<TensorType *>(weights),
+ dynamic_cast<TensorType *>(biases),
dynamic_cast<TensorType *>(output),
conv_info, weights_info);
- if(!weights_are_loaded)
- {
- weights.allocate_and_fill_if_needed();
- }
- if(!biases_are_loaded)
- {
- biases.allocate_and_fill_if_needed();
- }
+ return std::move(conv);
+}
+// Instantiate direct convolution layer
+template <typename ConvolutionType, typename TensorType, Hint hint>
+std::unique_ptr<arm_compute::IFunction> instantiate_direct_function(ITensor *input, ITensor *weights, ITensor *biases, ITensor *output, const PadStrideInfo &conv_info)
+{
+ auto conv = arm_compute::support::cpp14::make_unique<ConvolutionType>();
+ conv->configure(
+ dynamic_cast<TensorType *>(input),
+ dynamic_cast<TensorType *>(weights),
+ dynamic_cast<TensorType *>(biases),
+ dynamic_cast<TensorType *>(output),
+ conv_info);
return std::move(conv);
}
template <Hint hint>
-std::unique_ptr<arm_compute::IFunction> instantiate(ITensor *input, Tensor &weights, Tensor &biases, ITensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info);
+std::unique_ptr<arm_compute::IFunction> instantiate(ITensor *input, ITensor *weights, ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info,
+ ConvolutionMethodHint conv_method);
template <>
-std::unique_ptr<arm_compute::IFunction> instantiate<Hint::OPENCL>(ITensor *input, Tensor &weights, Tensor &biases, ITensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info)
+std::unique_ptr<arm_compute::IFunction> instantiate<Hint::OPENCL>(ITensor *input, ITensor *weights, ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info,
+ ConvolutionMethodHint conv_method)
{
- return instantiate_function<arm_compute::CLConvolutionLayer, arm_compute::CLTensor, Hint::OPENCL>(input, weights, biases, output, conv_info, weights_info);
+ if(conv_method == ConvolutionMethodHint::GEMM)
+ {
+ return instantiate_function<arm_compute::CLConvolutionLayer, arm_compute::ICLTensor, Hint::OPENCL>(input, weights, biases, output, conv_info, weights_info);
+ }
+ else
+ {
+ return instantiate_direct_function<arm_compute::CLDirectConvolutionLayer, arm_compute::ICLTensor, Hint::OPENCL>(input, weights, biases, output, conv_info);
+ }
}
template <>
-std::unique_ptr<arm_compute::IFunction> instantiate<Hint::NEON>(ITensor *input, Tensor &weights, Tensor &biases, ITensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info)
+std::unique_ptr<arm_compute::IFunction> instantiate<Hint::NEON>(ITensor *input, ITensor *weights, ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info,
+ ConvolutionMethodHint conv_method)
{
- return instantiate_function<arm_compute::NEConvolutionLayer, arm_compute::Tensor, Hint::NEON>(input, weights, biases, output, conv_info, weights_info);
+ if(conv_method == ConvolutionMethodHint::GEMM)
+ {
+ return instantiate_function<arm_compute::NEConvolutionLayer, arm_compute::ITensor, Hint::NEON>(input, weights, biases, output, conv_info, weights_info);
+ }
+ else
+ {
+ return instantiate_direct_function<arm_compute::NEDirectConvolutionLayer, arm_compute::ITensor, Hint::NEON>(input, weights, biases, output, conv_info);
+ }
}
} // namespace
+/** Grouped Convolution function */
+class GroupedConvolutionFunction final : public arm_compute::IFunction
+{
+public:
+ /** Default Constructor */
+ GroupedConvolutionFunction()
+ : _convolutions()
+ {
+ }
+ /** Default Destructor */
+ ~GroupedConvolutionFunction() final = default;
+ /** Prevent instances from being copy constructed */
+ GroupedConvolutionFunction(const GroupedConvolutionFunction &) = delete;
+ /** Prevent instances from being copy assigned */
+ GroupedConvolutionFunction &operator=(const GroupedConvolutionFunction &) = delete;
+ /** Allow instances to be move constructed */
+ GroupedConvolutionFunction(GroupedConvolutionFunction &&) noexcept = default;
+ /** Allow instances to be move assigned */
+ GroupedConvolutionFunction &operator=(GroupedConvolutionFunction &&) noexcept = default;
+ /** Adds a convolution
+ *
+     * @param[in] convolution Convolution function to add
+ */
+ void add_convolution_function(std::unique_ptr<IFunction> convolution)
+ {
+ _convolutions.emplace_back(std::move(convolution));
+ }
+
+    // Inherited methods overridden:
+ void run() override
+ {
+ for(auto &c : _convolutions)
+ {
+ c->run();
+ }
+ }
+
+private:
+ std::vector<std::unique_ptr<IFunction>> _convolutions;
+};
+
std::unique_ptr<arm_compute::IFunction> ConvolutionLayer::instantiate_node(Hint hint, ITensor *input, ITensor *output)
{
+ // Set weights and biases info
if(_weights.tensor() == nullptr)
{
- _weights.set_info(TensorInfo(TensorShape(_conv_width, _conv_height, input->info()->dimension(2), _ofm), input->info()->num_channels(), input->info()->data_type(),
+ _weights.set_info(TensorInfo(TensorShape(_conv_width, _conv_height, input->info()->dimension(2) / _num_groups, _ofm),
+ input->info()->num_channels(), input->info()->data_type(),
input->info()->fixed_point_position()));
}
if(_biases.tensor() == nullptr)
@@ -90,13 +185,40 @@ std::unique_ptr<arm_compute::IFunction> ConvolutionLayer::instantiate_node(Hint
_input = input;
_output = output;
- if(_hint == Hint::OPENCL)
+ // Check if the weights and biases are loaded
+ bool weights_are_loaded = _weights.tensor() != nullptr;
+    bool biases_are_loaded = _biases.tensor() != nullptr;
+
+ // Set bias and weights target
+ _weights.set_target(_hint);
+ _biases.set_target(_hint);
+
+ // Calculate output shape
+ TensorShape output_shape = calculate_convolution_layer_output_shape(_input->info()->tensor_shape(), _weights.info().tensor_shape(), _conv_info);
+
+    // Output auto-initialization if not yet initialized
+ arm_compute::auto_init_if_empty(*_output->info(), output_shape, 1, _input->info()->data_type(), _input->info()->fixed_point_position());
+
+ // Create appropriate convolution function
+ // TODO(geopin01): Fix convolution layer hints once the GraphContext has been added
+ if(_num_groups == 1)
{
- func = instantiate<Hint::OPENCL>(input, _weights, _biases, output, _conv_info, _weights_info);
+ func = instantiate_convolution(ConvolutionMethodHint::GEMM);
}
else
{
- func = instantiate<Hint::NEON>(input, _weights, _biases, output, _conv_info, _weights_info);
+ func = instantiate_grouped_convolution(ConvolutionMethodHint::GEMM);
+ }
+
+ // Fill weights
+ if(!weights_are_loaded)
+ {
+ _weights.allocate_and_fill_if_needed();
+ }
+ // Fill biases
+ if(!biases_are_loaded)
+ {
+ _biases.allocate_and_fill_if_needed();
}
return func;
@@ -112,6 +234,97 @@ void ConvolutionLayer::print_info()
{
std::cout << "Instantiating NEConvolutionLayer";
}
- std::cout << " Type: " << _input->info()->data_type() << " Input Shape: " << _input->info()->tensor_shape() << " Weights shape: " << _weights.info().tensor_shape() << " Biases Shape: " <<
- _biases.info().tensor_shape() << " Output Shape: " << _output->info()->tensor_shape() << " PadStrideInfo: " << _conv_info << "WeightsInfo: " << _weights_info << std::endl;
+ std::cout << " Data Type: " << _input->info()->data_type()
+ << " Input Shape: " << _input->info()->tensor_shape()
+ << " Weights shape: " << _weights.info().tensor_shape()
+ << " Biases Shape: " << _biases.info().tensor_shape()
+ << " Output Shape: " << _output->info()->tensor_shape()
+ << " PadStrideInfo: " << _conv_info
+ << " Groups: " << _num_groups
+ << " WeightsInfo: " << _weights_info
+ << std::endl;
+}
+
+std::unique_ptr<arm_compute::IFunction> ConvolutionLayer::instantiate_convolution(ConvolutionMethodHint conv_method_hint)
+{
+ std::unique_ptr<arm_compute::IFunction> func;
+ if(_hint == Hint::OPENCL)
+ {
+ func = instantiate<Hint::OPENCL>(_input, _weights.tensor(), _biases.tensor(), _output, _conv_info, _weights_info, conv_method_hint);
+ }
+ else
+ {
+ func = instantiate<Hint::NEON>(_input, _weights.tensor(), _biases.tensor(), _output, _conv_info, _weights_info, conv_method_hint);
+ }
+ return func;
+}
+
+std::unique_ptr<arm_compute::IFunction> ConvolutionLayer::instantiate_grouped_convolution(ConvolutionMethodHint conv_method_hint)
+{
+ // Get tensor shapes
+ TensorShape input_shape = _input->info()->tensor_shape();
+ TensorShape output_shape = _output->info()->tensor_shape();
+ TensorShape weights_shape = _weights.info().tensor_shape();
+ TensorShape biases_shape = _biases.info().tensor_shape();
+
+ ARM_COMPUTE_ERROR_ON_MSG((input_shape.z() % _num_groups) != 0, "Input depth not multiple of the number of groups!");
+ ARM_COMPUTE_ERROR_ON_MSG((output_shape.z() % _num_groups) != 0, "Output depth not multiple of the number of groups!");
+ ARM_COMPUTE_ERROR_ON_MSG((weights_shape[3] % _num_groups) != 0, "Number of kernels not multiple of the number of groups!");
+ ARM_COMPUTE_ERROR_ON_MSG((biases_shape.x() % _num_groups) != 0, "Biases not multiple of the number of groups!");
+
+ // Create a grouped convolution function
+ auto grouped_conv = arm_compute::support::cpp14::make_unique<GroupedConvolutionFunction>();
+
+ // Create sub-tensors vectors
+ _is = arm_compute::support::cpp14::make_unique<SubTensor[]>(_num_groups);
+ _os = arm_compute::support::cpp14::make_unique<SubTensor[]>(_num_groups);
+ _ws = arm_compute::support::cpp14::make_unique<SubTensor[]>(_num_groups);
+ _bs = arm_compute::support::cpp14::make_unique<SubTensor[]>(_num_groups);
+
+ // Calculate sub-tensor splits
+ const int input_split = input_shape.z() / _num_groups;
+ const int output_split = output_shape.z() / _num_groups;
+ const int weights_split = weights_shape[3] / _num_groups;
+ const int biases_split = biases_shape.x() / _num_groups;
+
+ // Calculate sub-tensor shapes
+ input_shape.set(2, input_split);
+ output_shape.set(2, output_split);
+ weights_shape.set(3, weights_split);
+ biases_shape.set(0, biases_split);
+
+ // Configure sub-tensors
+ for(int i = 0; i < static_cast<int>(_num_groups); ++i)
+ {
+ // Create convolution function
+ std::unique_ptr<arm_compute::IFunction> func;
+
+ // Calculate sub-tensors starting coordinates
+ Coordinates input_coord(0, 0, input_split * i);
+ Coordinates output_coord(0, 0, output_split * i);
+ Coordinates weights_coord(0, 0, 0, weights_split * i);
+ Coordinates biases_coord(biases_split * i);
+
+ // Create sub-tensors for input, output, weights and bias
+ auto hint_to_use = (_hint == Hint::OPENCL) ? Hint::OPENCL : Hint::NEON;
+ _is[i] = SubTensor(_input, input_shape, input_coord, hint_to_use);
+ _os[i] = SubTensor(_output, output_shape, output_coord, hint_to_use);
+ _ws[i] = SubTensor(_weights.tensor(), weights_shape, weights_coord, hint_to_use);
+ _bs[i] = SubTensor(_biases.tensor(), biases_shape, biases_coord, hint_to_use);
+
+ // Instantiate convolution function
+ if(_hint == Hint::OPENCL)
+ {
+ func = instantiate<Hint::OPENCL>(_is[i].tensor(), _ws[i].tensor(), _bs[i].tensor(), _os[i].tensor(), _conv_info, _weights_info, conv_method_hint);
+ }
+ else
+ {
+ func = instantiate<Hint::NEON>(_is[i].tensor(), _ws[i].tensor(), _bs[i].tensor(), _os[i].tensor(), _conv_info, _weights_info, conv_method_hint);
+ }
+
+ // Add convolution function to the list of convolutions for the grouped convolution
+ grouped_conv->add_convolution_function(std::move(func));
+ }
+
+ return std::move(grouped_conv);
}
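A worked example of the grouped path, using AlexNet's second convolution from the example above (96 input feature maps, 256 kernels, num_groups = 2): input_split = 96 / 2 = 48, output_split = 256 / 2 = 128, weights_split = 256 / 2 = 128 and biases_split = 256 / 2 = 128. Group i is therefore configured on the input sub-tensor starting at channel 48 * i and writes the output sub-tensor starting at channel 128 * i, and the two resulting convolution functions are executed back to back by GroupedConvolutionFunction::run().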
diff --git a/src/graph/nodes/FullyConnectedLayer.cpp b/src/graph/nodes/FullyConnectedLayer.cpp
index 8d244cb515..fcc86be8fa 100644
--- a/src/graph/nodes/FullyConnectedLayer.cpp
+++ b/src/graph/nodes/FullyConnectedLayer.cpp
@@ -33,6 +33,16 @@ using namespace arm_compute::graph;
namespace
{
+TensorShape calculate_fullyconnected_layer_output_shape(const TensorShape &input_shape, unsigned int output_neurons)
+{
+ // Note: Only 1D batch space is supported at the moment
+ unsigned int batches = input_shape[1];
+ if(input_shape.num_dimensions() > 2)
+ {
+ batches = input_shape[3];
+ }
+ return TensorShape(output_neurons, batches);
+}
template <typename FullyConnectedType, typename TensorType, Hint hint>
std::unique_ptr<arm_compute::IFunction> instantiate_function(ITensor *input, Tensor &weights, Tensor &biases, ITensor *output)
{
@@ -95,8 +105,10 @@ std::unique_ptr<arm_compute::IFunction> FullyConnectedLayer::instantiate_node(Hi
_biases.set_info(TensorInfo(TensorShape(_num_neurons), input->info()->num_channels(), input->info()->data_type(), input->info()->fixed_point_position()));
}
- arm_compute::auto_init_if_empty(*output->info(), TensorShape(_num_neurons, input->info()->dimension(1)), input->info()->num_channels(), input->info()->data_type(),
- input->info()->fixed_point_position());
+ // Auto configure output
+ arm_compute::auto_init_if_empty(*output->info(),
+ calculate_fullyconnected_layer_output_shape(input->info()->tensor_shape(), _num_neurons),
+ input->info()->num_channels(), input->info()->data_type(), input->info()->fixed_point_position());
std::unique_ptr<arm_compute::IFunction> func;
_hint = hint;
@@ -125,6 +137,10 @@ void FullyConnectedLayer::print_info()
{
std::cout << "Instantiating NEFullyConnectedLayer";
}
- std::cout << " Type: " << _input->info()->data_type() << " Input Shape: " << _input->info()->tensor_shape() << " Weights shape: " << _weights.info().tensor_shape() << " Biases Shape: " <<
- _biases.info().tensor_shape() << " Output Shape: " << _output->info()->tensor_shape() << std::endl;
+ std::cout << " Type: " << _input->info()->data_type()
+ << " Input Shape: " << _input->info()->tensor_shape()
+ << " Weights shape: " << _weights.info().tensor_shape()
+ << " Biases Shape: " << _biases.info().tensor_shape()
+ << " Output Shape: " << _output->info()->tensor_shape()
+ << std::endl;
}
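A worked example of calculate_fullyconnected_layer_output_shape with the AlexNet settings above: fc6 receives the 4-dimensional pooled tensor (6, 6, 256, batches), so batches is read from index 3 and the output is auto-initialised to (4096, batches); fc7 receives the already 2-dimensional (4096, batches) tensor, so batches is read from index 1 and the output is again (4096, batches).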
diff --git a/src/graph/nodes/NormalizationLayer.cpp b/src/graph/nodes/NormalizationLayer.cpp
new file mode 100644
index 0000000000..55ef9bf243
--- /dev/null
+++ b/src/graph/nodes/NormalizationLayer.cpp
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/graph/nodes/NormalizationLayer.h"
+
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/functions/CLNormalizationLayer.h"
+#include "arm_compute/runtime/NEON/functions/NENormalizationLayer.h"
+#include "arm_compute/runtime/Tensor.h"
+#include "support/ToolchainSupport.h"
+#include "utils/TypePrinter.h"
+
+using namespace arm_compute::graph;
+
+namespace
+{
+template <typename NormalizationType, typename TensorType, Hint hint>
+std::unique_ptr<arm_compute::IFunction> instantiate_function(ITensor *input, ITensor *output, const NormalizationLayerInfo &norm_info)
+{
+ auto norm = arm_compute::support::cpp14::make_unique<NormalizationType>();
+ norm->configure(
+ dynamic_cast<TensorType *>(input),
+ dynamic_cast<TensorType *>(output),
+ norm_info);
+
+ return std::move(norm);
+}
+
+template <Hint hint>
+std::unique_ptr<arm_compute::IFunction> instantiate(ITensor *input, ITensor *output, const NormalizationLayerInfo &norm_info);
+
+template <>
+std::unique_ptr<arm_compute::IFunction> instantiate<Hint::OPENCL>(ITensor *input, ITensor *output, const NormalizationLayerInfo &norm_info)
+{
+ return instantiate_function<arm_compute::CLNormalizationLayer, arm_compute::CLTensor, Hint::OPENCL>(input, output, norm_info);
+}
+
+template <>
+std::unique_ptr<arm_compute::IFunction> instantiate<Hint::NEON>(ITensor *input, ITensor *output, const NormalizationLayerInfo &norm_info)
+{
+ return instantiate_function<arm_compute::NENormalizationLayer, arm_compute::Tensor, Hint::NEON>(input, output, norm_info);
+}
+} // namespace
+
+NormalizationLayer::NormalizationLayer(const NormalizationLayerInfo norm_info)
+ : _norm_info(norm_info)
+{
+}
+
+std::unique_ptr<arm_compute::IFunction> NormalizationLayer::instantiate_node(Hint hint, ITensor *input, ITensor *output)
+{
+ std::unique_ptr<arm_compute::IFunction> func;
+ _hint = hint;
+ _input = input;
+ _output = output;
+
+ if(_hint == Hint::OPENCL)
+ {
+ func = instantiate<Hint::OPENCL>(input, output, _norm_info);
+ }
+ else
+ {
+ func = instantiate<Hint::NEON>(input, output, _norm_info);
+ }
+
+ return func;
+}
+
+void NormalizationLayer::print_info()
+{
+ if(_hint == Hint::OPENCL)
+ {
+ std::cout << "Instantiating CLNormalizationLayer";
+ }
+ else
+ {
+ std::cout << "Instantiating NENormalizationLayer";
+ }
+
+ std::cout << " Data Type: " << _input->info()->data_type()
+ << " Input shape: " << _input->info()->tensor_shape()
+ << " Output shape: " << _output->info()->tensor_shape()
+ << " Normalization info: " << _norm_info
+ << std::endl;
+}
diff --git a/utils/GraphTypePrinter.h b/utils/GraphTypePrinter.h
new file mode 100644
index 0000000000..be56d59853
--- /dev/null
+++ b/utils/GraphTypePrinter.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_TEST_GRAPH_TYPE_PRINTER_H__
+#define __ARM_COMPUTE_TEST_GRAPH_TYPE_PRINTER_H__
+
+#include "arm_compute/graph/Types.h"
+
+#include <ostream>
+#include <sstream>
+#include <string>
+
+namespace arm_compute
+{
+namespace graph
+{
+/** Formatted output of the @ref ConvolutionMethodHint type. */
+inline ::std::ostream &operator<<(::std::ostream &os, const ConvolutionMethodHint &conv_method)
+{
+ switch(conv_method)
+ {
+ case ConvolutionMethodHint::DIRECT:
+ os << "DIRECT";
+ break;
+ case ConvolutionMethodHint::GEMM:
+ os << "GEMM";
+ break;
+ default:
+ ARM_COMPUTE_ERROR("NOT_SUPPORTED!");
+ }
+
+ return os;
+}
+
+inline std::string to_string(const ConvolutionMethodHint &conv_method)
+{
+ std::stringstream str;
+ str << conv_method;
+ return str.str();
+}
+} // namespace graph
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_TEST_GRAPH_TYPE_PRINTER_H__ */
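A minimal usage sketch of the new printer (not part of this patch):

    #include "utils/GraphTypePrinter.h"

    #include <iostream>

    int main()
    {
        using arm_compute::graph::ConvolutionMethodHint;

        // Prints "GEMM" via the operator<< overload defined above.
        std::cout << arm_compute::graph::to_string(ConvolutionMethodHint::GEMM) << std::endl;
        return 0;
    }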
diff --git a/utils/TypePrinter.h b/utils/TypePrinter.h
index 758034884c..041ec1887a 100644
--- a/utils/TypePrinter.h
+++ b/utils/TypePrinter.h
@@ -264,6 +264,13 @@ inline std::string to_string(const arm_compute::NormalizationLayerInfo &info)
return str.str();
}
+/** Formatted output of @ref NormalizationLayerInfo. */
+inline ::std::ostream &operator<<(::std::ostream &os, const NormalizationLayerInfo &info)
+{
+ os << info.type();
+ return os;
+}
+
/** Formatted output of the PoolingType type. */
inline ::std::ostream &operator<<(::std::ostream &os, const PoolingType &pool_type)
{