From e2c82fee3b6d38f6e79412c78176792b817defd0 Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Mon, 2 Oct 2017 18:51:47 +0100 Subject: COMPMID-550: Adds support for branches. Change-Id: I778007c9221ce3156400284c4039b90245eb2b7f Reviewed-on: http://mpd-gerrit.cambridge.arm.com/90043 Tested-by: Kaizen Reviewed-by: Anthony Barbier --- arm_compute/core/SubTensorInfo.h | 14 +- arm_compute/graph/CL/CLMap.h | 10 +- arm_compute/graph/CL/CLUnmap.h | 8 +- arm_compute/graph/Graph.h | 12 +- arm_compute/graph/INode.h | 3 +- arm_compute/graph/ITensorObject.h | 75 ++++++++ arm_compute/graph/Nodes.h | 1 + arm_compute/graph/SubGraph.h | 94 ++++++++++ arm_compute/graph/SubTensor.h | 42 ++--- arm_compute/graph/Tensor.h | 43 ++--- arm_compute/graph/Types.h | 25 ++- arm_compute/graph/nodes/ActivationLayer.h | 4 +- arm_compute/graph/nodes/BatchNormalizationLayer.h | 3 +- arm_compute/graph/nodes/BranchLayer.h | 77 ++++++++ arm_compute/graph/nodes/ConvolutionLayer.h | 3 +- arm_compute/graph/nodes/DepthConcatenateLayer.h | 58 ++++++ arm_compute/graph/nodes/FloorLayer.h | 4 +- arm_compute/graph/nodes/FullyConnectedLayer.h | 3 +- arm_compute/graph/nodes/L2NormalizeLayer.h | 4 +- arm_compute/graph/nodes/NormalizationLayer.h | 3 +- arm_compute/graph/nodes/PoolingLayer.h | 4 +- arm_compute/graph/nodes/SoftmaxLayer.h | 7 +- examples/graph_googlenet.cpp | 214 ++++++++++++++++++++++ src/core/SubTensorInfo.cpp | 12 +- src/graph/CL/CLMap.cpp | 11 +- src/graph/CL/CLUnmap.cpp | 11 +- src/graph/Graph.cpp | 52 ++++-- src/graph/INode.cpp | 2 - src/graph/SubGraph.cpp | 106 +++++++++++ src/graph/SubTensor.cpp | 40 ++-- src/graph/Tensor.cpp | 13 +- src/graph/nodes/ActivationLayer.cpp | 32 ++-- src/graph/nodes/BatchNormalizationLayer.cpp | 37 ++-- src/graph/nodes/BranchLayer.cpp | 176 ++++++++++++++++++ src/graph/nodes/ConvolutionLayer.cpp | 46 +++-- src/graph/nodes/DepthConcatenateLayer.cpp | 106 +++++++++++ src/graph/nodes/FloorLayer.cpp | 26 ++- src/graph/nodes/FullyConnectedLayer.cpp | 46 ++--- src/graph/nodes/L2NormalizeLayer.cpp | 26 ++- src/graph/nodes/NormalizationLayer.cpp | 32 ++-- src/graph/nodes/PoolingLayer.cpp | 32 ++-- src/graph/nodes/SoftmaxLayer.cpp | 32 ++-- 42 files changed, 1274 insertions(+), 275 deletions(-) create mode 100644 arm_compute/graph/ITensorObject.h create mode 100644 arm_compute/graph/SubGraph.h create mode 100644 arm_compute/graph/nodes/BranchLayer.h create mode 100644 arm_compute/graph/nodes/DepthConcatenateLayer.h create mode 100644 examples/graph_googlenet.cpp create mode 100644 src/graph/SubGraph.cpp create mode 100644 src/graph/nodes/BranchLayer.cpp create mode 100644 src/graph/nodes/DepthConcatenateLayer.cpp diff --git a/arm_compute/core/SubTensorInfo.h b/arm_compute/core/SubTensorInfo.h index 54fb66a573..81a27026e7 100644 --- a/arm_compute/core/SubTensorInfo.h +++ b/arm_compute/core/SubTensorInfo.h @@ -61,6 +61,14 @@ public: SubTensorInfo(SubTensorInfo &&) = default; /** Allow instances of this class to be moved */ SubTensorInfo &operator=(SubTensorInfo &&) = default; + /** Returns the coordinates of the sub-tensor inside the parent tensor + * + * @return Sub-tensor coordinates + */ + Coordinates coords() const + { + return _coords; + } // Inherited methods overridden: void set_data_type(DataType data_type) override @@ -171,7 +179,11 @@ public: void set_valid_region(ValidRegion valid_region) override { ARM_COMPUTE_ERROR_ON(_parent == nullptr); - ARM_COMPUTE_ERROR_ON_INVALID_SUBTENSOR_VALID_REGION(_parent->valid_region(), valid_region); + // Check if subtensor is valid if parent is 
configured + if(_parent->tensor_shape().total_size() != 0) + { + ARM_COMPUTE_ERROR_ON_INVALID_SUBTENSOR_VALID_REGION(_parent->valid_region(), valid_region); + } _valid_region = std::move(valid_region); } diff --git a/arm_compute/graph/CL/CLMap.h b/arm_compute/graph/CL/CLMap.h index a205ebcad1..732a1df77f 100644 --- a/arm_compute/graph/CL/CLMap.h +++ b/arm_compute/graph/CL/CLMap.h @@ -29,11 +29,11 @@ namespace arm_compute { -class CLTensor; +class ICLTensor; namespace graph { -class Tensor; +class ITensorObject; /** OpenCL map function */ class CLMap : public arm_compute::IFunction { @@ -43,7 +43,7 @@ public: * @param[in] tensor Tensor to map * @param[in] blocking Flag to specify if the map should be blocking or not (defaults to false) */ - CLMap(Tensor *tensor, bool blocking = false); + CLMap(ITensorObject *tensor, bool blocking = false); /** Prevent instances from being copy constructed */ CLMap(const CLMap &) = delete; /** Prevent instances from being copy assigned */ @@ -57,8 +57,8 @@ public: void run() override; private: - arm_compute::CLTensor *_tensor; /**< Tensor */ - bool _blocking; /**< Blocking flag */ + arm_compute::ICLTensor *_tensor; /**< Tensor */ + bool _blocking; /**< Blocking flag */ }; } // namespace graph } // namespace arm_compute diff --git a/arm_compute/graph/CL/CLUnmap.h b/arm_compute/graph/CL/CLUnmap.h index a72706353b..17745c436b 100644 --- a/arm_compute/graph/CL/CLUnmap.h +++ b/arm_compute/graph/CL/CLUnmap.h @@ -29,11 +29,11 @@ namespace arm_compute { -class CLTensor; +class ICLTensor; namespace graph { -class Tensor; +class ITensorObject; /** OpenCL un-map function */ class CLUnmap : public arm_compute::IFunction { @@ -42,7 +42,7 @@ public: * * @param[in] tensor Tensor to un-map */ - CLUnmap(Tensor *tensor); + CLUnmap(ITensorObject *tensor); /** Prevent instances from being copy constructed */ CLUnmap(const CLUnmap &) = delete; /** Prevent instances from being copy assigned */ @@ -56,7 +56,7 @@ public: void run() override; private: - arm_compute::CLTensor *_tensor; /**< Tensor */ + arm_compute::ICLTensor *_tensor; /**< Tensor */ }; } // namespace graph } // namespace arm_compute diff --git a/arm_compute/graph/Graph.h b/arm_compute/graph/Graph.h index 9d06f44bee..4afe96b40b 100644 --- a/arm_compute/graph/Graph.h +++ b/arm_compute/graph/Graph.h @@ -25,6 +25,8 @@ #define __ARM_COMPUTE_GRAPH_GRAPH_H__ #include "arm_compute/graph/INode.h" +#include "arm_compute/graph/ITensorObject.h" +#include "arm_compute/graph/SubTensor.h" #include "arm_compute/graph/Tensor.h" #include "arm_compute/graph/Types.h" #include "support/ToolchainSupport.h" @@ -64,7 +66,7 @@ public: * * @param[in] tensor Tensor to add */ - void add_tensor(std::unique_ptr tensor); + void add_tensor_object(std::unique_ptr tensor); /** Manually sets the output of the current node * * @param[in] tmp Output info to set @@ -98,6 +100,14 @@ Graph &operator<<(Graph &graph, TensorInfo &&info); * @return Updated graph */ Graph &operator<<(Graph &graph, Tensor &&tensor); +/** Overloaded stream operator to add a sub-tensor to the graph + * + * @param[in, out] graph Graph to add the tensor + * @param[in] sub_tensor Sub-tensor to be added + * + * @return Updated graph + */ +Graph &operator<<(Graph &graph, SubTensor &&sub_tensor); /** Overloaded stream operator to provide a target hint to the graph * * @param[in, out] graph Graph to provide the hint to diff --git a/arm_compute/graph/INode.h b/arm_compute/graph/INode.h index 1b22bdf639..56b50b9424 100644 --- a/arm_compute/graph/INode.h +++ b/arm_compute/graph/INode.h @@ 
-25,6 +25,7 @@ #define __ARM_COMPUTE_GRAPH_INODE_H__ #include "arm_compute/graph/GraphContext.h" +#include "arm_compute/graph/ITensorObject.h" #include "arm_compute/graph/Types.h" #include "arm_compute/runtime/IFunction.h" @@ -46,7 +47,7 @@ public: * @param[in] input Input tensor of the node * @param[in] output Output tensor of the node */ - virtual std::unique_ptr instantiate_node(GraphContext &ctx, ITensor *input, ITensor *output) = 0; + virtual std::unique_ptr instantiate_node(GraphContext &ctx, ITensorObject *input, ITensorObject *output) = 0; /** Override the existing target hint * * @note If the input is DONT_CARE then the method has to pick a technology, diff --git a/arm_compute/graph/ITensorObject.h b/arm_compute/graph/ITensorObject.h new file mode 100644 index 0000000000..61be2865c7 --- /dev/null +++ b/arm_compute/graph/ITensorObject.h @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_GRAPH_ITENSOROBJECT_H__ +#define __ARM_COMPUTE_GRAPH_ITENSOROBJECT_H__ + +#include "arm_compute/graph/ITensorAccessor.h" +#include "arm_compute/graph/Types.h" +#include "support/ToolchainSupport.h" + +#include + +namespace arm_compute +{ +namespace graph +{ +/** Tensor object interface */ +class ITensorObject +{ +public: + /** Default Destructor */ + virtual ~ITensorObject() = default; + /** Calls accessor on tensor + * + * @return True if succeeds else false + */ + virtual bool call_accessor() = 0; + /** Checks if tensor has an accessor set. 
+ * + * @return True if an accessor has been set else false + */ + virtual bool has_accessor() const = 0; + /** Sets target of the tensor + * + * @param[in] target Target where the tensor should be pinned in + * + * @return Backend tensor + */ + virtual ITensor *set_target(TargetHint target) = 0; + /** Returns a pointer to the internal tensor + * + * @return Tensor + */ + virtual ITensor *tensor() = 0; + /** Return the target that this tensor is pinned on + * + * @return Target of the tensor + */ + virtual TargetHint target() const = 0; + /** Allocates the tensor */ + virtual void allocate() = 0; +}; +} // namespace graph +} // namespace arm_compute +#endif /* __ARM_COMPUTE_GRAPH_ITENSOROBJECT_H__ */ diff --git a/arm_compute/graph/Nodes.h b/arm_compute/graph/Nodes.h index 548deabeb6..d1ed715ae8 100644 --- a/arm_compute/graph/Nodes.h +++ b/arm_compute/graph/Nodes.h @@ -26,6 +26,7 @@ #include "arm_compute/graph/nodes/ActivationLayer.h" #include "arm_compute/graph/nodes/BatchNormalizationLayer.h" +#include "arm_compute/graph/nodes/BranchLayer.h" #include "arm_compute/graph/nodes/ConvolutionLayer.h" #include "arm_compute/graph/nodes/FloorLayer.h" #include "arm_compute/graph/nodes/FullyConnectedLayer.h" diff --git a/arm_compute/graph/SubGraph.h b/arm_compute/graph/SubGraph.h new file mode 100644 index 0000000000..d768bf9119 --- /dev/null +++ b/arm_compute/graph/SubGraph.h @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
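The has_accessor() hook introduced above lets the runtime treat Tensor and SubTensor uniformly: the reworked Graph::run later in this patch only invokes an accessor when one is present, and a sub-tensor simply reports that it has none. A minimal sketch of that contract follows; the helper name is a hypothetical illustration and is not part of the patch.

#include "arm_compute/graph/ITensorObject.h"

// Illustrative helper: feed an input-style tensor object, treating a missing
// accessor as "nothing to do" rather than as a failure, mirroring the check
// Graph::run performs on the graph input.
bool feed_if_possible(arm_compute::graph::ITensorObject &obj)
{
    return !obj.has_accessor() || obj.call_accessor();
}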
+ */ +#ifndef __ARM_COMPUTE_GRAPH_SUBGRAPH_H__ +#define __ARM_COMPUTE_GRAPH_SUBGRAPH_H__ + +#include "arm_compute/graph/Graph.h" +#include "arm_compute/graph/INode.h" +#include "arm_compute/graph/ITensorObject.h" +#include "arm_compute/graph/SubTensor.h" +#include "arm_compute/graph/Tensor.h" +#include "arm_compute/graph/Types.h" +#include "arm_compute/runtime/IFunction.h" + +#include + +namespace arm_compute +{ +namespace graph +{ +/** SubGraph class */ +class SubGraph +{ +public: + /** Constructor */ + SubGraph(); + /** Adds a node to the graph + * + * @param[in] node Node to add + */ + void add_node(std::unique_ptr node); + /** Adds a tensor to the graph + * + * @param[in] tensor Tensor to add + */ + void add_tensor_object(std::unique_ptr tensor); + /** Constructs a graph from a subgraph + * + * @param[in] hint Execution target hint + * @param[in] input Input to the graph + * @param[in] output Output to the graph + * + * @return A graph + */ + std::unique_ptr construct(TargetHint hint, std::unique_ptr input, std::unique_ptr output); + /** Checks if the subgraph has an input + * + * @return True if the sub-graph has an input else false + */ + bool has_input() const; + /** Checks if the subgraph has an output + * + * @return True if the sub-graph has an output else false + */ + bool has_output() const; + +private: + std::vector> _nodes; + std::unique_ptr _input; + std::unique_ptr _output; +}; + +SubGraph &operator<<(SubGraph &graph, Tensor &&tensor); +SubGraph &operator<<(SubGraph &graph, SubTensor &&sub_tensor); + +template +SubGraph &operator<<(SubGraph &sub_graph, Node node) +{ + sub_graph.add_node(arm_compute::support::cpp14::make_unique(std::move(node))); + return sub_graph; +} +} // namespace graph +} // namespace arm_compute +#endif /* __ARM_COMPUTE_GRAPH_INODE_H__ */ diff --git a/arm_compute/graph/SubTensor.h b/arm_compute/graph/SubTensor.h index ace93d20a3..22a0a9e27f 100644 --- a/arm_compute/graph/SubTensor.h +++ b/arm_compute/graph/SubTensor.h @@ -25,6 +25,7 @@ #define __ARM_COMPUTE_GRAPH_SUBTENSOR_H__ #include "arm_compute/graph/ITensorAccessor.h" +#include "arm_compute/graph/ITensorObject.h" #include "arm_compute/graph/Tensor.h" #include "arm_compute/graph/Types.h" #include "support/ToolchainSupport.h" @@ -36,7 +37,7 @@ namespace arm_compute namespace graph { /** SubTensor class */ -class SubTensor final +class SubTensor final : public ITensorObject { public: /** Default Constructor */ @@ -55,7 +56,7 @@ public: * @param[in] coords Starting coordinates of the sub-tensor in the parent tensor * @param[in] target Execution target */ - SubTensor(ITensor *parent, TensorShape tensor_shape, Coordinates coords, TargetHint target); + SubTensor(arm_compute::ITensor *parent, TensorShape tensor_shape, Coordinates coords, TargetHint target); /** Prevent instances of this class from being copied (As this class contains pointers) */ SubTensor(const SubTensor &) = delete; /** Prevent instances of this class from being copied (As this class contains pointers) */ @@ -67,37 +68,24 @@ public: /** Default Destructor */ ~SubTensor() = default; - /** Sets the given TensorInfo to the tensor - * - * @param[in] info TensorInfo to set - */ - void set_info(SubTensorInfo &&info); - /** Returns tensor's TensorInfo - * - * @return TensorInfo of the tensor - */ - const SubTensorInfo &info() const; - /** Returns a pointer to the internal tensor - * - * @return Tensor - */ - ITensor *tensor(); - /** Return the target that this tensor is pinned on - * - * @return Target of the tensor - */ - TargetHint target() 
const; + // Inherited methods overriden: + bool call_accessor() override; + bool has_accessor() const override; + arm_compute::ITensor *set_target(TargetHint target) override; + arm_compute::ITensor *tensor() override; + TargetHint target() const override; + void allocate() override; private: /** Instantiates a sub-tensor */ void instantiate_subtensor(); private: - TargetHint _target; /**< Target that this tensor is pinned on */ - Coordinates _coords; /**< SubTensor Coordinates */ - SubTensorInfo _info; /**< SubTensor metadata */ - ITensor *_parent; /**< Parent tensor */ - std::unique_ptr _subtensor; /**< SubTensor */ + TargetHint _target; /**< Target that this tensor is pinned on */ + TensorShape _tensor_shape; /**< SubTensor shape */ + Coordinates _coords; /**< SubTensor Coordinates */ + arm_compute::ITensor *_parent; /**< Parent tensor */ + std::unique_ptr _subtensor; /**< SubTensor */ }; } // namespace graph } // namespace arm_compute diff --git a/arm_compute/graph/Tensor.h b/arm_compute/graph/Tensor.h index 9fdd56db6e..dcb0c661d6 100644 --- a/arm_compute/graph/Tensor.h +++ b/arm_compute/graph/Tensor.h @@ -25,6 +25,7 @@ #define __ARM_COMPUTE_GRAPH_TENSOR_H__ #include "arm_compute/graph/ITensorAccessor.h" +#include "arm_compute/graph/ITensorObject.h" #include "arm_compute/graph/Types.h" #include "support/ToolchainSupport.h" @@ -35,7 +36,7 @@ namespace arm_compute namespace graph { /** Tensor class */ -class Tensor +class Tensor final : public ITensorObject { public: /** Constructor @@ -84,43 +85,27 @@ public: * @param[in] info TensorInfo to set */ void set_info(TensorInfo &&info); - /** Calls accessor on tensor - * - * @return True if succeeds else false - */ - bool call_accessor(); - /** Sets target of the tensor - * - * @param[in] target Target where the tensor should be pinned in - * - * @return - */ - ITensor *set_target(TargetHint target); /** Returns tensor's TensorInfo * * @return TensorInfo of the tensor */ const TensorInfo &info() const; - /** Returns a pointer to the internal tensor - * - * @return Tensor - */ - ITensor *tensor(); /** Allocates and fills the tensor if needed */ void allocate_and_fill_if_needed(); - /** Allocates the tensor */ - void allocate(); - /** Return the target that this tensor is pinned on - * - * @return Target of the tensor - */ - TargetHint target() const; + + // Inherited methods overriden: + bool call_accessor() override; + bool has_accessor() const override; + arm_compute::ITensor *set_target(TargetHint target) override; + arm_compute::ITensor *tensor() override; + TargetHint target() const override; + void allocate() override; private: - TargetHint _target; /**< Target that this tensor is pinned on */ - TensorInfo _info; /**< Tensor metadata */ - std::unique_ptr _accessor; /**< Tensor Accessor */ - std::unique_ptr _tensor; /**< Tensor */ + TargetHint _target; /**< Target that this tensor is pinned on */ + TensorInfo _info; /**< Tensor metadata */ + std::unique_ptr _accessor; /**< Tensor Accessor */ + std::unique_ptr _tensor; /**< Tensor */ }; } // namespace graph } // namespace arm_compute diff --git a/arm_compute/graph/Types.h b/arm_compute/graph/Types.h index e48ff84abf..c4396412a7 100644 --- a/arm_compute/graph/Types.h +++ b/arm_compute/graph/Types.h @@ -32,19 +32,20 @@ namespace arm_compute { namespace graph { -using arm_compute::ITensor; -using arm_compute::TensorInfo; -using arm_compute::SubTensorInfo; -using arm_compute::DataType; -using arm_compute::Coordinates; -using arm_compute::TensorShape; -using arm_compute::PadStrideInfo; -using 
arm_compute::WeightsInfo; using arm_compute::ActivationLayerInfo; +using arm_compute::Coordinates; +using arm_compute::DataType; +using arm_compute::DimensionRoundingType; +using arm_compute::ITensorInfo; using arm_compute::NormType; using arm_compute::NormalizationLayerInfo; +using arm_compute::PadStrideInfo; using arm_compute::PoolingLayerInfo; using arm_compute::PoolingType; +using arm_compute::SubTensorInfo; +using arm_compute::TensorInfo; +using arm_compute::TensorShape; +using arm_compute::WeightsInfo; /**< Execution hint to the graph executor */ enum class TargetHint @@ -54,12 +55,18 @@ enum class TargetHint NEON /**< Run node on a NEON capable device */ }; -/**< Convolution method hint to the graph executor */ +/** Convolution method hint to the graph executor */ enum class ConvolutionMethodHint { GEMM, /**< Convolution using GEMM */ DIRECT /**< Direct convolution */ }; + +/** Branch layer merging method */ +enum class BranchMergeMethod +{ + DEPTH_CONCATENATE /**< Concatenate across depth */ +}; } // namespace graph } // namespace arm_compute #endif /*__ARM_COMPUTE_GRAPH_TYPES_H__*/ diff --git a/arm_compute/graph/nodes/ActivationLayer.h b/arm_compute/graph/nodes/ActivationLayer.h index efe8112e77..bc619a8df9 100644 --- a/arm_compute/graph/nodes/ActivationLayer.h +++ b/arm_compute/graph/nodes/ActivationLayer.h @@ -26,7 +26,7 @@ #include "arm_compute/graph/GraphContext.h" #include "arm_compute/graph/INode.h" -#include "arm_compute/graph/Tensor.h" +#include "arm_compute/graph/ITensorObject.h" #include "arm_compute/graph/Types.h" namespace arm_compute @@ -44,7 +44,7 @@ public: ActivationLayer(const ActivationLayerInfo activation_info); // Inherited methods overriden: - std::unique_ptr instantiate_node(GraphContext &ctx, ITensor *input, ITensor *output) override; + std::unique_ptr instantiate_node(GraphContext &ctx, ITensorObject *input, ITensorObject *output) override; private: const ActivationLayerInfo _activation_info; /**< Activation layer info */ diff --git a/arm_compute/graph/nodes/BatchNormalizationLayer.h b/arm_compute/graph/nodes/BatchNormalizationLayer.h index f01cac2361..df7b1d19a9 100644 --- a/arm_compute/graph/nodes/BatchNormalizationLayer.h +++ b/arm_compute/graph/nodes/BatchNormalizationLayer.h @@ -26,6 +26,7 @@ #include "arm_compute/graph/GraphContext.h" #include "arm_compute/graph/INode.h" +#include "arm_compute/graph/ITensorObject.h" #include "arm_compute/graph/Tensor.h" #include "arm_compute/graph/Types.h" @@ -52,7 +53,7 @@ public: } // Inherited methods overriden: - std::unique_ptr instantiate_node(GraphContext &ctx, ITensor *input, ITensor *output) override; + std::unique_ptr instantiate_node(GraphContext &ctx, ITensorObject *input, ITensorObject *output) override; private: Tensor _mean; diff --git a/arm_compute/graph/nodes/BranchLayer.h b/arm_compute/graph/nodes/BranchLayer.h new file mode 100644 index 0000000000..3d13f5f2d3 --- /dev/null +++ b/arm_compute/graph/nodes/BranchLayer.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_GRAPH_BRANCH_LAYER_H__ +#define __ARM_COMPUTE_GRAPH_BRANCH_LAYER_H__ + +#include "arm_compute/graph/GraphContext.h" +#include "arm_compute/graph/INode.h" +#include "arm_compute/graph/ITensorObject.h" +#include "arm_compute/graph/SubGraph.h" +#include "arm_compute/graph/SubTensor.h" +#include "arm_compute/graph/Types.h" + +#include "arm_compute/core/Helpers.h" + +#include + +namespace arm_compute +{ +namespace graph +{ +/** Branch Layer node */ +class BranchLayer final : public INode +{ +public: + /** Default Constructor + * + * @param[in] merge_method Branch merging method + * @param[in] sub_graph1 First graph branch + * @param[in] sub_graph2 Second graph branch + * @param[in] rest_sub_graphs Rest sub-graph branches + */ + template + BranchLayer(BranchMergeMethod merge_method, SubGraph &&sub_graph1, SubGraph &&sub_graph2, Ts &&... 
rest_sub_graphs) + : _branch_merge_method(merge_method), _sub_graphs() + { + /* TODO:(geopin01) Use traits to make sure variadic arguments are of SubGraph type */ + _sub_graphs.push_back(arm_compute::support::cpp14::make_unique(std::move(sub_graph1))); + _sub_graphs.push_back(arm_compute::support::cpp14::make_unique(std::move(sub_graph2))); + + for_each([&](SubGraph & sub_graph) + { + _sub_graphs.push_back(arm_compute::support::cpp14::make_unique(std::move(sub_graph))); + }, + std::move(rest_sub_graphs)...); + } + + // Inherited methods overriden: + std::unique_ptr instantiate_node(GraphContext &ctx, ITensorObject *input, ITensorObject *output) override; + +private: + BranchMergeMethod _branch_merge_method; + std::vector> _sub_graphs; +}; +} // namespace graph +} // namespace arm_compute +#endif /* __ARM_COMPUTE_GRAPH_BRANCH_LAYER_H__ */ diff --git a/arm_compute/graph/nodes/ConvolutionLayer.h b/arm_compute/graph/nodes/ConvolutionLayer.h index 04ba3dd6b7..0905524de8 100644 --- a/arm_compute/graph/nodes/ConvolutionLayer.h +++ b/arm_compute/graph/nodes/ConvolutionLayer.h @@ -26,6 +26,7 @@ #include "arm_compute/graph/GraphContext.h" #include "arm_compute/graph/INode.h" +#include "arm_compute/graph/ITensorObject.h" #include "arm_compute/graph/SubTensor.h" #include "arm_compute/graph/Tensor.h" #include "arm_compute/graph/Types.h" @@ -77,7 +78,7 @@ public: } // Inherited methods overriden: - std::unique_ptr instantiate_node(GraphContext &ctx, ITensor *input, ITensor *output) override; + std::unique_ptr instantiate_node(GraphContext &ctx, ITensorObject *input, ITensorObject *output) override; private: /** Instantiates a non-grouped convolution diff --git a/arm_compute/graph/nodes/DepthConcatenateLayer.h b/arm_compute/graph/nodes/DepthConcatenateLayer.h new file mode 100644 index 0000000000..ac347a46d6 --- /dev/null +++ b/arm_compute/graph/nodes/DepthConcatenateLayer.h @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
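With SubGraph and BranchLayer in place, a branched model is assembled with the same streaming pattern the GoogLeNet example later in this patch uses. The sketch below is illustrative only: the filter counts, input shape and DummyAccessor placeholders are assumptions, and the NEON target is picked purely to avoid OpenCL scheduler initialisation.

#include "arm_compute/graph/Graph.h"
#include "arm_compute/graph/Nodes.h"
#include "arm_compute/graph/SubGraph.h"
#include "support/ToolchainSupport.h"
#include "utils/GraphUtils.h"

#include <utility>

// Illustrative only: two parallel branches merged across depth.
void run_branch_example()
{
    using namespace arm_compute::graph;
    using namespace arm_compute::graph_utils;

    // First branch: 1x1 convolution followed by a ReLU.
    SubGraph i_a;
    i_a << ConvolutionLayer(1U, 1U, 64U,
                            arm_compute::support::cpp14::make_unique<DummyAccessor>(),
                            arm_compute::support::cpp14::make_unique<DummyAccessor>(),
                            PadStrideInfo(1, 1, 0, 0))
        << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));

    // Second branch: 3x3 convolution followed by a ReLU.
    SubGraph i_b;
    i_b << ConvolutionLayer(3U, 3U, 96U,
                            arm_compute::support::cpp14::make_unique<DummyAccessor>(),
                            arm_compute::support::cpp14::make_unique<DummyAccessor>(),
                            PadStrideInfo(1, 1, 1, 1))
        << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));

    // The BranchLayer runs both branches on the shared input and concatenates
    // their outputs along the depth dimension.
    Graph graph;
    graph << TargetHint::NEON
          << Tensor(TensorInfo(TensorShape(28U, 28U, 192U, 1U), 1, DataType::F32), DummyAccessor())
          << BranchLayer(BranchMergeMethod::DEPTH_CONCATENATE, std::move(i_a), std::move(i_b))
          << Tensor(DummyAccessor());
    graph.run();
}

The GoogLeNet example added further down applies this same pattern with four branches per inception block.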
+ */ +#ifndef __ARM_COMPUTE_GRAPH_DEPTH_CONCATENATE_LAYER_H__ +#define __ARM_COMPUTE_GRAPH_DEPTH_CONCATENATE_LAYER_H__ + +#include "arm_compute/graph/GraphContext.h" +#include "arm_compute/graph/INode.h" +#include "arm_compute/graph/Tensor.h" +#include "arm_compute/graph/Types.h" + +namespace arm_compute +{ +namespace graph +{ +/** Depth Concatenate Layer node */ +class DepthConcatenateLayer +{ +public: + /** Default Constructor */ + DepthConcatenateLayer() = default; + DepthConcatenateLayer(const DepthConcatenateLayer &) = delete; + DepthConcatenateLayer &operator=(const DepthConcatenateLayer &) = delete; + DepthConcatenateLayer(DepthConcatenateLayer &&) = default; + DepthConcatenateLayer &operator=(DepthConcatenateLayer &&) = delete; + + // Inherited methods overriden: + std::unique_ptr instantiate_node(GraphContext &ctx, std::vector inputs, ITensor *output); + void print_info(); + +private: + TargetHint _hint{ TargetHint::DONT_CARE }; + std::vector _inputs{ nullptr }; + ITensor *_output{ nullptr }; +}; +} // namespace graph +} // namespace arm_compute +#endif /* __ARM_COMPUTE_GRAPH_DEPTH_CONCATENATE_LAYER_H__ */ diff --git a/arm_compute/graph/nodes/FloorLayer.h b/arm_compute/graph/nodes/FloorLayer.h index 40fde3b791..f88a5b9d94 100644 --- a/arm_compute/graph/nodes/FloorLayer.h +++ b/arm_compute/graph/nodes/FloorLayer.h @@ -26,7 +26,7 @@ #include "arm_compute/graph/GraphContext.h" #include "arm_compute/graph/INode.h" -#include "arm_compute/graph/Tensor.h" +#include "arm_compute/graph/ITensorObject.h" #include "arm_compute/graph/Types.h" namespace arm_compute { @@ -37,7 +37,7 @@ class FloorLayer : public INode { public: // Inherited methods overriden: - std::unique_ptr instantiate_node(GraphContext &ctx, ITensor *input, ITensor *output) override; + std::unique_ptr instantiate_node(GraphContext &ctx, ITensorObject *input, ITensorObject *output) override; }; } // namespace graph diff --git a/arm_compute/graph/nodes/FullyConnectedLayer.h b/arm_compute/graph/nodes/FullyConnectedLayer.h index d31e060457..270676a6b5 100644 --- a/arm_compute/graph/nodes/FullyConnectedLayer.h +++ b/arm_compute/graph/nodes/FullyConnectedLayer.h @@ -26,6 +26,7 @@ #include "arm_compute/graph/GraphContext.h" #include "arm_compute/graph/INode.h" +#include "arm_compute/graph/ITensorObject.h" #include "arm_compute/graph/Tensor.h" #include "arm_compute/graph/Types.h" @@ -50,7 +51,7 @@ public: } // Inherited methods overriden: - std::unique_ptr instantiate_node(GraphContext &ctx, ITensor *input, ITensor *output) override; + std::unique_ptr instantiate_node(GraphContext &ctx, ITensorObject *input, ITensorObject *output) override; // Inherited methods overriden: private: diff --git a/arm_compute/graph/nodes/L2NormalizeLayer.h b/arm_compute/graph/nodes/L2NormalizeLayer.h index ab333a221c..ddc1646485 100644 --- a/arm_compute/graph/nodes/L2NormalizeLayer.h +++ b/arm_compute/graph/nodes/L2NormalizeLayer.h @@ -26,7 +26,7 @@ #include "arm_compute/graph/GraphContext.h" #include "arm_compute/graph/INode.h" -#include "arm_compute/graph/Tensor.h" +#include "arm_compute/graph/ITensorObject.h" #include "arm_compute/graph/Types.h" namespace arm_compute @@ -48,7 +48,7 @@ public: } // Inherited methods overriden: - std::unique_ptr instantiate_node(GraphContext &ctx, ITensor *input, ITensor *output) override; + std::unique_ptr instantiate_node(GraphContext &ctx, ITensorObject *input, ITensorObject *output) override; private: unsigned int _axis; diff --git a/arm_compute/graph/nodes/NormalizationLayer.h 
b/arm_compute/graph/nodes/NormalizationLayer.h index 02efd1cbeb..e1c45094d8 100644 --- a/arm_compute/graph/nodes/NormalizationLayer.h +++ b/arm_compute/graph/nodes/NormalizationLayer.h @@ -26,6 +26,7 @@ #include "arm_compute/graph/GraphContext.h" #include "arm_compute/graph/INode.h" +#include "arm_compute/graph/ITensorObject.h" #include "arm_compute/graph/Types.h" namespace arm_compute @@ -43,7 +44,7 @@ public: explicit NormalizationLayer(const NormalizationLayerInfo norm_info); // Inherited methods overriden: - std::unique_ptr instantiate_node(GraphContext &ctx, ITensor *input, ITensor *output) override; + std::unique_ptr instantiate_node(GraphContext &ctx, ITensorObject *input, ITensorObject *output) override; private: const NormalizationLayerInfo _norm_info; /**< Normalization layer information */ diff --git a/arm_compute/graph/nodes/PoolingLayer.h b/arm_compute/graph/nodes/PoolingLayer.h index 87b15d06cb..5c45bc04ed 100644 --- a/arm_compute/graph/nodes/PoolingLayer.h +++ b/arm_compute/graph/nodes/PoolingLayer.h @@ -26,7 +26,7 @@ #include "arm_compute/graph/GraphContext.h" #include "arm_compute/graph/INode.h" -#include "arm_compute/graph/Tensor.h" +#include "arm_compute/graph/ITensorObject.h" #include "arm_compute/graph/Types.h" namespace arm_compute @@ -44,7 +44,7 @@ public: PoolingLayer(const PoolingLayerInfo pool_info); // Inherited methods overriden: - std::unique_ptr instantiate_node(GraphContext &ctx, ITensor *input, ITensor *output) override; + std::unique_ptr instantiate_node(GraphContext &ctx, ITensorObject *input, ITensorObject *output) override; private: const PoolingLayerInfo _pool_info; /**< Pooling layer information */ diff --git a/arm_compute/graph/nodes/SoftmaxLayer.h b/arm_compute/graph/nodes/SoftmaxLayer.h index 2e1bd98c8d..b5d1bc53fd 100644 --- a/arm_compute/graph/nodes/SoftmaxLayer.h +++ b/arm_compute/graph/nodes/SoftmaxLayer.h @@ -26,20 +26,19 @@ #include "arm_compute/graph/GraphContext.h" #include "arm_compute/graph/INode.h" -#include "arm_compute/graph/Tensor.h" +#include "arm_compute/graph/ITensorObject.h" #include "arm_compute/graph/Types.h" namespace arm_compute { namespace graph { /** Softmax layer node */ -class SoftmaxLayer : public INode +class SoftmaxLayer final : public INode { public: // Inherited methods overriden: - std::unique_ptr instantiate_node(GraphContext &ctx, ITensor *input, ITensor *output) override; + std::unique_ptr instantiate_node(GraphContext &ctx, ITensorObject *input, ITensorObject *output) override; }; - } // namespace graph } // namespace arm_compute #endif /* __ARM_COMPUTE_GRAPH_SOFTMAX_LAYER_H__ */ diff --git a/examples/graph_googlenet.cpp b/examples/graph_googlenet.cpp new file mode 100644 index 0000000000..0e82c1e85d --- /dev/null +++ b/examples/graph_googlenet.cpp @@ -0,0 +1,214 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CL /* Needed by Utils.cpp to handle OpenCL exceptions properly */ +#error "This example needs to be built with -DARM_COMPUTE_CL" +#endif /* ARM_COMPUTE_CL */ + +#include "arm_compute/graph/Graph.h" +#include "arm_compute/graph/Nodes.h" +#include "arm_compute/graph/SubGraph.h" +#include "arm_compute/runtime/CL/CLScheduler.h" +#include "arm_compute/runtime/Scheduler.h" +#include "support/ToolchainSupport.h" +#include "utils/GraphUtils.h" +#include "utils/Utils.h" + +#include +#include +#include +#include + +using namespace arm_compute::graph; +using namespace arm_compute::graph_utils; + +/** Generates appropriate accessor according to the specified path + * + * @note If path is empty will generate a DummyAccessor else will generate a NumPyBinLoader + * + * @param path Path to the data files + * @param data_file Relative path to the data files from path + * + * @return An appropriate tensor accessor + */ +std::unique_ptr get_accessor(const std::string &path, const std::string &data_file) +{ + if(path.empty()) + { + return arm_compute::support::cpp14::make_unique(); + } + else + { + return arm_compute::support::cpp14::make_unique(path + data_file); + } +} + +BranchLayer get_inception_node(const std::string &data_path, std::string &¶m_path, + unsigned int a_filt, + std::tuple b_filters, + std::tuple c_filters, + unsigned int d_filt) +{ + std::string total_path = "/cnn_data/googlenet_model/" + param_path + "/" + param_path + "_"; + SubGraph i_a; + i_a << ConvolutionLayer( + 1U, 1U, a_filt, + get_accessor(data_path, total_path + "1x1_w.npy"), + get_accessor(data_path, total_path + "1x1_b.npy"), + PadStrideInfo(1, 1, 0, 0)) + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); + + SubGraph i_b; + i_b << ConvolutionLayer( + 1U, 1U, std::get<0>(b_filters), + get_accessor(data_path, total_path + "3x3_reduce_w.npy"), + get_accessor(data_path, total_path + "3x3_reduce_b.npy"), + PadStrideInfo(1, 1, 0, 0)) + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + << ConvolutionLayer( + 3U, 3U, std::get<1>(b_filters), + get_accessor(data_path, total_path + "3x3_w.npy"), + get_accessor(data_path, total_path + "3x3_b.npy"), + PadStrideInfo(1, 1, 1, 1)) + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); + + SubGraph i_c; + i_c << ConvolutionLayer( + 1U, 1U, std::get<0>(c_filters), + get_accessor(data_path, total_path + "5x5_reduce_w.npy"), + get_accessor(data_path, total_path + "5x5_reduce_b.npy"), + PadStrideInfo(1, 1, 0, 0)) + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + << ConvolutionLayer( + 5U, 5U, std::get<1>(c_filters), + get_accessor(data_path, total_path + "5x5_w.npy"), + get_accessor(data_path, total_path + "5x5_b.npy"), + PadStrideInfo(1, 1, 2, 2)) + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); + + SubGraph i_d; + i_d << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(1, 1, 1, 1, 
DimensionRoundingType::CEIL))) + << ConvolutionLayer( + 1U, 1U, d_filt, + get_accessor(data_path, total_path + "pool_proj_w.npy"), + get_accessor(data_path, total_path + "pool_proj_b.npy"), + PadStrideInfo(1, 1, 0, 0)) + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); + + return BranchLayer(BranchMergeMethod::DEPTH_CONCATENATE, std::move(i_a), std::move(i_b), std::move(i_c), std::move(i_d)); +} + +/** Example demonstrating how to implement Googlenet's network using the Compute Library's graph API + * + * @param[in] argc Number of arguments + * @param[in] argv Arguments ( [optional] Path to the weights folder, [optional] batches ) + */ +void main_graph_googlenet(int argc, const char **argv) +{ + std::string data_path; /** Path to the trainable data */ + unsigned int batches = 4; /** Number of batches */ + + // Parse arguments + if(argc < 2) + { + // Print help + std::cout << "Usage: " << argv[0] << " [path_to_data] [batches]\n\n"; + std::cout << "No data folder provided: using random values\n\n"; + } + else if(argc == 2) + { + //Do something with argv[1] + data_path = argv[1]; + std::cout << "Usage: " << argv[0] << " [path_to_data] [batches]\n\n"; + std::cout << "No number of batches where specified, thus will use the default : " << batches << "\n\n"; + } + else + { + //Do something with argv[1] and argv[2] + data_path = argv[1]; + batches = std::strtol(argv[2], nullptr, 0); + } + + // Check if OpenCL is available and initialize the scheduler + if(arm_compute::opencl_is_available()) + { + arm_compute::CLScheduler::get().default_init(); + } + + Graph graph; + + graph << TargetHint::OPENCL + << Tensor(TensorInfo(TensorShape(224U, 224U, 3U, batches), 1, DataType::F32), DummyAccessor()) + << ConvolutionLayer( + 7U, 7U, 64U, + get_accessor(data_path, "/cnn_data/googlenet_model/conv1/conv1_7x7_s2_w.npy"), + get_accessor(data_path, "/cnn_data/googlenet_model/conv1/conv1_7x7_s2_b.npy"), + PadStrideInfo(2, 2, 3, 3)) + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))) + << NormalizationLayer(NormalizationLayerInfo(NormType::CROSS_MAP, 5, 0.0001f, 0.75f)) + << ConvolutionLayer( + 1U, 1U, 64U, + get_accessor(data_path, "/cnn_data/googlenet_model/conv2/conv2_3x3_reduce_w.npy"), + get_accessor(data_path, "/cnn_data/googlenet_model/conv2/conv2_3x3_reduce_b.npy"), + PadStrideInfo(1, 1, 0, 0)) + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + << ConvolutionLayer( + 3U, 3U, 192U, + get_accessor(data_path, "/cnn_data/googlenet_model/conv2/conv2_3x3_w.npy"), + get_accessor(data_path, "/cnn_data/googlenet_model/conv2/conv2_3x3_b.npy"), + PadStrideInfo(1, 1, 1, 1)) + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + << NormalizationLayer(NormalizationLayerInfo(NormType::CROSS_MAP, 5, 0.0001f, 0.75f)) + << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))) + << get_inception_node(data_path, "inception_3a", 64, std::make_tuple(96U, 128U), std::make_tuple(16U, 32U), 32U) + << get_inception_node(data_path, "inception_3b", 128, std::make_tuple(128U, 192U), std::make_tuple(32U, 96U), 64U) + << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))) + << get_inception_node(data_path, "inception_4a", 192, std::make_tuple(96U, 208U), 
std::make_tuple(16U, 48U), 64U) + << get_inception_node(data_path, "inception_4b", 160, std::make_tuple(112U, 224U), std::make_tuple(24U, 64U), 64U) + << get_inception_node(data_path, "inception_4c", 128, std::make_tuple(128U, 256U), std::make_tuple(24U, 64U), 64U) + << get_inception_node(data_path, "inception_4d", 112, std::make_tuple(144U, 288U), std::make_tuple(32U, 64U), 64U) + << get_inception_node(data_path, "inception_4e", 256, std::make_tuple(160U, 320U), std::make_tuple(32U, 128U), 128U) + << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))) + << get_inception_node(data_path, "inception_5a", 256, std::make_tuple(160U, 320U), std::make_tuple(32U, 128U), 128U) + << get_inception_node(data_path, "inception_5b", 384, std::make_tuple(192U, 384U), std::make_tuple(48U, 128U), 128U) + << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, 7, PadStrideInfo(1, 1, 0, 0, DimensionRoundingType::CEIL))) + << FullyConnectedLayer( + 1000U, + get_accessor(data_path, "/cnn_data/googlenet_model/loss3/loss3_classifier_w.npy"), + get_accessor(data_path, "/cnn_data/googlenet_model/loss3/loss3_classifier_b.npy")) + << SoftmaxLayer() + << Tensor(DummyAccessor()); + + graph.run(); +} + +/** Main program for Googlenet + * + * @param[in] argc Number of arguments + * @param[in] argv Arguments ( [optional] Path to the weights folder, [optional] batches ) + */ +int main(int argc, const char **argv) +{ + return arm_compute::utils::run_example(argc, argv, main_graph_googlenet); +} diff --git a/src/core/SubTensorInfo.cpp b/src/core/SubTensorInfo.cpp index f5a282df8a..878283bd8e 100644 --- a/src/core/SubTensorInfo.cpp +++ b/src/core/SubTensorInfo.cpp @@ -38,7 +38,11 @@ SubTensorInfo::SubTensorInfo(ITensorInfo *parent, const TensorShape &tensor_shap : _parent(parent), _tensor_shape(tensor_shape), _coords(coords), _valid_region{ Coordinates(), _tensor_shape } { ARM_COMPUTE_ERROR_ON(parent == nullptr); - ARM_COMPUTE_ERROR_ON_INVALID_SUBTENSOR(parent->tensor_shape(), coords, tensor_shape); + // Check if subtensor is valid if parent is configured + if(parent->tensor_shape().total_size() != 0) + { + ARM_COMPUTE_ERROR_ON_INVALID_SUBTENSOR(parent->tensor_shape(), coords, tensor_shape); + } // Initialize valid region Coordinates coordinates; @@ -49,7 +53,11 @@ SubTensorInfo::SubTensorInfo(ITensorInfo *parent, const TensorShape &tensor_shap void SubTensorInfo::set_tensor_shape(TensorShape shape) { ARM_COMPUTE_ERROR_ON(_parent == nullptr); - ARM_COMPUTE_ERROR_ON_INVALID_SUBTENSOR(_parent->tensor_shape(), _coords, shape); + // Check if subtensor is valid if parent is configured + if(_parent->tensor_shape().total_size() != 0) + { + ARM_COMPUTE_ERROR_ON_INVALID_SUBTENSOR(_parent->tensor_shape(), _coords, shape); + } _tensor_shape = shape; } diff --git a/src/graph/CL/CLMap.cpp b/src/graph/CL/CLMap.cpp index 4892b9609b..5289ea9a04 100644 --- a/src/graph/CL/CLMap.cpp +++ b/src/graph/CL/CLMap.cpp @@ -23,20 +23,21 @@ */ #include "arm_compute/graph/CL/CLMap.h" +#include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/Validate.h" -#include "arm_compute/graph/Tensor.h" -#include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/graph/ITensorObject.h" +#include "arm_compute/runtime/CL/CLScheduler.h" using namespace arm_compute::graph; -CLMap::CLMap(Tensor *tensor, bool blocking) - : _tensor(dynamic_cast(tensor->tensor())), _blocking(blocking) +CLMap::CLMap(ITensorObject *tensor, bool blocking) + : 
_tensor(dynamic_cast(tensor->tensor())), _blocking(blocking) { ARM_COMPUTE_ERROR_ON_NULLPTR(_tensor); } void CLMap::run() { - _tensor->map(_blocking); + _tensor->map(arm_compute::CLScheduler::get().queue(), _blocking); } diff --git a/src/graph/CL/CLUnmap.cpp b/src/graph/CL/CLUnmap.cpp index ec7d8650d6..31f2f19e9c 100644 --- a/src/graph/CL/CLUnmap.cpp +++ b/src/graph/CL/CLUnmap.cpp @@ -23,20 +23,21 @@ */ #include "arm_compute/graph/CL/CLUnmap.h" +#include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/Validate.h" -#include "arm_compute/graph/Tensor.h" -#include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/graph/ITensorObject.h" +#include "arm_compute/runtime/CL/CLScheduler.h" using namespace arm_compute::graph; -CLUnmap::CLUnmap(Tensor *tensor) - : _tensor(dynamic_cast(tensor->tensor())) +CLUnmap::CLUnmap(ITensorObject *tensor) + : _tensor(dynamic_cast(tensor->tensor())) { ARM_COMPUTE_ERROR_ON_NULLPTR(_tensor); } void CLUnmap::run() { - _tensor->unmap(); + _tensor->unmap(arm_compute::CLScheduler::get().queue()); } diff --git a/src/graph/Graph.cpp b/src/graph/Graph.cpp index 7dddb1cd9a..b86330b658 100644 --- a/src/graph/Graph.cpp +++ b/src/graph/Graph.cpp @@ -26,16 +26,18 @@ #include "arm_compute/graph/CL/CLMap.h" #include "arm_compute/graph/CL/CLUnmap.h" #include "arm_compute/graph/INode.h" +#include "arm_compute/graph/ITensorObject.h" #include "arm_compute/graph/Tensor.h" #include "arm_compute/runtime/CL/CLTensor.h" #include "arm_compute/runtime/Tensor.h" +#include "support/ToolchainSupport.h" using namespace arm_compute::graph; struct Stage { - Tensor *_input; - Tensor *_output; + ITensorObject *_input; + ITensorObject *_output; std::unique_ptr _function; }; @@ -48,20 +50,21 @@ public: */ void configure(GraphHints _next_hints); - GraphContext _ctx{}; - std::vector _pipeline{}; - std::vector> _tensors{}; - std::vector> _nodes{}; - GraphHints _current_hints{}; - GraphHints _next_hints{}; - std::unique_ptr _graph_input{ nullptr }; - std::unique_ptr _graph_output{ nullptr }; - std::unique_ptr _current_node{ nullptr }; - Tensor *_current_output{ nullptr }; + GraphContext _ctx{}; + std::vector _pipeline{}; + std::vector> _tensors{}; + std::vector> _nodes{}; + GraphHints _current_hints{}; + GraphHints _next_hints{}; + std::unique_ptr _graph_input{ nullptr }; + std::unique_ptr _graph_output{ nullptr }; + std::unique_ptr _current_node{ nullptr }; + ITensorObject *_current_output{ nullptr }; + bool _info_enabled{ false }; private: - Tensor *_current_input{ nullptr }; - GraphHints _previous_hints{}; + ITensorObject *_current_input{ nullptr }; + GraphHints _previous_hints{}; }; Graph::~Graph() //NOLINT @@ -78,7 +81,7 @@ void Graph::run() { while(true) { - if(!_pimpl->_graph_input->call_accessor()) + if(_pimpl->_graph_input->has_accessor() && !_pimpl->_graph_input->call_accessor()) { return; } @@ -88,7 +91,8 @@ void Graph::run() stage._function->run(); } - if(!_pimpl->_graph_output->call_accessor()) + if((_pimpl->_graph_output->has_accessor() && !_pimpl->_graph_output->call_accessor()) + || (!_pimpl->_graph_output->has_accessor())) { return; } @@ -126,9 +130,11 @@ void Graph::Private::configure(GraphHints _next_hints) _current_output->set_target(TargetHint::NEON); } - // Update ctx and instantiate node + // Instantiate Node _ctx.hints() = _current_hints; - std::unique_ptr func = _current_node->instantiate_node(_ctx, _current_input->tensor(), _current_output->tensor()); + std::unique_ptr func = _current_node->instantiate_node(_ctx, 
_current_input, _current_output); + + // Allocate current input _current_input->allocate(); // Map input if needed @@ -181,7 +187,7 @@ void Graph::add_node(std::unique_ptr node) } //Add a tensor with an Accessor (i.e either the input or output of the graph) -void Graph::add_tensor(std::unique_ptr tensor) +void Graph::add_tensor_object(std::unique_ptr tensor) { // If it's the first Tensor added then it will be the input of the Graph. if(_pimpl->_graph_input == nullptr) @@ -227,7 +233,13 @@ Graph &arm_compute::graph::operator<<(Graph &graph, TensorInfo &&info) Graph &arm_compute::graph::operator<<(Graph &graph, Tensor &&tensor) { - graph.add_tensor(arm_compute::support::cpp14::make_unique(std::move(tensor))); + graph.add_tensor_object(arm_compute::support::cpp14::make_unique(std::move(tensor))); + return graph; +} + +Graph &arm_compute::graph::operator<<(Graph &graph, SubTensor &&sub_tensor) +{ + graph.add_tensor_object(arm_compute::support::cpp14::make_unique(std::move(sub_tensor))); return graph; } diff --git a/src/graph/INode.cpp b/src/graph/INode.cpp index 4b383f562b..582f936351 100644 --- a/src/graph/INode.cpp +++ b/src/graph/INode.cpp @@ -26,8 +26,6 @@ #include "arm_compute/core/CL/OpenCL.h" #include "arm_compute/core/Validate.h" -#include - using namespace arm_compute::graph; TargetHint INode::override_target_hint(TargetHint target_hint) const diff --git a/src/graph/SubGraph.cpp b/src/graph/SubGraph.cpp new file mode 100644 index 0000000000..977cd4a4ae --- /dev/null +++ b/src/graph/SubGraph.cpp @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/graph/SubGraph.h" + +#include "arm_compute/graph/Graph.h" +#include "arm_compute/graph/INode.h" +#include "arm_compute/graph/Tensor.h" + +using namespace arm_compute::graph; + +SubGraph::SubGraph() + : _nodes(), _input(nullptr), _output(nullptr) +{ +} + +void SubGraph::add_node(std::unique_ptr node) +{ + _nodes.push_back(std::move(node)); +} + +void SubGraph::add_tensor_object(std::unique_ptr tensor) +{ + // If it's the first Tensor added then it will be the input of the Graph. 
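+        // Every tensor object added after the first is stored as the sub-graph's output.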
+ if(_input == nullptr) + { + _input = std::move(tensor); + } + else + { + _output = std::move(tensor); + } +} + +std::unique_ptr SubGraph::construct(TargetHint hint, std::unique_ptr input, std::unique_ptr output) +{ + auto graph = arm_compute::support::cpp14::make_unique(); + + // Set hint + // TODO(geopin01): store hints of sub-graph + graph->hints().set_target_hint(hint); + + // Configure input + if(_input == nullptr) + { + _input = std::move(input); + } + graph->add_tensor_object(std::move(_input)); + + // Construct nodes + for(auto &node : _nodes) + { + graph->add_node(std::move(node)); + } + + // Configure output + if(_output == nullptr) + { + _output = std::move(output); + } + graph->add_tensor_object(std::move(_output)); + + return graph; +} + +bool SubGraph::has_input() const +{ + return _input != nullptr; +} + +bool SubGraph::has_output() const +{ + return _output != nullptr; +} + +SubGraph &arm_compute::graph::operator<<(SubGraph &graph, Tensor &&tensor) +{ + graph.add_tensor_object(arm_compute::support::cpp14::make_unique(std::move(tensor))); + return graph; +} + +SubGraph &arm_compute::graph::operator<<(SubGraph &graph, SubTensor &&sub_tensor) +{ + graph.add_tensor_object(arm_compute::support::cpp14::make_unique(std::move(sub_tensor))); + return graph; +} diff --git a/src/graph/SubTensor.cpp b/src/graph/SubTensor.cpp index abf8506c33..da8de956d7 100644 --- a/src/graph/SubTensor.cpp +++ b/src/graph/SubTensor.cpp @@ -27,7 +27,9 @@ #include "arm_compute/core/Helpers.h" #include "arm_compute/core/Validate.h" #include "arm_compute/runtime/CL/CLSubTensor.h" +#include "arm_compute/runtime/CL/CLTensor.h" #include "arm_compute/runtime/SubTensor.h" +#include "arm_compute/runtime/Tensor.h" #include "utils/TypePrinter.h" using namespace arm_compute::graph; @@ -35,7 +37,7 @@ using namespace arm_compute::graph; namespace { template -std::unique_ptr initialise_subtensor(ITensor *parent, TensorShape shape, Coordinates coords) +std::unique_ptr initialise_subtensor(arm_compute::ITensor *parent, TensorShape shape, Coordinates coords) { auto ptensor = dynamic_cast(parent); auto subtensor = arm_compute::support::cpp14::make_unique(ptensor, shape, coords); @@ -44,41 +46,44 @@ std::unique_ptr initialise_subtensor(ITensor *parent, TensorShape shape } // namespace SubTensor::SubTensor() - : _target(TargetHint::DONT_CARE), _coords(), _info(), _parent(nullptr), _subtensor(nullptr) + : _target(TargetHint::DONT_CARE), _tensor_shape(), _coords(), _parent(nullptr), _subtensor(nullptr) { } SubTensor::SubTensor(Tensor &parent, TensorShape tensor_shape, Coordinates coords) - : _target(TargetHint::DONT_CARE), _coords(coords), _info(), _parent(nullptr), _subtensor(nullptr) + : _target(TargetHint::DONT_CARE), _tensor_shape(tensor_shape), _coords(coords), _parent(nullptr), _subtensor(nullptr) { ARM_COMPUTE_ERROR_ON(parent.tensor() == nullptr); _parent = parent.tensor(); - _info = SubTensorInfo(parent.tensor()->info(), tensor_shape, coords); _target = parent.target(); instantiate_subtensor(); } -SubTensor::SubTensor(ITensor *parent, TensorShape tensor_shape, Coordinates coords, TargetHint target) - : _target(target), _coords(coords), _info(), _parent(parent), _subtensor(nullptr) +SubTensor::SubTensor(arm_compute::ITensor *parent, TensorShape tensor_shape, Coordinates coords, TargetHint target) + : _target(target), _tensor_shape(tensor_shape), _coords(coords), _parent(parent), _subtensor(nullptr) { ARM_COMPUTE_ERROR_ON(parent == nullptr); - _info = SubTensorInfo(parent->info(), tensor_shape, coords); - 
instantiate_subtensor(); } -void SubTensor::set_info(SubTensorInfo &&info) +bool SubTensor::call_accessor() +{ + return true; +} + +bool SubTensor::has_accessor() const { - _info = info; + return false; } -const SubTensorInfo &SubTensor::info() const +arm_compute::ITensor *SubTensor::set_target(TargetHint target) { - return _info; + ARM_COMPUTE_ERROR_ON(target != _target); + return (target == _target) ? _subtensor.get() : nullptr; } -ITensor *SubTensor::tensor() +arm_compute::ITensor *SubTensor::tensor() { return _subtensor.get(); } @@ -88,15 +93,20 @@ TargetHint SubTensor::target() const return _target; } +void SubTensor::allocate() +{ + // NOP for sub-tensors +} + void SubTensor::instantiate_subtensor() { switch(_target) { case TargetHint::OPENCL: - _subtensor = initialise_subtensor(_parent, _info.tensor_shape(), _coords); + _subtensor = initialise_subtensor(_parent, _tensor_shape, _coords); break; case TargetHint::NEON: - _subtensor = initialise_subtensor(_parent, _info.tensor_shape(), _coords); + _subtensor = initialise_subtensor(_parent, _tensor_shape, _coords); break; default: ARM_COMPUTE_ERROR("Invalid TargetHint"); diff --git a/src/graph/Tensor.cpp b/src/graph/Tensor.cpp index 31dd4e86ac..f85fe27dbf 100644 --- a/src/graph/Tensor.cpp +++ b/src/graph/Tensor.cpp @@ -35,7 +35,7 @@ using namespace arm_compute::graph; namespace { template -std::unique_ptr initialise_tensor(TensorInfo &info) +std::unique_ptr initialise_tensor(TensorInfo &info) { auto tensor = arm_compute::support::cpp14::make_unique(); tensor->allocator()->init(info); @@ -43,7 +43,7 @@ std::unique_ptr initialise_tensor(TensorInfo &info) } template -void tensor_allocate(ITensor &tensor) +void tensor_allocate(arm_compute::ITensor &tensor) { auto itensor = dynamic_cast(&tensor); ARM_COMPUTE_ERROR_ON_NULLPTR(itensor); @@ -85,7 +85,12 @@ bool Tensor::call_accessor() return retval; } -ITensor *Tensor::tensor() +bool Tensor::has_accessor() const +{ + return (_accessor != nullptr); +} + +arm_compute::ITensor *Tensor::tensor() { return _tensor.get(); } @@ -95,7 +100,7 @@ const TensorInfo &Tensor::info() const return _info; } -ITensor *Tensor::set_target(TargetHint target) +arm_compute::ITensor *Tensor::set_target(TargetHint target) { if(_tensor != nullptr) { diff --git a/src/graph/nodes/ActivationLayer.cpp b/src/graph/nodes/ActivationLayer.cpp index 5cd2a0bcc2..5e75c28bc7 100644 --- a/src/graph/nodes/ActivationLayer.cpp +++ b/src/graph/nodes/ActivationLayer.cpp @@ -36,7 +36,7 @@ using namespace arm_compute::graph; namespace { template -std::unique_ptr instantiate_function(ITensor *input, ITensor *output, const ActivationLayerInfo &activation_info) +std::unique_ptr instantiate_function(arm_compute::ITensor *input, arm_compute::ITensor *output, const ActivationLayerInfo &activation_info) { auto activation = arm_compute::support::cpp14::make_unique(); activation->configure( @@ -48,18 +48,18 @@ std::unique_ptr instantiate_function(ITensor *input, ITe } template -std::unique_ptr instantiate(ITensor *input, ITensor *output, const ActivationLayerInfo &activation_info); +std::unique_ptr instantiate(arm_compute::ITensor *input, arm_compute::ITensor *output, const ActivationLayerInfo &activation_info); template <> -std::unique_ptr instantiate(ITensor *input, ITensor *output, const ActivationLayerInfo &activation_info) +std::unique_ptr instantiate(arm_compute::ITensor *input, arm_compute::ITensor *output, const ActivationLayerInfo &activation_info) { - return instantiate_function(input, output, activation_info); + return 
instantiate_function(input, output, activation_info); } template <> -std::unique_ptr instantiate(ITensor *input, ITensor *output, const ActivationLayerInfo &activation_info) +std::unique_ptr instantiate(arm_compute::ITensor *input, arm_compute::ITensor *output, const ActivationLayerInfo &activation_info) { - return instantiate_function(input, output, activation_info); + return instantiate_function(input, output, activation_info); } } // namespace @@ -68,25 +68,29 @@ ActivationLayer::ActivationLayer(const ActivationLayerInfo activation_info) { } -std::unique_ptr ActivationLayer::instantiate_node(GraphContext &ctx, ITensor *input, ITensor *output) +std::unique_ptr ActivationLayer::instantiate_node(GraphContext &ctx, ITensorObject *input, ITensorObject *output) { + ARM_COMPUTE_ERROR_ON(input == nullptr || input->tensor() == nullptr); + ARM_COMPUTE_ERROR_ON(output == nullptr || output->tensor() == nullptr); + std::unique_ptr func; _target_hint = ctx.hints().target_hint(); + arm_compute::ITensor *in = input->tensor(); + arm_compute::ITensor *out = output->tensor(); + if(_target_hint == TargetHint::OPENCL) { - func = instantiate(input, output, _activation_info); - ARM_COMPUTE_LOG("Instantiating CLActivationLayer"); + func = instantiate(in, out, _activation_info); } else { - func = instantiate(input, output, _activation_info); - ARM_COMPUTE_LOG("Instantiating NEActivationLayer"); + func = instantiate(in, out, _activation_info); } - ARM_COMPUTE_LOG(" Data Type: " << input->info()->data_type() - << " Input shape: " << input->info()->tensor_shape() - << " Output shape: " << output->info()->tensor_shape() + ARM_COMPUTE_LOG(" Data Type: " << in->info()->data_type() + << " Input shape: " << in->info()->tensor_shape() + << " Output shape: " << out->info()->tensor_shape() << " Activation function: " << _activation_info.activation() << " a: " << _activation_info.a() << " b: " << _activation_info.b() diff --git a/src/graph/nodes/BatchNormalizationLayer.cpp b/src/graph/nodes/BatchNormalizationLayer.cpp index a6a990fd3f..25e9e9bffb 100644 --- a/src/graph/nodes/BatchNormalizationLayer.cpp +++ b/src/graph/nodes/BatchNormalizationLayer.cpp @@ -36,7 +36,7 @@ using namespace arm_compute::graph; namespace { template -std::unique_ptr instantiate_function(ITensor *input, ITensor *output, Tensor &mean, Tensor &var, Tensor &beta, Tensor &gamma, float epsilon) +std::unique_ptr instantiate_function(arm_compute::ITensor *input, arm_compute::ITensor *output, Tensor &mean, Tensor &var, Tensor &beta, Tensor &gamma, float epsilon) { auto norm = arm_compute::support::cpp14::make_unique(); norm->configure( @@ -52,58 +52,65 @@ std::unique_ptr instantiate_function(ITensor *input, ITe } template -std::unique_ptr instantiate(ITensor *input, ITensor *output, Tensor &mean, Tensor &var, Tensor &beta, Tensor &gamma, float epsilon); +std::unique_ptr instantiate(arm_compute::ITensor *input, arm_compute::ITensor *output, Tensor &mean, Tensor &var, Tensor &beta, Tensor &gamma, float epsilon); template <> -std::unique_ptr instantiate(ITensor *input, ITensor *output, Tensor &mean, Tensor &var, Tensor &beta, Tensor &gamma, float epsilon) +std::unique_ptr instantiate(arm_compute::ITensor *input, arm_compute::ITensor *output, Tensor &mean, Tensor &var, Tensor &beta, Tensor &gamma, + float epsilon) { return instantiate_function(input, output, mean, var, beta, gamma, epsilon); } template <> -std::unique_ptr instantiate(ITensor *input, ITensor *output, Tensor &mean, Tensor &var, Tensor &beta, Tensor &gamma, float epsilon) +std::unique_ptr 
instantiate(arm_compute::ITensor *input, arm_compute::ITensor *output, Tensor &mean, Tensor &var, Tensor &beta, Tensor &gamma, float epsilon) { return instantiate_function(input, output, mean, var, beta, gamma, epsilon); } } // namespace -std::unique_ptr BatchNormalizationLayer::instantiate_node(GraphContext &ctx, ITensor *input, ITensor *output) +std::unique_ptr BatchNormalizationLayer::instantiate_node(GraphContext &ctx, ITensorObject *input, ITensorObject *output) { + ARM_COMPUTE_ERROR_ON(input == nullptr || input->tensor() == nullptr); + ARM_COMPUTE_ERROR_ON(output == nullptr || output->tensor() == nullptr); + std::unique_ptr func; _target_hint = ctx.hints().target_hint(); - unsigned int batch_norm_size = input->info()->dimension(2); + arm_compute::ITensor *in = input->tensor(); + arm_compute::ITensor *out = output->tensor(); + + unsigned int batch_norm_size = in->info()->dimension(2); if(_mean.tensor() == nullptr) { - _mean.set_info(TensorInfo(TensorShape(batch_norm_size), input->info()->num_channels(), input->info()->data_type(), input->info()->fixed_point_position())); + _mean.set_info(TensorInfo(TensorShape(batch_norm_size), in->info()->num_channels(), in->info()->data_type(), in->info()->fixed_point_position())); } if(_var.tensor() == nullptr) { - _var.set_info(TensorInfo(TensorShape(batch_norm_size), input->info()->num_channels(), input->info()->data_type(), input->info()->fixed_point_position())); + _var.set_info(TensorInfo(TensorShape(batch_norm_size), in->info()->num_channels(), in->info()->data_type(), in->info()->fixed_point_position())); } if(_beta.tensor() == nullptr) { - _beta.set_info(TensorInfo(TensorShape(batch_norm_size), input->info()->num_channels(), input->info()->data_type(), input->info()->fixed_point_position())); + _beta.set_info(TensorInfo(TensorShape(batch_norm_size), in->info()->num_channels(), in->info()->data_type(), in->info()->fixed_point_position())); } if(_gamma.tensor() == nullptr) { - _gamma.set_info(TensorInfo(TensorShape(batch_norm_size), input->info()->num_channels(), input->info()->data_type(), input->info()->fixed_point_position())); + _gamma.set_info(TensorInfo(TensorShape(batch_norm_size), in->info()->num_channels(), in->info()->data_type(), in->info()->fixed_point_position())); } if(_target_hint == TargetHint::OPENCL) { - func = instantiate(input, output, _mean, _var, _beta, _gamma, _epsilon); + func = instantiate(in, out, _mean, _var, _beta, _gamma, _epsilon); ARM_COMPUTE_LOG("Instantiating CLBatchNormalizationLayer"); } else { - func = instantiate(input, output, _mean, _var, _beta, _gamma, _epsilon); + func = instantiate(in, out, _mean, _var, _beta, _gamma, _epsilon); ARM_COMPUTE_LOG("Instantiating NEBatchNormalizationLayer"); } - ARM_COMPUTE_LOG(" Data Type: " << input->info()->data_type() - << " Input shape: " << input->info()->tensor_shape() - << " Output shape: " << output->info()->tensor_shape() + ARM_COMPUTE_LOG(" Data Type: " << in->info()->data_type() + << " Input shape: " << in->info()->tensor_shape() + << " Output shape: " << out->info()->tensor_shape() << std::endl); return func; diff --git a/src/graph/nodes/BranchLayer.cpp b/src/graph/nodes/BranchLayer.cpp new file mode 100644 index 0000000000..28f58c6815 --- /dev/null +++ b/src/graph/nodes/BranchLayer.cpp @@ -0,0 +1,176 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/graph/nodes/BranchLayer.h" + +#include "arm_compute/graph/Graph.h" +#include "arm_compute/graph/SubGraph.h" +#include "arm_compute/graph/Tensor.h" +#include "arm_compute/runtime/IFunction.h" +#include "support/ToolchainSupport.h" +#include "utils/TypePrinter.h" + +#include +#include +#include + +using namespace arm_compute::graph; + +namespace +{ +void depth_concatenate_output_info(ITensorInfo *info, ITensorInfo *sub_tensor_info) +{ + ARM_COMPUTE_ERROR_ON(info == nullptr); + ARM_COMPUTE_ERROR_ON(sub_tensor_info == nullptr); + + TensorShape info_shape = info->tensor_shape(); + const TensorShape &sub_tensor_info_shape = sub_tensor_info->tensor_shape(); + + // Update parent info and valid region + if(info_shape.total_size() == 0) + { + arm_compute::auto_init_if_empty(*info, + sub_tensor_info->tensor_shape(), + sub_tensor_info->num_channels(), + sub_tensor_info->data_type(), sub_tensor_info->fixed_point_position()); + info->set_valid_region(sub_tensor_info->valid_region()); + } + else + { + ARM_COMPUTE_ERROR_ON(info->num_channels() != sub_tensor_info->num_channels()); + ARM_COMPUTE_ERROR_ON(info->data_type() != sub_tensor_info->data_type()); + ARM_COMPUTE_ERROR_ON(info->fixed_point_position() != sub_tensor_info->fixed_point_position()); + + // Concatenate depth + ARM_COMPUTE_ERROR_ON(info_shape.x() != sub_tensor_info_shape.x()); + ARM_COMPUTE_ERROR_ON(info_shape.y() != sub_tensor_info_shape.y()); + info_shape.set(2, info_shape.z() + sub_tensor_info_shape.z()); + info->set_tensor_shape(info_shape); + + // Update valid region + arm_compute::ValidRegion info_valid_region = info->valid_region(); + info_valid_region.shape.set(2, info_shape.z()); + arm_compute::ValidRegion updated_region = arm_compute::intersect_valid_regions(info_valid_region, sub_tensor_info->valid_region()); + info->set_valid_region(updated_region); + } +} +} // namespace + +/** Branch function */ +class BranchFunction final : public arm_compute::IFunction +{ +public: + /** Default Constructor */ + BranchFunction() + : _graphs() + { + } + /** Registers graph to be executed by the branch function + * + * @param[in] graph Graph to register + */ + void register_graph(std::unique_ptr graph) + { + _graphs.push_back(std::move(graph)); + } + // Inherited methods overriden: + void run() override + { + for(auto &g : _graphs) + { + ARM_COMPUTE_ERROR_ON(g.get() == nullptr); + g->run(); + } + } + +private: 
+ std::vector> _graphs; +}; + +std::unique_ptr BranchLayer::instantiate_node(GraphContext &ctx, ITensorObject *input, ITensorObject *output) +{ + ARM_COMPUTE_ERROR_ON(_branch_merge_method != BranchMergeMethod::DEPTH_CONCATENATE); + ARM_COMPUTE_UNUSED(_branch_merge_method); + ARM_COMPUTE_ERROR_ON(input == nullptr || input->tensor() == nullptr); + ARM_COMPUTE_ERROR_ON(output == nullptr || output->tensor() == nullptr); + + // Create branch function + auto func = arm_compute::support::cpp14::make_unique(); + + // Track output SubTensorInfo and depth + TensorInfo out_info; + int depth = 0; + + // Constuct all sub-graphs given the input/output + for(auto &sg : _sub_graphs) + { + ARM_COMPUTE_ERROR_ON(sg.get() == nullptr); + + // IO buffers + std::unique_ptr in; + std::unique_ptr out; + SubTensor *out_sub_tensor = nullptr; + + // Create input sub-tensor + if(!sg->has_input()) + { + ARM_COMPUTE_ERROR_ON(dynamic_cast(input) == nullptr); + in = arm_compute::support::cpp14::make_unique(*dynamic_cast(input), + input->tensor()->info()->tensor_shape(), + Coordinates()); + } + + // Create output sub-tensor + if(!sg->has_output()) + { + ARM_COMPUTE_ERROR_ON(dynamic_cast(output) == nullptr); + out = arm_compute::support::cpp14::make_unique(*dynamic_cast(output), + output->tensor()->info()->tensor_shape(), + Coordinates(0, 0, depth)); + out_sub_tensor = dynamic_cast(out.get()); + } + + // Construct sub_graph + auto g = sg->construct(ctx.hints().target_hint(), std::move(in), std::move(out)); + + // Register graph to function + func->register_graph(std::move(g)); + + // Update and track depth + if(out_sub_tensor != nullptr) + { + ARM_COMPUTE_ERROR_ON(out_sub_tensor->tensor() == nullptr); + depth += out_sub_tensor->tensor()->info()->tensor_shape()[2]; + depth_concatenate_output_info(&out_info, out_sub_tensor->tensor()->info()); + } + } + + // Auto-init output + arm_compute::auto_init_if_empty(*output->tensor()->info(), + out_info.tensor_shape(), + out_info.num_channels(), + out_info.data_type(), + out_info.fixed_point_position()); + + return std::move(func); +} \ No newline at end of file diff --git a/src/graph/nodes/ConvolutionLayer.cpp b/src/graph/nodes/ConvolutionLayer.cpp index b47be8dc33..303780ff35 100644 --- a/src/graph/nodes/ConvolutionLayer.cpp +++ b/src/graph/nodes/ConvolutionLayer.cpp @@ -67,7 +67,8 @@ TensorShape calculate_convolution_layer_output_shape(const TensorShape &input_sh // Instantiate GEMM based convolution layer template -std::unique_ptr instantiate_function(ITensor *input, ITensor *weights, ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info) +std::unique_ptr instantiate_function(arm_compute::ITensor *input, arm_compute::ITensor *weights, arm_compute::ITensor *biases, arm_compute::ITensor *output, + const PadStrideInfo &conv_info, const WeightsInfo &weights_info) { auto conv = arm_compute::support::cpp14::make_unique(); conv->configure( @@ -81,7 +82,8 @@ std::unique_ptr instantiate_function(ITensor *input, ITe // Instantiate direct convolution layer template -std::unique_ptr instantiate_direct_function(ITensor *input, ITensor *weights, ITensor *biases, ITensor *output, const PadStrideInfo &conv_info) +std::unique_ptr instantiate_direct_function(arm_compute::ITensor *input, arm_compute::ITensor *weights, arm_compute::ITensor *biases, arm_compute::ITensor *output, + const PadStrideInfo &conv_info) { auto conv = arm_compute::support::cpp14::make_unique(); conv->configure( @@ -94,11 +96,13 @@ std::unique_ptr instantiate_direct_function(ITensor 
*inp } template -std::unique_ptr instantiate(ITensor *input, ITensor *weights, ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info, +std::unique_ptr instantiate(arm_compute::ITensor *input, arm_compute::ITensor *weights, arm_compute::ITensor *biases, arm_compute::ITensor *output, + const PadStrideInfo &conv_info, const WeightsInfo &weights_info, ConvolutionMethodHint conv_method); template <> -std::unique_ptr instantiate(ITensor *input, ITensor *weights, ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, +std::unique_ptr instantiate(arm_compute::ITensor *input, arm_compute::ITensor *weights, arm_compute::ITensor *biases, arm_compute::ITensor *output, + const PadStrideInfo &conv_info, const WeightsInfo &weights_info, ConvolutionMethodHint conv_method) { @@ -113,7 +117,8 @@ std::unique_ptr instantiate(ITensor } template <> -std::unique_ptr instantiate(ITensor *input, ITensor *weights, ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, +std::unique_ptr instantiate(arm_compute::ITensor *input, arm_compute::ITensor *weights, arm_compute::ITensor *biases, arm_compute::ITensor *output, + const PadStrideInfo &conv_info, const WeightsInfo &weights_info, ConvolutionMethodHint conv_method) { @@ -169,18 +174,25 @@ private: std::vector> _convolutions; }; -std::unique_ptr ConvolutionLayer::instantiate_node(GraphContext &ctx, ITensor *input, ITensor *output) +std::unique_ptr ConvolutionLayer::instantiate_node(GraphContext &ctx, ITensorObject *input, ITensorObject *output) { + ARM_COMPUTE_ERROR_ON(input == nullptr || input->tensor() == nullptr); + ARM_COMPUTE_ERROR_ON(output == nullptr || output->tensor() == nullptr); + + arm_compute::ITensor *in = input->tensor(); + arm_compute::ITensor *out = output->tensor(); + // Set weights and biases info if(_weights.tensor() == nullptr) { - _weights.set_info(TensorInfo(TensorShape(_conv_width, _conv_height, input->info()->dimension(2) / _num_groups, _ofm), - input->info()->num_channels(), input->info()->data_type(), - input->info()->fixed_point_position())); + _weights.set_info(TensorInfo(TensorShape(_conv_width, _conv_height, in->info()->dimension(2) / _num_groups, _ofm), + in->info()->num_channels(), + in->info()->data_type(), + in->info()->fixed_point_position())); } if(_biases.tensor() == nullptr) { - _biases.set_info(TensorInfo(TensorShape(_ofm), input->info()->num_channels(), input->info()->data_type(), input->info()->fixed_point_position())); + _biases.set_info(TensorInfo(TensorShape(_ofm), in->info()->num_channels(), in->info()->data_type(), in->info()->fixed_point_position())); } std::unique_ptr func; @@ -196,20 +208,20 @@ std::unique_ptr ConvolutionLayer::instantiate_node(Graph _biases.set_target(_target_hint); // Calculate output shape - TensorShape output_shape = calculate_convolution_layer_output_shape(input->info()->tensor_shape(), _weights.info().tensor_shape(), _conv_info); + TensorShape output_shape = calculate_convolution_layer_output_shape(in->info()->tensor_shape(), _weights.info().tensor_shape(), _conv_info); // Output auto inizialitation if not yet initialized - arm_compute::auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type(), input->info()->fixed_point_position()); + arm_compute::auto_init_if_empty(*out->info(), output_shape, 1, in->info()->data_type(), in->info()->fixed_point_position()); // Create appropriate convolution function if(_num_groups == 1) { - func = instantiate_convolution(input, output, conv_method_hint); + func = 
instantiate_convolution(in, out, conv_method_hint); ARM_COMPUTE_LOG("Instantiating CLConvolutionLayer"); } else { - func = instantiate_grouped_convolution(input, output, conv_method_hint); + func = instantiate_grouped_convolution(in, out, conv_method_hint); ARM_COMPUTE_LOG("Instantiating NEConvolutionLayer"); } @@ -224,11 +236,11 @@ std::unique_ptr ConvolutionLayer::instantiate_node(Graph _biases.allocate_and_fill_if_needed(); } - ARM_COMPUTE_LOG(" Data Type: " << input->info()->data_type() - << " Input Shape: " << input->info()->tensor_shape() + ARM_COMPUTE_LOG(" Data Type: " << in->info()->data_type() + << " Input Shape: " << in->info()->tensor_shape() << " Weights shape: " << _weights.info().tensor_shape() << " Biases Shape: " << _biases.info().tensor_shape() - << " Output Shape: " << output->info()->tensor_shape() + << " Output Shape: " << out->info()->tensor_shape() << " PadStrideInfo: " << _conv_info << " Groups: " << _num_groups << " WeightsInfo: " << _weights_info diff --git a/src/graph/nodes/DepthConcatenateLayer.cpp b/src/graph/nodes/DepthConcatenateLayer.cpp new file mode 100644 index 0000000000..2171db3a3e --- /dev/null +++ b/src/graph/nodes/DepthConcatenateLayer.cpp @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include +#include + +#include "arm_compute/graph/nodes/DepthConcatenateLayer.h" + +#include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/runtime/CL/functions/CLDepthConcatenate.h" +#include "arm_compute/runtime/NEON/functions/NEDepthConcatenate.h" +#include "arm_compute/runtime/Tensor.h" +#include "support/ToolchainSupport.h" +#include "utils/TypePrinter.h" + +using namespace arm_compute::graph; + +namespace +{ +template +std::unique_ptr instantiate_function(std::vector inputs, arm_compute::ITensor *output) +{ + auto depth_concat = arm_compute::support::cpp14::make_unique(); + std::vector casted_inputs; + std::transform(inputs.begin(), inputs.end(), std::back_inserter(casted_inputs), [](arm_compute::ITensor * input) + { + return dynamic_cast(input); + }); + depth_concat->configure( + casted_inputs, + dynamic_cast(output)); + + return std::move(depth_concat); +} + +template +std::unique_ptr instantiate(std::vector inputs, arm_compute::ITensor *output); + +template <> +std::unique_ptr instantiate(std::vector inputs, arm_compute::ITensor *output) +{ + return instantiate_function(std::move(inputs), output); +} + +template <> +std::unique_ptr instantiate(std::vector inputs, arm_compute::ITensor *output) +{ + return instantiate_function(std::move(inputs), output); +} +} // namespace + +std::unique_ptr DepthConcatenateLayer::instantiate_node(GraphContext &ctx, std::vector inputs, arm_compute::ITensor *output) +{ + std::unique_ptr func; + _hint = ctx.hints().target_hint(); + _inputs = std::move(inputs); + _output = output; + + if(_hint == TargetHint::OPENCL) + { + func = instantiate(_inputs, _output); + } + else + { + func = instantiate(_inputs, _output); + } + return func; +} + +void DepthConcatenateLayer::print_info() +{ + if(_hint == TargetHint::OPENCL) + { + std::cout << "Instantiating NEDepthConcatenate"; + } + else + { + std::cout << "Instantiating CLDepthConcatenate"; + } + + for(const auto &i : _inputs) + { + std::cout << " Input: " << i->info()->tensor_shape(); + } + std::cout << " Output: " << _output->info()->tensor_shape(); +} diff --git a/src/graph/nodes/FloorLayer.cpp b/src/graph/nodes/FloorLayer.cpp index 722cfdf609..3224799e3e 100644 --- a/src/graph/nodes/FloorLayer.cpp +++ b/src/graph/nodes/FloorLayer.cpp @@ -36,7 +36,7 @@ using namespace arm_compute::graph; namespace { template -std::unique_ptr instantiate_function(ITensor *input, ITensor *output) +std::unique_ptr instantiate_function(arm_compute::ITensor *input, arm_compute::ITensor *output) { auto floorlayer = arm_compute::support::cpp14::make_unique(); floorlayer->configure( @@ -47,40 +47,46 @@ std::unique_ptr instantiate_function(ITensor *input, ITe } template -std::unique_ptr instantiate(ITensor *input, ITensor *output); +std::unique_ptr instantiate(arm_compute::ITensor *input, arm_compute::ITensor *output); template <> -std::unique_ptr instantiate(ITensor *input, ITensor *output) +std::unique_ptr instantiate(arm_compute::ITensor *input, arm_compute::ITensor *output) { return instantiate_function(input, output); } template <> -std::unique_ptr instantiate(ITensor *input, ITensor *output) +std::unique_ptr instantiate(arm_compute::ITensor *input, arm_compute::ITensor *output) { return instantiate_function(input, output); } } // namespace -std::unique_ptr FloorLayer::instantiate_node(GraphContext &ctx, ITensor *input, ITensor *output) +std::unique_ptr FloorLayer::instantiate_node(GraphContext &ctx, ITensorObject *input, ITensorObject *output) { + ARM_COMPUTE_ERROR_ON(input == nullptr || input->tensor() == 
nullptr); + ARM_COMPUTE_ERROR_ON(output == nullptr || output->tensor() == nullptr); + std::unique_ptr func; _target_hint = ctx.hints().target_hint(); + arm_compute::ITensor *in = input->tensor(); + arm_compute::ITensor *out = output->tensor(); + if(_target_hint == TargetHint::OPENCL) { - func = instantiate(input, output); + func = instantiate(in, out); ARM_COMPUTE_LOG("Instantiating CLFloorLayer"); } else { - func = instantiate(input, output); + func = instantiate(in, out); ARM_COMPUTE_LOG("Instantiating NEFloorLayer"); } - ARM_COMPUTE_LOG(" Data Type: " << input->info()->data_type() - << " Input shape: " << input->info()->tensor_shape() - << " Output shape: " << output->info()->tensor_shape() + ARM_COMPUTE_LOG(" Data Type: " << in->info()->data_type() + << " Input shape: " << in->info()->tensor_shape() + << " Output shape: " << out->info()->tensor_shape() << std::endl); return func; diff --git a/src/graph/nodes/FullyConnectedLayer.cpp b/src/graph/nodes/FullyConnectedLayer.cpp index 6b21810a36..fa5ead8bdd 100644 --- a/src/graph/nodes/FullyConnectedLayer.cpp +++ b/src/graph/nodes/FullyConnectedLayer.cpp @@ -45,7 +45,7 @@ TensorShape calculate_fullyconnected_layer_output_shape(const TensorShape &input return TensorShape(output_neurons, batches); } template -std::unique_ptr instantiate_function(ITensor *input, Tensor &weights, Tensor &biases, ITensor *output) +std::unique_ptr instantiate_function(arm_compute::ITensor *input, Tensor &weights, Tensor &biases, arm_compute::ITensor *output) { bool weights_are_loaded = weights.tensor() != nullptr; bool biases_are_loaded = biases.tensor() != nullptr; @@ -69,27 +69,33 @@ std::unique_ptr instantiate_function(ITensor *input, Ten } template -std::unique_ptr instantiate(ITensor *input, Tensor &weights, Tensor &biases, ITensor *output); +std::unique_ptr instantiate(arm_compute::ITensor *input, Tensor &weights, Tensor &biases, arm_compute::ITensor *output); template <> -std::unique_ptr instantiate(ITensor *input, Tensor &weights, Tensor &biases, ITensor *output) +std::unique_ptr instantiate(arm_compute::ITensor *input, Tensor &weights, Tensor &biases, arm_compute::ITensor *output) { - return instantiate_function(input, weights, biases, output); + return instantiate_function(input, weights, biases, output); } template <> -std::unique_ptr instantiate(ITensor *input, Tensor &weights, Tensor &biases, ITensor *output) +std::unique_ptr instantiate(arm_compute::ITensor *input, Tensor &weights, Tensor &biases, arm_compute::ITensor *output) { - return instantiate_function(input, weights, biases, output); + return instantiate_function(input, weights, biases, output); } } // namespace -std::unique_ptr FullyConnectedLayer::instantiate_node(GraphContext &ctx, ITensor *input, ITensor *output) +std::unique_ptr FullyConnectedLayer::instantiate_node(GraphContext &ctx, ITensorObject *input, ITensorObject *output) { + ARM_COMPUTE_ERROR_ON(input == nullptr || input->tensor() == nullptr); + ARM_COMPUTE_ERROR_ON(output == nullptr || output->tensor() == nullptr); + + arm_compute::ITensor *in = input->tensor(); + arm_compute::ITensor *out = output->tensor(); + if(_weights.tensor() == nullptr) { unsigned int num_weights = 1; - unsigned int num_dimensions = input->info()->num_dimensions(); + unsigned int num_dimensions = in->info()->num_dimensions(); // Ignore the batch dimension if there is one: if(num_dimensions == 2 || num_dimensions == 4) { @@ -97,39 +103,37 @@ std::unique_ptr FullyConnectedLayer::instantiate_node(Gr } for(unsigned int i = 0; i < num_dimensions; i++) { - 
num_weights *= input->info()->dimension(i); + num_weights *= in->info()->dimension(i); } - _weights.set_info(TensorInfo(TensorShape(num_weights, _num_neurons), input->info()->num_channels(), input->info()->data_type(), input->info()->fixed_point_position())); + _weights.set_info(TensorInfo(TensorShape(num_weights, _num_neurons), in->info()->num_channels(), in->info()->data_type(), in->info()->fixed_point_position())); } if(_biases.tensor() == nullptr) { - _biases.set_info(TensorInfo(TensorShape(_num_neurons), input->info()->num_channels(), input->info()->data_type(), input->info()->fixed_point_position())); + _biases.set_info(TensorInfo(TensorShape(_num_neurons), in->info()->num_channels(), in->info()->data_type(), in->info()->fixed_point_position())); } // Auto configure output - arm_compute::auto_init_if_empty(*output->info(), - calculate_fullyconnected_layer_output_shape(input->info()->tensor_shape(), _num_neurons), - input->info()->num_channels(), input->info()->data_type(), input->info()->fixed_point_position()); + arm_compute::auto_init_if_empty(*out->info(), + calculate_fullyconnected_layer_output_shape(in->info()->tensor_shape(), _num_neurons), + in->info()->num_channels(), in->info()->data_type(), in->info()->fixed_point_position()); std::unique_ptr func; _target_hint = ctx.hints().target_hint(); if(_target_hint == TargetHint::OPENCL) { - func = instantiate(input, _weights, _biases, output); - ARM_COMPUTE_LOG("Instantiating CLFullyConnectedLayer"); + func = instantiate(in, _weights, _biases, out); } else { - func = instantiate(input, _weights, _biases, output); - ARM_COMPUTE_LOG("Instantiating NEFullyConnectedLayer"); + func = instantiate(in, _weights, _biases, out); } - ARM_COMPUTE_LOG(" Type: " << input->info()->data_type() - << " Input Shape: " << input->info()->tensor_shape() + ARM_COMPUTE_LOG(" Type: " << in->info()->data_type() + << " Input Shape: " << in->info()->tensor_shape() << " Weights shape: " << _weights.info().tensor_shape() << " Biases Shape: " << _biases.info().tensor_shape() - << " Output Shape: " << output->info()->tensor_shape() + << " Output Shape: " << out->info()->tensor_shape() << std::endl); return func; diff --git a/src/graph/nodes/L2NormalizeLayer.cpp b/src/graph/nodes/L2NormalizeLayer.cpp index 46d1552733..7abc69c13a 100644 --- a/src/graph/nodes/L2NormalizeLayer.cpp +++ b/src/graph/nodes/L2NormalizeLayer.cpp @@ -36,7 +36,7 @@ using namespace arm_compute::graph; namespace { template -std::unique_ptr instantiate_function(ITensor *input, ITensor *output, unsigned int axis, float epsilon) +std::unique_ptr instantiate_function(arm_compute::ITensor *input, arm_compute::ITensor *output, unsigned int axis, float epsilon) { auto l2norm = arm_compute::support::cpp14::make_unique(); l2norm->configure( @@ -49,40 +49,46 @@ std::unique_ptr instantiate_function(ITensor *input, ITe } template -std::unique_ptr instantiate(ITensor *input, ITensor *output, unsigned int axis, float epsilon); +std::unique_ptr instantiate(arm_compute::ITensor *input, arm_compute::ITensor *output, unsigned int axis, float epsilon); template <> -std::unique_ptr instantiate(ITensor *input, ITensor *output, unsigned int axis, float epsilon) +std::unique_ptr instantiate(arm_compute::ITensor *input, arm_compute::ITensor *output, unsigned int axis, float epsilon) { return instantiate_function(input, output, axis, epsilon); } template <> -std::unique_ptr instantiate(ITensor *input, ITensor *output, unsigned int axis, float epsilon) +std::unique_ptr instantiate(arm_compute::ITensor *input, 
arm_compute::ITensor *output, unsigned int axis, float epsilon) { return instantiate_function(input, output, axis, epsilon); } } // namespace -std::unique_ptr L2NormalizeLayer::instantiate_node(GraphContext &ctx, ITensor *input, ITensor *output) +std::unique_ptr L2NormalizeLayer::instantiate_node(GraphContext &ctx, ITensorObject *input, ITensorObject *output) { + ARM_COMPUTE_ERROR_ON(input == nullptr || input->tensor() == nullptr); + ARM_COMPUTE_ERROR_ON(output == nullptr || output->tensor() == nullptr); + std::unique_ptr func; _target_hint = ctx.hints().target_hint(); + arm_compute::ITensor *in = input->tensor(); + arm_compute::ITensor *out = output->tensor(); + if(_target_hint == TargetHint::OPENCL) { - func = instantiate(input, output, _axis, _epsilon); + func = instantiate(in, out, _axis, _epsilon); ARM_COMPUTE_LOG("Instantiating CLL2NormalizeLayer"); } else { - func = instantiate(input, output, _axis, _epsilon); + func = instantiate(in, out, _axis, _epsilon); ARM_COMPUTE_LOG("Instantiating NEL2NormalizeLayer"); } - ARM_COMPUTE_LOG(" Data Type: " << input->info()->data_type() - << " Input shape: " << input->info()->tensor_shape() - << " Output shape: " << output->info()->tensor_shape() + ARM_COMPUTE_LOG(" Data Type: " << in->info()->data_type() + << " Input shape: " << in->info()->tensor_shape() + << " Output shape: " << out->info()->tensor_shape() << std::endl); return func; diff --git a/src/graph/nodes/NormalizationLayer.cpp b/src/graph/nodes/NormalizationLayer.cpp index 47f0891dfb..319a4252b6 100644 --- a/src/graph/nodes/NormalizationLayer.cpp +++ b/src/graph/nodes/NormalizationLayer.cpp @@ -36,7 +36,7 @@ using namespace arm_compute::graph; namespace { template -std::unique_ptr instantiate_function(ITensor *input, ITensor *output, const NormalizationLayerInfo &norm_info) +std::unique_ptr instantiate_function(arm_compute::ITensor *input, arm_compute::ITensor *output, const NormalizationLayerInfo &norm_info) { auto norm = arm_compute::support::cpp14::make_unique(); norm->configure( @@ -48,18 +48,18 @@ std::unique_ptr instantiate_function(ITensor *input, ITe } template -std::unique_ptr instantiate(ITensor *input, ITensor *output, const NormalizationLayerInfo &norm_info); +std::unique_ptr instantiate(arm_compute::ITensor *input, arm_compute::ITensor *output, const NormalizationLayerInfo &norm_info); template <> -std::unique_ptr instantiate(ITensor *input, ITensor *output, const NormalizationLayerInfo &norm_info) +std::unique_ptr instantiate(arm_compute::ITensor *input, arm_compute::ITensor *output, const NormalizationLayerInfo &norm_info) { - return instantiate_function(input, output, norm_info); + return instantiate_function(input, output, norm_info); } template <> -std::unique_ptr instantiate(ITensor *input, ITensor *output, const NormalizationLayerInfo &norm_info) +std::unique_ptr instantiate(arm_compute::ITensor *input, arm_compute::ITensor *output, const NormalizationLayerInfo &norm_info) { - return instantiate_function(input, output, norm_info); + return instantiate_function(input, output, norm_info); } } // namespace @@ -68,25 +68,29 @@ NormalizationLayer::NormalizationLayer(const NormalizationLayerInfo norm_info) { } -std::unique_ptr NormalizationLayer::instantiate_node(GraphContext &ctx, ITensor *input, ITensor *output) +std::unique_ptr NormalizationLayer::instantiate_node(GraphContext &ctx, ITensorObject *input, ITensorObject *output) { + ARM_COMPUTE_ERROR_ON(input == nullptr || input->tensor() == nullptr); + ARM_COMPUTE_ERROR_ON(output == nullptr || output->tensor() == 
nullptr); + std::unique_ptr func; _target_hint = ctx.hints().target_hint(); + arm_compute::ITensor *in = input->tensor(); + arm_compute::ITensor *out = output->tensor(); + if(_target_hint == TargetHint::OPENCL) { - func = instantiate(input, output, _norm_info); - ARM_COMPUTE_LOG("Instantiating CLNormalizationLayer"); + func = instantiate(in, out, _norm_info); } else { - func = instantiate(input, output, _norm_info); - ARM_COMPUTE_LOG("Instantiating NENormalizationLayer"); + func = instantiate(in, out, _norm_info); } - ARM_COMPUTE_LOG(" Data Type: " << input->info()->data_type() - << " Input shape: " << input->info()->tensor_shape() - << " Output shape: " << output->info()->tensor_shape() + ARM_COMPUTE_LOG(" Data Type: " << in->info()->data_type() + << " Input shape: " << in->info()->tensor_shape() + << " Output shape: " << out->info()->tensor_shape() << " Normalization info: " << _norm_info << std::endl); diff --git a/src/graph/nodes/PoolingLayer.cpp b/src/graph/nodes/PoolingLayer.cpp index 317cf4d14f..904ba18169 100644 --- a/src/graph/nodes/PoolingLayer.cpp +++ b/src/graph/nodes/PoolingLayer.cpp @@ -36,7 +36,7 @@ using namespace arm_compute::graph; namespace { template -std::unique_ptr instantiate_function(ITensor *input, ITensor *output, const PoolingLayerInfo &pool_info) +std::unique_ptr instantiate_function(arm_compute::ITensor *input, arm_compute::ITensor *output, const PoolingLayerInfo &pool_info) { auto pool = arm_compute::support::cpp14::make_unique(); pool->configure( @@ -48,18 +48,18 @@ std::unique_ptr instantiate_function(ITensor *input, ITe } template -std::unique_ptr instantiate(ITensor *input, ITensor *output, const PoolingLayerInfo &pool_info); +std::unique_ptr instantiate(arm_compute::ITensor *input, arm_compute::ITensor *output, const PoolingLayerInfo &pool_info); template <> -std::unique_ptr instantiate(ITensor *input, ITensor *output, const PoolingLayerInfo &pool_info) +std::unique_ptr instantiate(arm_compute::ITensor *input, arm_compute::ITensor *output, const PoolingLayerInfo &pool_info) { - return instantiate_function(input, output, pool_info); + return instantiate_function(input, output, pool_info); } template <> -std::unique_ptr instantiate(ITensor *input, ITensor *output, const PoolingLayerInfo &pool_info) +std::unique_ptr instantiate(arm_compute::ITensor *input, arm_compute::ITensor *output, const PoolingLayerInfo &pool_info) { - return instantiate_function(input, output, pool_info); + return instantiate_function(input, output, pool_info); } } // namespace @@ -68,25 +68,29 @@ PoolingLayer::PoolingLayer(const PoolingLayerInfo pool_info) { } -std::unique_ptr PoolingLayer::instantiate_node(GraphContext &ctx, ITensor *input, ITensor *output) +std::unique_ptr PoolingLayer::instantiate_node(GraphContext &ctx, ITensorObject *input, ITensorObject *output) { + ARM_COMPUTE_ERROR_ON(input == nullptr || input->tensor() == nullptr); + ARM_COMPUTE_ERROR_ON(output == nullptr || output->tensor() == nullptr); + std::unique_ptr func; _target_hint = ctx.hints().target_hint(); + arm_compute::ITensor *in = input->tensor(); + arm_compute::ITensor *out = output->tensor(); + if(_target_hint == TargetHint::OPENCL) { - func = instantiate(input, output, _pool_info); - ARM_COMPUTE_LOG("Instantiating CLPoolingLayer"); + func = instantiate(in, out, _pool_info); } else { - func = instantiate(input, output, _pool_info); - ARM_COMPUTE_LOG("Instantiating NEPoolingLayer"); + func = instantiate(in, out, _pool_info); } - ARM_COMPUTE_LOG(" Data Type: " << input->info()->data_type() - << " Input shape: 
" << input->info()->tensor_shape() - << " Output shape: " << output->info()->tensor_shape() + ARM_COMPUTE_LOG(" Data Type: " << in->info()->data_type() + << " Input shape: " << in->info()->tensor_shape() + << " Output shape: " << out->info()->tensor_shape() << " Pooling info: " << _pool_info << std::endl); return func; diff --git a/src/graph/nodes/SoftmaxLayer.cpp b/src/graph/nodes/SoftmaxLayer.cpp index 86282448f7..e3345f1400 100644 --- a/src/graph/nodes/SoftmaxLayer.cpp +++ b/src/graph/nodes/SoftmaxLayer.cpp @@ -36,7 +36,7 @@ using namespace arm_compute::graph; namespace { template -std::unique_ptr instantiate_function(ITensor *input, ITensor *output) +std::unique_ptr instantiate_function(arm_compute::ITensor *input, arm_compute::ITensor *output) { auto softmax = arm_compute::support::cpp14::make_unique(); softmax->configure( @@ -47,40 +47,44 @@ std::unique_ptr instantiate_function(ITensor *input, ITe } template -std::unique_ptr instantiate(ITensor *input, ITensor *output); +std::unique_ptr instantiate(arm_compute::ITensor *input, arm_compute::ITensor *output); template <> -std::unique_ptr instantiate(ITensor *input, ITensor *output) +std::unique_ptr instantiate(arm_compute::ITensor *input, arm_compute::ITensor *output) { - return instantiate_function(input, output); + return instantiate_function(input, output); } template <> -std::unique_ptr instantiate(ITensor *input, ITensor *output) +std::unique_ptr instantiate(arm_compute::ITensor *input, arm_compute::ITensor *output) { - return instantiate_function(input, output); + return instantiate_function(input, output); } } // namespace -std::unique_ptr SoftmaxLayer::instantiate_node(GraphContext &ctx, ITensor *input, ITensor *output) +std::unique_ptr SoftmaxLayer::instantiate_node(GraphContext &ctx, ITensorObject *input, ITensorObject *output) { + ARM_COMPUTE_ERROR_ON(input == nullptr || input->tensor() == nullptr); + ARM_COMPUTE_ERROR_ON(output == nullptr || output->tensor() == nullptr); + std::unique_ptr func; _target_hint = ctx.hints().target_hint(); + arm_compute::ITensor *in = input->tensor(); + arm_compute::ITensor *out = output->tensor(); + if(_target_hint == TargetHint::OPENCL) { - func = instantiate(input, output); - ARM_COMPUTE_LOG("Instantiating CLSoftmaxLayer"); + func = instantiate(in, out); } else { - func = instantiate(input, output); - ARM_COMPUTE_LOG("Instantiating NESoftmaxLayer"); + func = instantiate(in, out); } - ARM_COMPUTE_LOG(" Data Type: " << input->info()->data_type() - << " Input shape: " << input->info()->tensor_shape() - << " Output shape: " << output->info()->tensor_shape() + ARM_COMPUTE_LOG(" Data Type: " << in->info()->data_type() + << " Input shape: " << in->info()->tensor_shape() + << " Output shape: " << out->info()->tensor_shape() << std::endl); return func; -- cgit v1.2.1