From 5c2fb3f34462632b99331e2cc2d964c99fc1782b Mon Sep 17 00:00:00 2001
From: Georgios Pinitas
Date: Tue, 1 May 2018 15:26:20 +0100
Subject: COMPMID-997: Add support for node's name in GraphAPI.

Change-Id: I0ca02e42807c1ad9afeffb7202a3556feb11442f
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/129701
Tested-by: Jenkins
Reviewed-by: Anthony Barbier
Reviewed-by: Georgios Pinitas
---
 arm_compute/graph/Workload.h                   | 21 ++++
 arm_compute/graph/frontend/ILayer.h            | 22 +++++++
 arm_compute/graph/frontend/IStreamOperators.h  | 12 ++++
 arm_compute/graph/frontend/Layers.h            | 26 ++++----
 examples/graph_alexnet.cpp                     | 34 ++++++----
 examples/graph_lenet.cpp                       | 12 ++--
 examples/graph_mobilenet.cpp                   | 19 ++++--
 examples/graph_resnet50.cpp                    | 91 +++++++++++++-----------
 examples/graph_vgg16.cpp                       | 58 ++++++++++------
 examples/graph_vgg19.cpp                       | 67 ++++++++++++-------
 src/graph/Workload.cpp                         | 20 +++++-
 tests/SConscript                               | 32 +++------
 tests/framework/Profiler.cpp                   |  8 +--
 tests/framework/instruments/OpenCLTimer.cpp    | 66 +++++++++++++------
 tests/framework/instruments/OpenCLTimer.h      | 13 +++-
 tests/framework/instruments/SchedulerTimer.cpp | 62 +++++++++++++---
 tests/framework/instruments/SchedulerTimer.h   | 16 +++--
 tests/framework/printers/PrettyPrinter.cpp     |  4 +-
 18 files changed, 396 insertions(+), 187 deletions(-)

diff --git a/arm_compute/graph/Workload.h b/arm_compute/graph/Workload.h
index 11bb22ea9a..5ca9fd6c58 100644
--- a/arm_compute/graph/Workload.h
+++ b/arm_compute/graph/Workload.h
@@ -39,6 +39,27 @@ class INode;
 class Tensor;
 class Graph;
 
+struct ExecutionTask;
+
+void execute_task(ExecutionTask &task);
+
+/** Task executor */
+class TaskExecutor final
+{
+private:
+    /** Default constructor **/
+    TaskExecutor();
+
+public:
+    /** Task executor accessor
+     *
+     * @return Task executor instance
+     */
+    static TaskExecutor &get();
+    /** Function that is responsible for executing tasks */
+    std::function<decltype(execute_task)> execute_function;
+};
+
 /** Execution task
  *
  * Contains all the information required to execute a given task
diff --git a/arm_compute/graph/frontend/ILayer.h b/arm_compute/graph/frontend/ILayer.h
index 5add8ab440..f7caaea47d 100644
--- a/arm_compute/graph/frontend/ILayer.h
+++ b/arm_compute/graph/frontend/ILayer.h
@@ -46,6 +46,28 @@ public:
      * @return ID of the created node.
      */
     virtual NodeID create_layer(IStream &s) = 0;
+    /** Sets the name of the layer
+     *
+     * @param[in] name Name of the layer
+     *
+     * @return The layer object
+     */
+    ILayer &set_name(std::string name)
+    {
+        _name = name;
+        return *this;
+    }
+    /** Layer name accessor
+     *
+     * @return Returns the name of the layer
+     */
+    const std::string &name() const
+    {
+        return _name;
+    }
+
+private:
+    std::string _name = {};
 };
 } // namespace frontend
 } // namespace graph
diff --git a/arm_compute/graph/frontend/IStreamOperators.h b/arm_compute/graph/frontend/IStreamOperators.h
index 1eb6522935..350d78fd1c 100644
--- a/arm_compute/graph/frontend/IStreamOperators.h
+++ b/arm_compute/graph/frontend/IStreamOperators.h
@@ -48,6 +48,18 @@ inline IStream &operator<<(IStream &s, ILayer &&layer)
     s.add_layer(layer);
     return s;
 }
+/** Overloaded stream operator to add a node to the graph
+ *
+ * @param[in, out] s     Stream to add the layer to
+ * @param[in]      layer Layer to be added
+ *
+ * @return Updated stream
+ */
+inline IStream &operator<<(IStream &s, ILayer &layer)
+{
+    s.add_layer(layer);
+    return s;
+}
 /** Overloaded stream operator to provide a target hint to the graph
  *
  * @param[in, out] s Stream to provide the hint to
diff --git a/arm_compute/graph/frontend/Layers.h b/arm_compute/graph/frontend/Layers.h
index 2e7c50e6da..54cf515aa7 100644
--- a/arm_compute/graph/frontend/Layers.h
+++ b/arm_compute/graph/frontend/Layers.h
@@ -57,7 +57,7 @@ public:
     NodeID create_layer(IStream &s) override
     {
-        NodeParams common_params = { "", s.hints().target_hint };
+        NodeParams common_params = { name(), s.hints().target_hint };
         return GraphBuilder::add_input_node(s.graph(), common_params, _desc, std::move(_accessor));
     }
 
@@ -81,7 +81,7 @@ public:
 
     NodeID create_layer(IStream &s) override
     {
-        NodeParams common_params = { "", s.hints().target_hint };
+        NodeParams common_params = { name(), s.hints().target_hint };
         NodeIdxPair input = { s.tail_node(), 0 };
         return GraphBuilder::add_output_node(s.graph(), common_params, input, std::move(_accessor));
     }
@@ -105,7 +105,7 @@ public:
 
     NodeID create_layer(IStream &s) override
     {
-        NodeParams common_params = { "", s.hints().target_hint };
+        NodeParams common_params = { name(), s.hints().target_hint };
         NodeIdxPair input = { s.tail_node(), 0 };
         return GraphBuilder::add_activation_node(s.graph(), common_params, input, _act_info);
     }
@@ -140,7 +140,7 @@ public:
         ARM_COMPUTE_ERROR_ON(_mean == nullptr);
         ARM_COMPUTE_ERROR_ON(_var == nullptr);
 
-        NodeParams common_params = { "", s.hints().target_hint };
+        NodeParams common_params = { name(), s.hints().target_hint };
         NodeIdxPair input = { s.tail_node(), 0 };
         return GraphBuilder::add_batch_normalization_node(s.graph(), common_params, input, _epsilon,
                                                           std::move(_mean), std::move(_var), std::move(_beta), std::move(_gamma));
@@ -194,7 +194,7 @@ public:
     NodeID create_layer(IStream &s) override
     {
         NodeIdxPair input = { s.tail_node(), 0 };
-        NodeParams common_params = { "", s.hints().target_hint };
+        NodeParams common_params = { name(), s.hints().target_hint };
         return GraphBuilder::add_convolution_node(s.graph(), common_params, input,
                                                   Size2D(_conv_width, _conv_height), _ofm, _conv_info, _num_groups,
                                                   s.hints().convolution_method_hint,
@@ -244,7 +244,7 @@ public:
     NodeID create_layer(IStream &s) override
     {
         NodeIdxPair input = { s.tail_node(), 0 };
-        NodeParams common_params = { "", s.hints().target_hint };
+        NodeParams common_params = { name(), s.hints().target_hint };
         return GraphBuilder::add_depthwise_convolution_node(s.graph(), common_params, input,
Size2D(_conv_width, _conv_height), _conv_info, s.hints().depthwise_convolution_method_hint, @@ -271,7 +271,7 @@ public: NodeID create_layer(IStream &s) override { - NodeParams common_params = { "", s.hints().target_hint }; + NodeParams common_params = { name(), s.hints().target_hint }; NodeIdxPair input = { s.tail_node(), 0 }; return GraphBuilder::add_flatten_node(s.graph(), common_params, input); } @@ -296,7 +296,7 @@ public: NodeID create_layer(IStream &s) override { - NodeParams common_params = { "", s.hints().target_hint }; + NodeParams common_params = { name(), s.hints().target_hint }; NodeIdxPair input = { s.tail_node(), 0 }; return GraphBuilder::add_fully_connected_layer(s.graph(), common_params, input, _num_outputs, std::move(_weights), std::move(_bias)); @@ -323,7 +323,7 @@ public: NodeID create_layer(IStream &s) override { - NodeParams common_params = { "", s.hints().target_hint }; + NodeParams common_params = { name(), s.hints().target_hint }; NodeIdxPair input = { s.tail_node(), 0 }; return GraphBuilder::add_normalization_node(s.graph(), common_params, input, _norm_info); } @@ -347,7 +347,7 @@ public: NodeID create_layer(IStream &s) override { - NodeParams common_params = { "", s.hints().target_hint }; + NodeParams common_params = { name(), s.hints().target_hint }; NodeIdxPair input = { s.tail_node(), 0 }; return GraphBuilder::add_pooling_node(s.graph(), common_params, input, _pool_info); } @@ -371,7 +371,7 @@ public: NodeID create_layer(IStream &s) override { - NodeParams common_params = { "", s.hints().target_hint }; + NodeParams common_params = { name(), s.hints().target_hint }; NodeIdxPair input = { s.tail_node(), 0 }; return GraphBuilder::add_reshape_node(s.graph(), common_params, input, _shape); } @@ -395,7 +395,7 @@ public: NodeID create_layer(IStream &s) override { - NodeParams common_params = { "", s.hints().target_hint }; + NodeParams common_params = { name(), s.hints().target_hint }; NodeIdxPair input = { s.tail_node(), 0 }; return GraphBuilder::add_softmax_node(s.graph(), common_params, input, _beta); } @@ -441,7 +441,7 @@ public: NodeID create_layer(IStream &s) override { NodeID nid = EmptyNodeID; - NodeParams common_params = { "", s.hints().target_hint }; + NodeParams common_params = { name(), s.hints().target_hint }; if(_sub_streams.size() == 1 && _sub_streams.at(0) != nullptr) { nid = _sub_streams[0]->tail_node(); diff --git a/examples/graph_alexnet.cpp b/examples/graph_alexnet.cpp index 6a3c14b7bb..d654f9ece9 100644 --- a/examples/graph_alexnet.cpp +++ b/examples/graph_alexnet.cpp @@ -103,9 +103,10 @@ public: get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv1_w.npy"), get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv1_b.npy"), PadStrideInfo(4, 4, 0, 0)) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) - << NormalizationLayer(NormalizationLayerInfo(NormType::CROSS_MAP, 5, 0.0001f, 0.75f)) - << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0))) + .set_name("conv1") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("relu1") + << NormalizationLayer(NormalizationLayerInfo(NormType::CROSS_MAP, 5, 0.0001f, 0.75f)).set_name("norm1") + << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0))).set_name("pool1") // Layer 2 << convolution_5x5_hint << ConvolutionLayer( @@ -113,9 +114,10 @@ public: get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv2_w.npy"), get_weights_accessor(data_path, 
"/cnn_data/alexnet_model/conv2_b.npy"), PadStrideInfo(1, 1, 2, 2), 2) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) - << NormalizationLayer(NormalizationLayerInfo(NormType::CROSS_MAP, 5, 0.0001f, 0.75f)) - << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0))) + .set_name("conv2") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("relu2") + << NormalizationLayer(NormalizationLayerInfo(NormType::CROSS_MAP, 5, 0.0001f, 0.75f)).set_name("norm2") + << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0))).set_name("pool2") << convolution_3x3_hint // Layer 3 << ConvolutionLayer( @@ -123,41 +125,47 @@ public: get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv3_w.npy"), get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv3_b.npy"), PadStrideInfo(1, 1, 1, 1)) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv3") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("relu3") // Layer 4 << ConvolutionLayer( 3U, 3U, 384U, get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv4_w.npy"), get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv4_b.npy"), PadStrideInfo(1, 1, 1, 1), 2) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv4") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("relu4") // Layer 5 << ConvolutionLayer( 3U, 3U, 256U, get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv5_w.npy"), get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv5_b.npy"), PadStrideInfo(1, 1, 1, 1), 2) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) - << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0))) + .set_name("conv5") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("relu5") + << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0))).set_name("pool5") // Layer 6 << FullyConnectedLayer( 4096U, get_weights_accessor(data_path, "/cnn_data/alexnet_model/fc6_w.npy"), get_weights_accessor(data_path, "/cnn_data/alexnet_model/fc6_b.npy")) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("fc6") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("relu6") // Layer 7 << FullyConnectedLayer( 4096U, get_weights_accessor(data_path, "/cnn_data/alexnet_model/fc7_w.npy"), get_weights_accessor(data_path, "/cnn_data/alexnet_model/fc7_b.npy")) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("fc7") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("relu7") // Layer 8 << FullyConnectedLayer( 1000U, get_weights_accessor(data_path, "/cnn_data/alexnet_model/fc8_w.npy"), get_weights_accessor(data_path, "/cnn_data/alexnet_model/fc8_b.npy")) + .set_name("fc8") // Softmax - << SoftmaxLayer() + << SoftmaxLayer().set_name("prob") << OutputLayer(get_output_accessor(label, 5)); // Finalize graph diff --git a/examples/graph_lenet.cpp b/examples/graph_lenet.cpp index 8aca0fa45b..895d9aad4e 100644 --- a/examples/graph_lenet.cpp +++ b/examples/graph_lenet.cpp @@ -84,23 +84,27 @@ public: get_weights_accessor(data_path, 
"/cnn_data/lenet_model/conv1_w.npy"), get_weights_accessor(data_path, "/cnn_data/lenet_model/conv1_b.npy"), PadStrideInfo(1, 1, 0, 0)) - << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, PadStrideInfo(2, 2, 0, 0))) + .set_name("conv1") + << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, PadStrideInfo(2, 2, 0, 0))).set_name("pool1") << ConvolutionLayer( 5U, 5U, 50U, get_weights_accessor(data_path, "/cnn_data/lenet_model/conv2_w.npy"), get_weights_accessor(data_path, "/cnn_data/lenet_model/conv2_b.npy"), PadStrideInfo(1, 1, 0, 0)) - << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, PadStrideInfo(2, 2, 0, 0))) + .set_name("conv2") + << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, PadStrideInfo(2, 2, 0, 0))).set_name("pool2") << FullyConnectedLayer( 500U, get_weights_accessor(data_path, "/cnn_data/lenet_model/ip1_w.npy"), get_weights_accessor(data_path, "/cnn_data/lenet_model/ip1_b.npy")) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("ip1") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("relu") << FullyConnectedLayer( 10U, get_weights_accessor(data_path, "/cnn_data/lenet_model/ip2_w.npy"), get_weights_accessor(data_path, "/cnn_data/lenet_model/ip2_b.npy")) - << SoftmaxLayer() + .set_name("ip2") + << SoftmaxLayer().set_name("prob") << OutputLayer(get_output_accessor("")); // Finalize graph diff --git a/examples/graph_mobilenet.cpp b/examples/graph_mobilenet.cpp index 6e2921a8a6..870e67daa5 100644 --- a/examples/graph_mobilenet.cpp +++ b/examples/graph_mobilenet.cpp @@ -132,13 +132,15 @@ public: get_weights_accessor(data_path, "Conv2d_0_weights.npy", DataLayout::NCHW), std::unique_ptr(nullptr), PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::FLOOR)) + .set_name("Conv2d_0") << BatchNormalizationLayer( get_weights_accessor(data_path, "Conv2d_0_BatchNorm_moving_mean.npy"), get_weights_accessor(data_path, "Conv2d_0_BatchNorm_moving_variance.npy"), get_weights_accessor(data_path, "Conv2d_0_BatchNorm_gamma.npy"), get_weights_accessor(data_path, "Conv2d_0_BatchNorm_beta.npy"), 0.001f) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f)); + .set_name("Conv2d_0/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f)).set_name("Conv2d_0/Relu6"); graph << get_dwsc_node(data_path, "Conv2d_1", 64 * depth_scale, PadStrideInfo(1, 1, 1, 1), PadStrideInfo(1, 1, 0, 0)); graph << get_dwsc_node(data_path, "Conv2d_2", 128 * depth_scale, PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::CEIL), PadStrideInfo(1, 1, 0, 0)); graph << get_dwsc_node(data_path, "Conv2d_3", 128 * depth_scale, PadStrideInfo(1, 1, 1, 1, 1, 1, DimensionRoundingType::CEIL), PadStrideInfo(1, 1, 0, 0)); @@ -152,14 +154,15 @@ public: graph << get_dwsc_node(data_path, "Conv2d_11", 512 * depth_scale, PadStrideInfo(1, 1, 1, 1, 1, 1, DimensionRoundingType::CEIL), PadStrideInfo(1, 1, 0, 0)); graph << get_dwsc_node(data_path, "Conv2d_12", 1024 * depth_scale, PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::CEIL), PadStrideInfo(1, 1, 0, 0)); graph << get_dwsc_node(data_path, "Conv2d_13", 1024 * depth_scale, PadStrideInfo(1, 1, 1, 1, 1, 1, DimensionRoundingType::CEIL), PadStrideInfo(1, 1, 0, 0)); - graph << PoolingLayer(PoolingLayerInfo(PoolingType::AVG)) + graph << PoolingLayer(PoolingLayerInfo(PoolingType::AVG)).set_name("Logits/AvgPool_1a") << ConvolutionLayer( 1U, 1U, 1001U, get_weights_accessor(data_path, 
"Logits_Conv2d_1c_1x1_weights.npy", DataLayout::NCHW), get_weights_accessor(data_path, "Logits_Conv2d_1c_1x1_biases.npy"), PadStrideInfo(1, 1, 0, 0)) - << ReshapeLayer(TensorShape(1001U)) - << SoftmaxLayer() + .set_name("Logits/Conv2d_1c_1x1") + << ReshapeLayer(TensorShape(1001U)).set_name("Reshape") + << SoftmaxLayer().set_name("Softmax") << OutputLayer(get_output_accessor(label, 5)); // Finalize graph @@ -188,25 +191,29 @@ private: get_weights_accessor(data_path, total_path + "depthwise_depthwise_weights.npy", DataLayout::NCHW), std::unique_ptr(nullptr), dwc_pad_stride_info) + .set_name(total_path + "depthwise/depthwise") << BatchNormalizationLayer( get_weights_accessor(data_path, total_path + "depthwise_BatchNorm_moving_mean.npy"), get_weights_accessor(data_path, total_path + "depthwise_BatchNorm_moving_variance.npy"), get_weights_accessor(data_path, total_path + "depthwise_BatchNorm_gamma.npy"), get_weights_accessor(data_path, total_path + "depthwise_BatchNorm_beta.npy"), 0.001f) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f)) + .set_name(total_path + "depthwise/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f)).set_name(total_path + "depthwise/Relu6") << ConvolutionLayer( 1U, 1U, conv_filt, get_weights_accessor(data_path, total_path + "pointwise_weights.npy", DataLayout::NCHW), std::unique_ptr(nullptr), conv_pad_stride_info) + .set_name(total_path + "pointwise/Conv2D") << BatchNormalizationLayer( get_weights_accessor(data_path, total_path + "pointwise_BatchNorm_moving_mean.npy"), get_weights_accessor(data_path, total_path + "pointwise_BatchNorm_moving_variance.npy"), get_weights_accessor(data_path, total_path + "pointwise_BatchNorm_gamma.npy"), get_weights_accessor(data_path, total_path + "pointwise_BatchNorm_beta.npy"), 0.001f) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f)); + .set_name(total_path + "pointwise/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f)).set_name(total_path + "pointwise/Relu6"); return BranchLayer(std::move(sg)); } diff --git a/examples/graph_resnet50.cpp b/examples/graph_resnet50.cpp index eb74a1aa11..6fc652edbe 100644 --- a/examples/graph_resnet50.cpp +++ b/examples/graph_resnet50.cpp @@ -97,6 +97,7 @@ public: get_weights_accessor(data_path, "/cnn_data/resnet50_model/conv1_weights.npy"), std::unique_ptr(nullptr), PadStrideInfo(2, 2, 3, 3)) + .set_name("conv1/convolution") << convolution_hint << BatchNormalizationLayer( get_weights_accessor(data_path, "/cnn_data/resnet50_model/conv1_BatchNorm_moving_mean.npy"), @@ -104,22 +105,24 @@ public: get_weights_accessor(data_path, "/cnn_data/resnet50_model/conv1_BatchNorm_gamma.npy"), get_weights_accessor(data_path, "/cnn_data/resnet50_model/conv1_BatchNorm_beta.npy"), 0.0000100099996416f) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) - << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::FLOOR))); + .set_name("conv1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv1/Relu") + << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::FLOOR))).set_name("pool1/MaxPool"); add_residual_block(data_path, "block1", 64, 3, 2); add_residual_block(data_path, "block2", 128, 4, 2); 
     add_residual_block(data_path, "block3", 256, 6, 2);
     add_residual_block(data_path, "block4", 512, 3, 1);
 
-    graph << PoolingLayer(PoolingLayerInfo(PoolingType::AVG))
+    graph << PoolingLayer(PoolingLayerInfo(PoolingType::AVG)).set_name("pool5")
           << ConvolutionLayer(
               1U, 1U, 1000U,
               get_weights_accessor(data_path, "/cnn_data/resnet50_model/logits_weights.npy"),
               get_weights_accessor(data_path, "/cnn_data/resnet50_model/logits_biases.npy"),
               PadStrideInfo(1, 1, 0, 0))
-          << FlattenLayer()
-          << SoftmaxLayer()
+          .set_name("logits/convolution")
+          << FlattenLayer().set_name("predictions/Reshape")
+          << SoftmaxLayer().set_name("predictions/Softmax")
           << OutputLayer(get_output_accessor(label, 5));
 
     // Finalize graph
@@ -142,9 +145,13 @@ private:
     {
         for(unsigned int i = 0; i < num_units; ++i)
         {
-            std::stringstream unit;
-            unit << "/cnn_data/resnet50_model/" << name << "_unit_" << (i + 1) << "_bottleneck_v1_";
-            std::string unit_name = unit.str();
+            std::stringstream unit_path_ss;
+            unit_path_ss << "/cnn_data/resnet50_model/" << name << "_unit_" << (i + 1) << "_bottleneck_v1_";
+            std::stringstream unit_name_ss;
+            unit_name_ss << name << "/unit" << (i + 1) << "/bottleneck_v1/";
+
+            std::string unit_path = unit_path_ss.str();
+            std::string unit_name = unit_name_ss.str();
 
             unsigned int middle_stride = 1;
 
@@ -156,73 +163,81 @@ private:
             SubStream right(graph);
             right << ConvolutionLayer(
                       1U, 1U, base_depth,
-                      get_weights_accessor(data_path, unit_name + "conv1_weights.npy"),
+                      get_weights_accessor(data_path, unit_path + "conv1_weights.npy"),
                       std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                       PadStrideInfo(1, 1, 0, 0))
+                  .set_name(unit_name + "conv1/convolution")
                   << BatchNormalizationLayer(
-                      get_weights_accessor(data_path, unit_name + "conv1_BatchNorm_moving_mean.npy"),
-                      get_weights_accessor(data_path, unit_name + "conv1_BatchNorm_moving_variance.npy"),
-                      get_weights_accessor(data_path, unit_name + "conv1_BatchNorm_gamma.npy"),
-                      get_weights_accessor(data_path, unit_name + "conv1_BatchNorm_beta.npy"),
+                      get_weights_accessor(data_path, unit_path + "conv1_BatchNorm_moving_mean.npy"),
+                      get_weights_accessor(data_path, unit_path + "conv1_BatchNorm_moving_variance.npy"),
+                      get_weights_accessor(data_path, unit_path + "conv1_BatchNorm_gamma.npy"),
+                      get_weights_accessor(data_path, unit_path + "conv1_BatchNorm_beta.npy"),
                       0.0000100099996416f)
-                  << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
+                  .set_name(unit_name + "conv1/BatchNorm")
+                  << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "conv1/Relu")
                   << ConvolutionLayer(
                       3U, 3U, base_depth,
-                      get_weights_accessor(data_path, unit_name + "conv2_weights.npy"),
+                      get_weights_accessor(data_path, unit_path + "conv2_weights.npy"),
                       std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                       PadStrideInfo(middle_stride, middle_stride, 1, 1))
+                  .set_name(unit_name + "conv2/convolution")
                   << BatchNormalizationLayer(
-                      get_weights_accessor(data_path, unit_name + "conv2_BatchNorm_moving_mean.npy"),
-                      get_weights_accessor(data_path, unit_name + "conv2_BatchNorm_moving_variance.npy"),
-                      get_weights_accessor(data_path, unit_name + "conv2_BatchNorm_gamma.npy"),
-                      get_weights_accessor(data_path, unit_name + "conv2_BatchNorm_beta.npy"),
+                      get_weights_accessor(data_path, unit_path + "conv2_BatchNorm_moving_mean.npy"),
+                      get_weights_accessor(data_path, unit_path + "conv2_BatchNorm_moving_variance.npy"),
+                      get_weights_accessor(data_path, unit_path + "conv2_BatchNorm_gamma.npy"),
+                      get_weights_accessor(data_path, unit_path + "conv2_BatchNorm_beta.npy"),
                      0.0000100099996416f)
-                  << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
+                  .set_name(unit_name + "conv2/BatchNorm")
+                  << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "conv2/Relu")
                   << ConvolutionLayer(
                       1U, 1U, base_depth * 4,
-                      get_weights_accessor(data_path, unit_name + "conv3_weights.npy"),
+                      get_weights_accessor(data_path, unit_path + "conv3_weights.npy"),
                       std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                       PadStrideInfo(1, 1, 0, 0))
+                  .set_name(unit_name + "conv3/convolution")
                   << BatchNormalizationLayer(
-                      get_weights_accessor(data_path, unit_name + "conv3_BatchNorm_moving_mean.npy"),
-                      get_weights_accessor(data_path, unit_name + "conv3_BatchNorm_moving_variance.npy"),
-                      get_weights_accessor(data_path, unit_name + "conv3_BatchNorm_gamma.npy"),
-                      get_weights_accessor(data_path, unit_name + "conv3_BatchNorm_beta.npy"),
-                      0.0000100099996416f);
+                      get_weights_accessor(data_path, unit_path + "conv3_BatchNorm_moving_mean.npy"),
+                      get_weights_accessor(data_path, unit_path + "conv3_BatchNorm_moving_variance.npy"),
+                      get_weights_accessor(data_path, unit_path + "conv3_BatchNorm_gamma.npy"),
+                      get_weights_accessor(data_path, unit_path + "conv3_BatchNorm_beta.npy"),
+                      0.0000100099996416f)
+                  .set_name(unit_name + "conv3/BatchNorm");
 
             if(i == 0)
             {
                 SubStream left(graph);
                 left << ConvolutionLayer(
                          1U, 1U, base_depth * 4,
-                         get_weights_accessor(data_path, unit_name + "shortcut_weights.npy"),
+                         get_weights_accessor(data_path, unit_path + "shortcut_weights.npy"),
                          std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                          PadStrideInfo(1, 1, 0, 0))
+                     .set_name(unit_name + "shortcut/convolution")
                      << BatchNormalizationLayer(
-                         get_weights_accessor(data_path, unit_name + "shortcut_BatchNorm_moving_mean.npy"),
-                         get_weights_accessor(data_path, unit_name + "shortcut_BatchNorm_moving_variance.npy"),
-                         get_weights_accessor(data_path, unit_name + "shortcut_BatchNorm_gamma.npy"),
-                         get_weights_accessor(data_path, unit_name + "shortcut_BatchNorm_beta.npy"),
-                         0.0000100099996416f);
-
-                graph << BranchLayer(BranchMergeMethod::ADD, std::move(left), std::move(right));
+                         get_weights_accessor(data_path, unit_path + "shortcut_BatchNorm_moving_mean.npy"),
+                         get_weights_accessor(data_path, unit_path + "shortcut_BatchNorm_moving_variance.npy"),
+                         get_weights_accessor(data_path, unit_path + "shortcut_BatchNorm_gamma.npy"),
+                         get_weights_accessor(data_path, unit_path + "shortcut_BatchNorm_beta.npy"),
+                         0.0000100099996416f)
+                     .set_name(unit_name + "shortcut/BatchNorm");
+
+                graph << BranchLayer(BranchMergeMethod::ADD, std::move(left), std::move(right)).set_name(unit_name + "add");
             }
             else if(middle_stride > 1)
             {
                 SubStream left(graph);
-                left << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 1, PadStrideInfo(middle_stride, middle_stride, 0, 0), true));
+                left << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 1, PadStrideInfo(middle_stride, middle_stride, 0, 0), true)).set_name(unit_name + "shortcut/MaxPool");
 
-                graph << BranchLayer(BranchMergeMethod::ADD, std::move(left), std::move(right));
+                graph << BranchLayer(BranchMergeMethod::ADD, std::move(left), std::move(right)).set_name(unit_name + "add");
             }
             else
             {
                 SubStream left(graph);
-                graph << BranchLayer(BranchMergeMethod::ADD, std::move(left), std::move(right));
+                graph << BranchLayer(BranchMergeMethod::ADD, std::move(left), std::move(right)).set_name(unit_name + "add");
             }
 
-            graph << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
+            graph <<
ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "Relu"); } } }; diff --git a/examples/graph_vgg16.cpp b/examples/graph_vgg16.cpp index 9e8e69411f..9c2763f649 100644 --- a/examples/graph_vgg16.cpp +++ b/examples/graph_vgg16.cpp @@ -100,7 +100,8 @@ public: get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv1_1_w.npy"), get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv1_1_b.npy"), PadStrideInfo(1, 1, 1, 1)) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv1_1") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv1_1/Relu") << convolution3x3_hint // Layer 2 << ConvolutionLayer( @@ -108,108 +109,123 @@ public: get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv1_2_w.npy"), get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv1_2_b.npy"), PadStrideInfo(1, 1, 1, 1)) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) - << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, PadStrideInfo(2, 2, 0, 0))) + .set_name("conv1_2") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv1_2/Relu") + << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, PadStrideInfo(2, 2, 0, 0))).set_name("pool1") // Layer 3 << ConvolutionLayer( 3U, 3U, 128U, get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv2_1_w.npy"), get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv2_1_b.npy"), PadStrideInfo(1, 1, 1, 1)) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv2_1") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv2_1/Relu") // Layer 4 << ConvolutionLayer( 3U, 3U, 128U, get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv2_2_w.npy"), get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv2_2_b.npy"), PadStrideInfo(1, 1, 1, 1)) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) - << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, PadStrideInfo(2, 2, 0, 0))) + .set_name("conv2_2") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv2_2/Relu") + << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, PadStrideInfo(2, 2, 0, 0))).set_name("pool2") // Layer 5 << ConvolutionLayer( 3U, 3U, 256U, get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv3_1_w.npy"), get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv3_1_b.npy"), PadStrideInfo(1, 1, 1, 1)) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv3_1") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv3_1/Relu") // Layer 6 << ConvolutionLayer( 3U, 3U, 256U, get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv3_2_w.npy"), get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv3_2_b.npy"), PadStrideInfo(1, 1, 1, 1)) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv3_2") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv3_2/Relu") // Layer 7 << ConvolutionLayer( 3U, 3U, 256U, get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv3_3_w.npy"), get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv3_3_b.npy"), PadStrideInfo(1, 1, 1, 1)) 
- << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) - << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, PadStrideInfo(2, 2, 0, 0))) + .set_name("conv3_3") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv3_3/Relu") + << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, PadStrideInfo(2, 2, 0, 0))).set_name("pool3") // Layer 8 << ConvolutionLayer( 3U, 3U, 512U, get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv4_1_w.npy"), get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv4_1_b.npy"), PadStrideInfo(1, 1, 1, 1)) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv4_1") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv4_1/Relu") // Layer 9 << ConvolutionLayer( 3U, 3U, 512U, get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv4_2_w.npy"), get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv4_2_b.npy"), PadStrideInfo(1, 1, 1, 1)) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv4_2") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv4_2/Relu") // Layer 10 << ConvolutionLayer( 3U, 3U, 512U, get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv4_3_w.npy"), get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv4_3_b.npy"), PadStrideInfo(1, 1, 1, 1)) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) - << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, PadStrideInfo(2, 2, 0, 0))) + .set_name("conv4_3") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv4_3/Relu") + << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, PadStrideInfo(2, 2, 0, 0))).set_name("pool4") // Layer 11 << ConvolutionLayer( 3U, 3U, 512U, get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv5_1_w.npy"), get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv5_1_b.npy"), PadStrideInfo(1, 1, 1, 1)) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv5_1") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv5_1/Relu") // Layer 12 << ConvolutionLayer( 3U, 3U, 512U, get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv5_2_w.npy"), get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv5_2_b.npy"), PadStrideInfo(1, 1, 1, 1)) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv5_2") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv5_2/Relu") // Layer 13 << ConvolutionLayer( 3U, 3U, 512U, get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv5_3_w.npy"), get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv5_3_b.npy"), PadStrideInfo(1, 1, 1, 1)) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) - << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, PadStrideInfo(2, 2, 0, 0))) + .set_name("conv5_3") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv5_3/Relu") + << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, PadStrideInfo(2, 2, 0, 0))).set_name("pool5") // Layer 14 << FullyConnectedLayer( 4096U, get_weights_accessor(data_path, 
"/cnn_data/vgg16_model/fc6_w.npy"), get_weights_accessor(data_path, "/cnn_data/vgg16_model/fc6_b.npy")) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("fc6") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Relu") // Layer 15 << FullyConnectedLayer( 4096U, get_weights_accessor(data_path, "/cnn_data/vgg16_model/fc7_w.npy"), get_weights_accessor(data_path, "/cnn_data/vgg16_model/fc7_b.npy")) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("fc7") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Relu_1") // Layer 16 << FullyConnectedLayer( 1000U, get_weights_accessor(data_path, "/cnn_data/vgg16_model/fc8_w.npy"), get_weights_accessor(data_path, "/cnn_data/vgg16_model/fc8_b.npy")) + .set_name("fc8") // Softmax - << SoftmaxLayer() + << SoftmaxLayer().set_name("prob") << OutputLayer(get_output_accessor(label, 5)); // Finalize graph diff --git a/examples/graph_vgg19.cpp b/examples/graph_vgg19.cpp index fed2c806ee..0684309111 100644 --- a/examples/graph_vgg19.cpp +++ b/examples/graph_vgg19.cpp @@ -100,126 +100,145 @@ public: get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv1_1_w.npy"), get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv1_1_b.npy"), PadStrideInfo(1, 1, 1, 1)) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv1_1") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv1_1/Relu") << convolution3x3_hint << ConvolutionLayer( 3U, 3U, 64U, get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv1_2_w.npy"), get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv1_2_b.npy"), PadStrideInfo(1, 1, 1, 1)) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) - << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, PadStrideInfo(2, 2, 0, 0))) + .set_name("conv1_2") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv1_2/Relu") + << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, PadStrideInfo(2, 2, 0, 0))).set_name("pool1") // Layer 2 << ConvolutionLayer( 3U, 3U, 128U, get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv2_1_w.npy"), get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv2_1_b.npy"), PadStrideInfo(1, 1, 1, 1)) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv2_1") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv2_1/Relu") << ConvolutionLayer( 3U, 3U, 128U, get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv2_2_w.npy"), get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv2_2_b.npy"), PadStrideInfo(1, 1, 1, 1)) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) - << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, PadStrideInfo(2, 2, 0, 0))) + .set_name("conv2_2") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv2_2/Relu") + << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, PadStrideInfo(2, 2, 0, 0))).set_name("pool2") // Layer 3 << ConvolutionLayer( 3U, 3U, 256U, get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv3_1_w.npy"), get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv3_1_b.npy"), PadStrideInfo(1, 1, 1, 1)) 
- << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv3_1") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv3_1/Relu") << ConvolutionLayer( 3U, 3U, 256U, get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv3_2_w.npy"), get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv3_2_b.npy"), PadStrideInfo(1, 1, 1, 1)) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv3_2") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv3_2/Relu") << ConvolutionLayer( 3U, 3U, 256U, get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv3_3_w.npy"), get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv3_3_b.npy"), PadStrideInfo(1, 1, 1, 1)) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv3_3") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv3_3/Relu") << ConvolutionLayer( 3U, 3U, 256U, get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv3_4_w.npy"), get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv3_4_b.npy"), PadStrideInfo(1, 1, 1, 1)) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) - << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, PadStrideInfo(2, 2, 0, 0))) + .set_name("conv3_4") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv3_4/Relu") + << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, PadStrideInfo(2, 2, 0, 0))).set_name("pool3") // Layer 4 << ConvolutionLayer( 3U, 3U, 512U, get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv4_1_w.npy"), get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv4_1_b.npy"), PadStrideInfo(1, 1, 1, 1)) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv4_1") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv4_1/Relu") << ConvolutionLayer( 3U, 3U, 512U, get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv4_2_w.npy"), get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv4_2_b.npy"), PadStrideInfo(1, 1, 1, 1)) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv4_2") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv4_2/Relu") << ConvolutionLayer( 3U, 3U, 512U, get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv4_3_w.npy"), get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv4_3_b.npy"), PadStrideInfo(1, 1, 1, 1)) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv4_3") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv4_3/Relu") << ConvolutionLayer( 3U, 3U, 512U, get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv4_4_w.npy"), get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv4_4_b.npy"), PadStrideInfo(1, 1, 1, 1)) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) - << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, PadStrideInfo(2, 2, 0, 0))) + .set_name("conv4_4") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv4_4/Relu") + << 
PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, PadStrideInfo(2, 2, 0, 0))).set_name("pool4") // Layer 5 << ConvolutionLayer( 3U, 3U, 512U, get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv5_1_w.npy"), get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv5_1_b.npy"), PadStrideInfo(1, 1, 1, 1)) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv5_1") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv5_1/Relu") << ConvolutionLayer( 3U, 3U, 512U, get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv5_2_w.npy"), get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv5_2_b.npy"), PadStrideInfo(1, 1, 1, 1)) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv5_2") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv5_2/Relu") << ConvolutionLayer( 3U, 3U, 512U, get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv5_3_w.npy"), get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv5_3_b.npy"), PadStrideInfo(1, 1, 1, 1)) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv5_3") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv5_3/Relu") << ConvolutionLayer( 3U, 3U, 512U, get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv5_4_w.npy"), get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv5_4_b.npy"), PadStrideInfo(1, 1, 1, 1)) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) - << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, PadStrideInfo(2, 2, 0, 0))) + .set_name("conv5_4") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv5_4/Relu") + << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, PadStrideInfo(2, 2, 0, 0))).set_name("pool5") // Layer 6 << FullyConnectedLayer( 4096U, get_weights_accessor(data_path, "/cnn_data/vgg19_model/fc6_w.npy"), get_weights_accessor(data_path, "/cnn_data/vgg19_model/fc6_b.npy")) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("fc6") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Relu") // Layer 7 << FullyConnectedLayer( 4096U, get_weights_accessor(data_path, "/cnn_data/vgg19_model/fc7_w.npy"), get_weights_accessor(data_path, "/cnn_data/vgg19_model/fc7_b.npy")) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("fc7") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Relu_1") // Layer 8 << FullyConnectedLayer( 1000U, get_weights_accessor(data_path, "/cnn_data/vgg19_model/fc8_w.npy"), get_weights_accessor(data_path, "/cnn_data/vgg19_model/fc8_b.npy")) + .set_name("fc8") // Softmax - << SoftmaxLayer() + << SoftmaxLayer().set_name("prob") << OutputLayer(get_output_accessor(label, 5)); // Finalize graph diff --git a/src/graph/Workload.cpp b/src/graph/Workload.cpp index f350bbf625..d8046c3511 100644 --- a/src/graph/Workload.cpp +++ b/src/graph/Workload.cpp @@ -32,9 +32,14 @@ namespace graph { void ExecutionTask::operator()() { - if(task) + TaskExecutor::get().execute_function(*this); +} + +void execute_task(ExecutionTask &task) +{ + if(task.task) { - task->run(); + task.task->run(); } } @@ -45,5 +50,16 @@ void 
ExecutionTask::prepare() task->prepare(); } } + +TaskExecutor::TaskExecutor() + : execute_function(execute_task) +{ +} + +TaskExecutor &TaskExecutor::get() +{ + static TaskExecutor executor; + return executor; +} } // namespace graph } // namespace arm_compute \ No newline at end of file diff --git a/tests/SConscript b/tests/SConscript index 7aa4d9391f..6a6ee623b2 100644 --- a/tests/SConscript +++ b/tests/SConscript @@ -60,12 +60,13 @@ test_env.Append(LIBS = arm_compute_test_framework) if env['os'] in ['android', 'bare_metal'] or env['standalone']: Import("arm_compute_a") Import("arm_compute_core_a") - test_env.Append(LIBS = [arm_compute_a, arm_compute_core_a]) - arm_compute_lib = arm_compute_a + Import("arm_compute_graph_a") + test_env.Append(LIBS = [arm_compute_graph_a, arm_compute_a, arm_compute_core_a]) + arm_compute_lib = arm_compute_graph_a else: - Import("arm_compute_so") - test_env.Append(LIBS = ["arm_compute", "arm_compute_core"]) - arm_compute_lib = arm_compute_so + Import("arm_compute_graph_so") + test_env.Append(LIBS = ["arm_compute_graph", "arm_compute", "arm_compute_core"]) + arm_compute_lib = arm_compute_graph_so #FIXME Delete before release if env['internal_only']: @@ -158,24 +159,15 @@ if test_env['validation_tests']: cl_examples += [ test_env.Program(example, [ test_env.Object(source=file, target=example) ] + files_validate_examples, LIBS = test_env["LIBS"]) ] arm_compute_validate_examples += cl_examples if test_env['opencl'] and test_env['neon']: - if env['os'] in ['android', 'bare_metal'] or env['standalone']: - Import('arm_compute_graph_a') - graph_dependency = [ arm_compute_graph_a] - else: - Import('arm_compute_graph_so') - graph_dependency = [ arm_compute_graph_so] - graph_utils = test_env.Object(source="../utils/GraphUtils.cpp", target="GraphUtils") for file in Glob("validate_examples/graph_*.cpp"): example = "validate_" + os.path.basename(os.path.splitext(str(file))[0]) if env['os'] in ['android', 'bare_metal'] or env['standalone']: - prog = test_env.Program(example, [ test_env.Object(source=file, target=example), graph_utils]+ files_validate_examples, LIBS = test_env["LIBS"], LINKFLAGS=test_env["LINKFLAGS"]+['-Wl,--whole-archive',graph_dependency,'-Wl,--no-whole-archive']) - Depends(prog, graph_dependency) + prog = test_env.Program(example, [ test_env.Object(source=file, target=example), graph_utils]+ files_validate_examples, LIBS = test_env["LIBS"], LINKFLAGS=test_env["LINKFLAGS"]+['-Wl,--whole-archive',arm_compute_lib,'-Wl,--no-whole-archive']) arm_compute_validate_examples += [ prog ] else: #-Wl,--allow-shlib-undefined: Ignore dependencies of dependencies prog = test_env.Program(example, [ test_env.Object(source=file, target=example), graph_utils]+ files_validate_examples, LIBS = test_env["LIBS"] + ["arm_compute_graph"], LINKFLAGS=test_env["LINKFLAGS"]+['-Wl,--allow-shlib-undefined'] ) - Depends(prog, graph_dependency) arm_compute_validate_examples += [ prog ] Depends(arm_compute_validate_examples, arm_compute_test_framework) Depends(arm_compute_validate_examples, arm_compute_lib) @@ -201,23 +193,15 @@ if test_env['benchmark_examples']: arm_compute_benchmark_examples += cl_examples # Graph examples - if env['os'] in ['android', 'bare_metal'] or env['standalone']: - Import('arm_compute_graph_a') - graph_dependency = [arm_compute_graph_a] - else: - Import('arm_compute_graph_so') - graph_dependency = [arm_compute_graph_so] graph_utils = test_env.Object(source="../utils/GraphUtils.cpp", target="GraphUtils") for file in Glob("../examples/graph_*.cpp"): example = 
"benchmark_" + os.path.basename(os.path.splitext(str(file))[0]) if env['os'] in ['android', 'bare_metal'] or env['standalone']: - prog = test_env.Program(example, [ test_env.Object(source=file, target=example), graph_utils]+ files_benchmark_examples, LIBS = test_env["LIBS"], LINKFLAGS=test_env["LINKFLAGS"]+['-Wl,--whole-archive',graph_dependency,'-Wl,--no-whole-archive']) - Depends(prog, [graph_dependency]) + prog = test_env.Program(example, [ test_env.Object(source=file, target=example), graph_utils]+ files_benchmark_examples, LIBS = test_env["LIBS"], LINKFLAGS=test_env["LINKFLAGS"]+['-Wl,--whole-archive',arm_compute_lib,'-Wl,--no-whole-archive']) arm_compute_benchmark_examples += [ prog ] else: #-Wl,--allow-shlib-undefined: Ignore dependencies of dependencies prog = test_env.Program(example, [ test_env.Object(source=file, target=example), graph_utils]+ files_benchmark_examples, LIBS = test_env["LIBS"] + ["arm_compute_graph"], LINKFLAGS=test_env["LINKFLAGS"]+['-Wl,--allow-shlib-undefined'] ) - Depends(prog, graph_dependency) arm_compute_benchmark_examples += [ prog ] Depends(arm_compute_benchmark_examples, arm_compute_test_framework) Depends(arm_compute_benchmark_examples, arm_compute_lib) diff --git a/tests/framework/Profiler.cpp b/tests/framework/Profiler.cpp index 69ea527a80..7b95279b31 100644 --- a/tests/framework/Profiler.cpp +++ b/tests/framework/Profiler.cpp @@ -55,9 +55,9 @@ void Profiler::start() void Profiler::stop() { - for(auto &instrument : _instruments) + for(auto instrument = _instruments.rbegin(); instrument != _instruments.rend(); instrument++) { - instrument->stop(); + (*instrument)->stop(); } for(const auto &instrument : _instruments) { @@ -70,9 +70,9 @@ void Profiler::stop() void Profiler::test_stop() { - for(auto &instrument : _instruments) + for(auto instrument = _instruments.rbegin(); instrument != _instruments.rend(); instrument++) { - instrument->test_stop(); + (*instrument)->test_stop(); } for(const auto &instrument : _instruments) diff --git a/tests/framework/instruments/OpenCLTimer.cpp b/tests/framework/instruments/OpenCLTimer.cpp index d9d16bc829..4af6dae8e7 100644 --- a/tests/framework/instruments/OpenCLTimer.cpp +++ b/tests/framework/instruments/OpenCLTimer.cpp @@ -26,6 +26,7 @@ #include "../Framework.h" #include "../Utils.h" +#include "arm_compute/graph/INode.h" #include "arm_compute/runtime/CL/CLScheduler.h" #ifndef ARM_COMPUTE_CL @@ -44,7 +45,7 @@ std::string OpenCLTimer::id() const } OpenCLTimer::OpenCLTimer(ScaleFactor scale_factor) - : real_function(CLSymbols::get().clEnqueueNDRangeKernel_ptr) + : _kernels(), _real_function(nullptr), _real_graph_function(nullptr), _prefix() { auto q = CLScheduler::get().queue(); cl_command_queue_properties props = q.getInfo(); @@ -76,20 +77,23 @@ OpenCLTimer::OpenCLTimer(ScaleFactor scale_factor) } } -void OpenCLTimer::start() +void OpenCLTimer::test_start() { - kernels.clear(); // Start intercepting enqueues: - auto interceptor = [this]( - cl_command_queue command_queue, - cl_kernel kernel, - cl_uint work_dim, - const size_t *gwo, - const size_t *gws, - const size_t *lws, - cl_uint num_events_in_wait_list, - const cl_event * event_wait_list, - cl_event * event) + ARM_COMPUTE_ERROR_ON(_real_function != nullptr); + ARM_COMPUTE_ERROR_ON(_real_graph_function != nullptr); + _real_function = CLSymbols::get().clEnqueueNDRangeKernel_ptr; + _real_graph_function = graph::TaskExecutor::get().execute_function; + auto interceptor = [this]( + cl_command_queue command_queue, + cl_kernel kernel, + cl_uint work_dim, + const size_t 
+                           const size_t    *gws,
+                           const size_t    *lws,
+                           cl_uint          num_events_in_wait_list,
+                           const cl_event * event_wait_list,
+                           cl_event *       event)
     {
         ARM_COMPUTE_ERROR_ON_MSG(event != nullptr, "Not supported");
         ARM_COMPUTE_UNUSED(event);
@@ -97,7 +101,7 @@ void OpenCLTimer::start()
         OpenCLTimer::kernel_info info;
         cl::Kernel               cpp_kernel(kernel, true);
         std::stringstream        ss;
-        ss << cpp_kernel.getInfo<CL_KERNEL_FUNCTION_NAME>();
+        ss << this->_prefix << cpp_kernel.getInfo<CL_KERNEL_FUNCTION_NAME>();
         if(gws != nullptr)
         {
             ss << " GWS[" << gws[0] << "," << gws[1] << "," << gws[2] << "]";
@@ -108,26 +112,50 @@ void OpenCLTimer::start()
         }
         info.name = ss.str();
         cl_event tmp;
-        cl_int retval = this->real_function(command_queue, kernel, work_dim, gwo, gws, lws, num_events_in_wait_list, event_wait_list, &tmp);
+        cl_int retval = this->_real_function(command_queue, kernel, work_dim, gwo, gws, lws, num_events_in_wait_list, event_wait_list, &tmp);
         info.event = tmp;
-        this->kernels.push_back(std::move(info));
+        this->_kernels.push_back(std::move(info));
         return retval;
     };
+
+    // Start intercepting tasks:
+    auto task_interceptor = [this](graph::ExecutionTask & task)
+    {
+        if(task.node != nullptr && !task.node->name().empty())
+        {
+            this->_prefix = task.node->name() + "/";
+        }
+        else
+        {
+            this->_prefix = "";
+        }
+        this->_real_graph_function(task);
+        this->_prefix = "";
+    };
+
     CLSymbols::get().clEnqueueNDRangeKernel_ptr = interceptor;
+    graph::TaskExecutor::get().execute_function = task_interceptor;
+}
+
+void OpenCLTimer::start()
+{
+    _kernels.clear();
 }
 
-void OpenCLTimer::stop()
+void OpenCLTimer::test_stop()
 {
     // Restore real function
-    CLSymbols::get().clEnqueueNDRangeKernel_ptr = real_function;
+    CLSymbols::get().clEnqueueNDRangeKernel_ptr = _real_function;
+    graph::TaskExecutor::get().execute_function = _real_graph_function;
+    _real_graph_function = nullptr;
+    _real_function       = nullptr;
 }
 
 Instrument::MeasurementsMap OpenCLTimer::measurements() const
 {
     MeasurementsMap measurements;
 
     unsigned int kernel_number = 0;
-    for(auto kernel : kernels)
+    for(auto kernel : _kernels)
     {
         cl_ulong start = kernel.event.getProfilingInfo<CL_PROFILING_COMMAND_START>();
         cl_ulong end   = kernel.event.getProfilingInfo<CL_PROFILING_COMMAND_END>();
diff --git a/tests/framework/instruments/OpenCLTimer.h b/tests/framework/instruments/OpenCLTimer.h
index 44578782ed..059f4493f9 100644
--- a/tests/framework/instruments/OpenCLTimer.h
+++ b/tests/framework/instruments/OpenCLTimer.h
@@ -30,6 +30,8 @@
 #include "arm_compute/core/CL/OpenCL.h"
 #endif /* ARM_COMPUTE_CL */
 
+#include "arm_compute/graph/Workload.h"
+
 #include <list>
 
 namespace arm_compute
@@ -48,17 +50,22 @@ public:
      */
     OpenCLTimer(ScaleFactor scale_factor);
     std::string id() const override;
+    void test_start() override;
     void start() override;
-    void stop() override;
+    void test_stop() override;
     MeasurementsMap measurements() const override;
+
+private:
 #ifdef ARM_COMPUTE_CL
     struct kernel_info
     {
         cl::Event   event{}; /**< OpenCL event associated to the kernel enqueue */
         std::string name{};  /**< OpenCL Kernel name */
     };
-    std::list<kernel_info> kernels{};
-    std::function<decltype(clEnqueueNDRangeKernel)> real_function;
+    std::list<kernel_info>                          _kernels;
+    std::function<decltype(clEnqueueNDRangeKernel)> _real_function;
+    std::function<decltype(graph::execute_task)>    _real_graph_function;
+    std::string                                     _prefix;
 #endif /* ARM_COMPUTE_CL */
 
 private:
diff --git a/tests/framework/instruments/SchedulerTimer.cpp b/tests/framework/instruments/SchedulerTimer.cpp
index e42cebde21..1b37b189dd 100644
--- a/tests/framework/instruments/SchedulerTimer.cpp
+++ b/tests/framework/instruments/SchedulerTimer.cpp
@@ -25,6 +25,8 @@
 #include "WallClockTimer.h"
 #include "arm_compute/core/CPP/ICPPKernel.h"
+#include "arm_compute/core/utils/misc/Cast.h"
+#include "arm_compute/graph/INode.h" namespace arm_compute { @@ -42,7 +44,7 @@ class Interceptor final : public IScheduler public: /** Default constructor. */ Interceptor(std::list &kernels, IScheduler &real_scheduler, ScaleFactor scale_factor) - : _kernels(kernels), _real_scheduler(real_scheduler), _timer(scale_factor) + : _kernels(kernels), _real_scheduler(real_scheduler), _timer(scale_factor), _prefix() { } @@ -56,6 +58,11 @@ public: return _real_scheduler.num_threads(); } + void set_prefix(std::string prefix) + { + _prefix = std::move(prefix); + } + void schedule(ICPPKernel *kernel, unsigned int split_dimension) override { _timer.start(); @@ -64,6 +71,7 @@ public: SchedulerTimer::kernel_info info; info.name = kernel->name(); + info.prefix = _prefix; info.measurements = _timer.measurements(); _kernels.push_back(std::move(info)); } @@ -72,32 +80,68 @@ private: std::list &_kernels; IScheduler &_real_scheduler; WallClockTimer _timer; + std::string _prefix; }; SchedulerTimer::SchedulerTimer(ScaleFactor scale_factor) - : _kernels(), _real_scheduler(nullptr), _real_scheduler_type(), _scale_factor(scale_factor) + : _kernels(), _real_scheduler(nullptr), _real_scheduler_type(), _real_graph_function(nullptr), _scale_factor(scale_factor), _interceptor(nullptr) { } -void SchedulerTimer::start() +void SchedulerTimer::test_start() { + // Start intercepting tasks: + ARM_COMPUTE_ERROR_ON(_real_graph_function != nullptr); + _real_graph_function = graph::TaskExecutor::get().execute_function; + auto task_interceptor = [this](graph::ExecutionTask & task) + { + Interceptor *scheduler = nullptr; + if(dynamic_cast(this->_interceptor.get()) != nullptr) + { + scheduler = arm_compute::utils::cast::polymorphic_downcast(_interceptor.get()); + if(task.node != nullptr && !task.node->name().empty()) + { + scheduler->set_prefix(task.node->name() + "/"); + } + else + { + scheduler->set_prefix(""); + } + } + + this->_real_graph_function(task); + + if(scheduler != nullptr) + { + scheduler->set_prefix(""); + } + }; + ARM_COMPUTE_ERROR_ON(_real_scheduler != nullptr); _real_scheduler_type = Scheduler::get_type(); //Note: We can't currently replace a custom scheduler if(_real_scheduler_type != Scheduler::Type::CUSTOM) { - _real_scheduler = &Scheduler::get(); - auto interceptor = std::make_shared(_kernels, *_real_scheduler, _scale_factor); - Scheduler::set(std::static_pointer_cast(interceptor)); + _real_scheduler = &Scheduler::get(); + _interceptor = std::make_shared(_kernels, *_real_scheduler, _scale_factor); + Scheduler::set(std::static_pointer_cast(_interceptor)); + graph::TaskExecutor::get().execute_function = task_interceptor; } +} + +void SchedulerTimer::start() +{ _kernels.clear(); } -void SchedulerTimer::stop() +void SchedulerTimer::test_stop() { // Restore real scheduler Scheduler::set(_real_scheduler_type); - _real_scheduler = nullptr; + _real_scheduler = nullptr; + _interceptor = nullptr; + graph::TaskExecutor::get().execute_function = _real_graph_function; + _real_graph_function = nullptr; } Instrument::MeasurementsMap SchedulerTimer::measurements() const @@ -106,7 +150,7 @@ Instrument::MeasurementsMap SchedulerTimer::measurements() const unsigned int kernel_number = 0; for(auto kernel : _kernels) { - measurements.emplace(kernel.name + " #" + support::cpp11::to_string(kernel_number++), kernel.measurements.begin()->second); + measurements.emplace(kernel.prefix + kernel.name + " #" + support::cpp11::to_string(kernel_number++), kernel.measurements.begin()->second); } return measurements; diff --git 
index ec282cc905..55d5f25b75 100644
--- a/tests/framework/instruments/SchedulerTimer.h
+++ b/tests/framework/instruments/SchedulerTimer.h
@@ -25,7 +25,9 @@
 #define ARM_COMPUTE_TEST_SCHEDULER_TIMER
 
 #include "Instrument.h"
+#include "arm_compute/graph/Workload.h"
 #include "arm_compute/runtime/Scheduler.h"
+
 #include <list>
 
 namespace arm_compute
@@ -50,8 +52,9 @@ public:
     SchedulerTimer &operator=(const SchedulerTimer &) = delete;
 
     std::string id() const override;
+    void test_start() override;
     void start() override;
-    void stop() override;
+    void test_stop() override;
     Instrument::MeasurementsMap measurements() const override;
 
     /** Kernel information */
@@ -59,13 +62,16 @@ public:
     {
         Instrument::MeasurementsMap measurements{}; /**< Time it took the kernel to run */
         std::string                 name{};         /**< Kernel name */
+        std::string                 prefix{};       /**< Kernel prefix */
     };
 
 private:
-    std::list<kernel_info> _kernels;
-    IScheduler            *_real_scheduler;
-    Scheduler::Type        _real_scheduler_type;
-    ScaleFactor            _scale_factor;
+    std::list<kernel_info>                        _kernels;
+    IScheduler                                   *_real_scheduler;
+    Scheduler::Type                               _real_scheduler_type;
+    std::function<decltype(graph::execute_task)>  _real_graph_function;
+    ScaleFactor                                   _scale_factor;
+    std::shared_ptr<IScheduler>                   _interceptor;
 };
 } // namespace framework
 } // namespace test
diff --git a/tests/framework/printers/PrettyPrinter.cpp b/tests/framework/printers/PrettyPrinter.cpp
index ef8f91a796..318195109c 100644
--- a/tests/framework/printers/PrettyPrinter.cpp
+++ b/tests/framework/printers/PrettyPrinter.cpp
@@ -129,8 +129,8 @@ void PrettyPrinter::print_measurements(const Profiler::MeasurementsMap &measurem
             if(instrument.second.size() > 1)
             {
                 *_stream << ", STDDEV=" << arithmetic_to_string(stats.relative_standard_deviation(), 2) << " %";
-                *_stream << ", MIN=" << stats.min() << ", ";
-                *_stream << ", MAX=" << stats.max() << ", ";
+                *_stream << ", MIN=" << stats.min();
+                *_stream << ", MAX=" << stats.max();
                 *_stream << ", MEDIAN=" << stats.median().value() << " " << stats.median().unit();
             }
             *_stream << end_color() << "\n";
-- 
cgit v1.2.1
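
Usage note: with this change a frontend layer can be tagged through the fluent
set_name() interface before it is streamed into a graph (both lvalue and rvalue
layers are now accepted by operator<<), and the SchedulerTimer/OpenCLTimer
instruments prepend the owning node's name to each measurement key, which the
patch formats as prefix + kernel name + " #" + counter. The sketch below shows
the resulting API surface. It is a minimal illustration assuming the 18.05-era
frontend used by the examples above; the Stream constructor arguments, the null
tensor accessors (taken from the examples in this patch), and the
finalize()/run() sequence are illustrative, not something this commit adds.

    #include "arm_compute/graph.h"

    using namespace arm_compute;
    using namespace arm_compute::graph::frontend;

    int main()
    {
        Stream graph(0, "named_layers_example");

        graph << Target::NEON
              // Null accessors stand in for real weight/input data, mirroring
              // the std::unique_ptr<...ITensorAccessor>(nullptr) usage above.
              << InputLayer(TensorDescriptor(TensorShape(28U, 28U, 1U, 1U), DataType::F32),
                            std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr))
              << ConvolutionLayer(5U, 5U, 20U,
                                  std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                                  std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                                  PadStrideInfo(1, 1, 0, 0))
                     .set_name("conv1") // stored on the layer, propagated into NodeParams
              << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, PadStrideInfo(2, 2, 0, 0))).set_name("pool1")
              << SoftmaxLayer().set_name("prob")
              << OutputLayer(std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr));

        GraphConfig config;
        graph.finalize(Target::NEON, config);
        graph.run(); // instruments now report e.g. "conv1/<kernel name> #0"
        return 0;
    }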