Diffstat (limited to 'src/graph')
-rw-r--r--  src/graph/DataLayerVisitor.cpp | 156
-rw-r--r--  src/graph/Graph.cpp | 38
-rw-r--r--  src/graph/GraphBuilder.cpp | 285
-rw-r--r--  src/graph/GraphContext.cpp | 15
-rw-r--r--  src/graph/GraphManager.cpp | 30
-rw-r--r--  src/graph/INode.cpp | 10
-rw-r--r--  src/graph/INodeVisitor.cpp | 147
-rw-r--r--  src/graph/PassManager.cpp | 19
-rw-r--r--  src/graph/Tensor.cpp | 26
-rw-r--r--  src/graph/TypeLoader.cpp | 42
-rw-r--r--  src/graph/Utils.cpp | 77
-rw-r--r--  src/graph/Workload.cpp | 11
-rw-r--r--  src/graph/algorithms/TopologicalSort.cpp | 36
-rw-r--r--  src/graph/backends/BackendRegistry.cpp | 3
-rw-r--r--  src/graph/backends/CL/CLDeviceBackend.cpp | 42
-rw-r--r--  src/graph/backends/CL/CLFunctionsFactory.cpp | 154
-rw-r--r--  src/graph/backends/CL/CLNodeValidator.cpp | 62
-rw-r--r--  src/graph/backends/CL/CLSubTensorHandle.cpp | 7
-rw-r--r--  src/graph/backends/CL/CLTensorHandle.cpp | 9
-rw-r--r--  src/graph/backends/NEON/NEDeviceBackend.cpp | 38
-rw-r--r--  src/graph/backends/NEON/NEFunctionFactory.cpp | 125
-rw-r--r--  src/graph/backends/NEON/NENodeValidator.cpp | 64
-rw-r--r--  src/graph/backends/NEON/NESubTensorHandle.cpp | 7
-rw-r--r--  src/graph/backends/NEON/NETensorHandle.cpp | 10
-rw-r--r--  src/graph/detail/CrossLayerMemoryManagerHelpers.cpp | 59
-rw-r--r--  src/graph/detail/ExecutionHelpers.cpp | 91
-rw-r--r--  src/graph/frontend/Stream.cpp | 5
-rw-r--r--  src/graph/frontend/SubStream.cpp | 5
-rw-r--r--  src/graph/mutators/DepthConcatSubTensorMutator.cpp | 39
-rw-r--r--  src/graph/mutators/GroupedConvolutionMutator.cpp | 77
-rw-r--r--  src/graph/mutators/InPlaceOperationMutator.cpp | 227
-rw-r--r--  src/graph/mutators/MutatorUtils.cpp | 52
-rw-r--r--  src/graph/mutators/MutatorUtils.h | 42
-rw-r--r--  src/graph/mutators/NodeExecutionMethodMutator.cpp | 42
-rw-r--r--  src/graph/mutators/NodeFusionMutator.cpp | 295
-rw-r--r--  src/graph/mutators/SplitLayerSubTensorMutator.cpp | 33
-rw-r--r--  src/graph/mutators/SyntheticDataTypeMutator.cpp | 72
-rw-r--r--  src/graph/nodes/ActivationLayerNode.cpp | 4
-rw-r--r--  src/graph/nodes/ArgMinMaxLayerNode.cpp | 17
-rw-r--r--  src/graph/nodes/BatchNormalizationLayerNode.cpp | 4
-rw-r--r--  src/graph/nodes/BoundingBoxTransformLayerNode.cpp | 8
-rw-r--r--  src/graph/nodes/ChannelShuffleLayerNode.cpp | 7
-rw-r--r--  src/graph/nodes/ConcatenateLayerNode.cpp | 22
-rw-r--r--  src/graph/nodes/ConstNode.cpp | 5
-rw-r--r--  src/graph/nodes/ConvolutionLayerNode.cpp | 24
-rw-r--r--  src/graph/nodes/DeconvolutionLayerNode.cpp | 10
-rw-r--r--  src/graph/nodes/DepthToSpaceLayerNode.cpp | 11
-rw-r--r--  src/graph/nodes/DepthwiseConvolutionLayerNode.cpp | 31
-rw-r--r--  src/graph/nodes/DequantizationLayerNode.cpp | 4
-rw-r--r--  src/graph/nodes/DetectionOutputLayerNode.cpp | 9
-rw-r--r--  src/graph/nodes/DetectionPostProcessLayerNode.cpp | 11
-rw-r--r--  src/graph/nodes/DummyNode.cpp | 7
-rw-r--r--  src/graph/nodes/EltwiseLayerNode.cpp | 11
-rw-r--r--  src/graph/nodes/FlattenLayerNode.cpp | 4
-rw-r--r--  src/graph/nodes/FullyConnectedLayer.cpp | 40
-rw-r--r--  src/graph/nodes/FusedConvolutionBatchNormalizationNode.cpp | 29
-rw-r--r--  src/graph/nodes/FusedDepthwiseConvolutionBatchNormalizationNode.cpp | 40
-rw-r--r--  src/graph/nodes/GenerateProposalsLayerNode.cpp | 14
-rw-r--r--  src/graph/nodes/InputNode.cpp | 5
-rw-r--r--  src/graph/nodes/L2NormalizeLayerNode.cpp | 13
-rw-r--r--  src/graph/nodes/NormalizationLayerNode.cpp | 7
-rw-r--r--  src/graph/nodes/NormalizePlanarYUVLayerNode.cpp | 2
-rw-r--r--  src/graph/nodes/PReluLayerNode.cpp | 2
-rw-r--r--  src/graph/nodes/PadLayerNode.cpp | 10
-rw-r--r--  src/graph/nodes/PermuteLayerNode.cpp | 12
-rw-r--r--  src/graph/nodes/PoolingLayerNode.cpp | 10
-rw-r--r--  src/graph/nodes/PrintLayerNode.cpp | 8
-rw-r--r--  src/graph/nodes/PriorBoxLayerNode.cpp | 7
-rw-r--r--  src/graph/nodes/QuantizationLayerNode.cpp | 5
-rw-r--r--  src/graph/nodes/ROIAlignLayerNode.cpp | 10
-rw-r--r--  src/graph/nodes/ReductionLayerNode.cpp | 9
-rw-r--r--  src/graph/nodes/ReorgLayerNode.cpp | 13
-rw-r--r--  src/graph/nodes/ReshapeLayer.cpp | 10
-rw-r--r--  src/graph/nodes/ResizeLayerNode.cpp | 4
-rw-r--r--  src/graph/nodes/SliceLayerNode.cpp | 10
-rw-r--r--  src/graph/nodes/SoftmaxLayerNode.cpp | 7
-rw-r--r--  src/graph/nodes/SplitLayerNode.cpp | 26
-rw-r--r--  src/graph/nodes/StackLayerNode.cpp | 18
-rw-r--r--  src/graph/nodes/StridedSliceLayerNode.cpp | 2
-rw-r--r--  src/graph/printers/DotGraphPrinter.cpp | 21
80 files changed, 1958 insertions(+), 1017 deletions(-)
diff --git a/src/graph/DataLayerVisitor.cpp b/src/graph/DataLayerVisitor.cpp
new file mode 100644
index 0000000000..f0fac25577
--- /dev/null
+++ b/src/graph/DataLayerVisitor.cpp
@@ -0,0 +1,156 @@
+/*
+ * Copyright (c) 2021, 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/graph/DataLayerVisitor.h"
+
+#include "arm_compute/core/Error.h"
+#include "arm_compute/graph/Graph.h"
+#include "arm_compute/graph/nodes/Nodes.h"
+#include "arm_compute/graph/TypePrinter.h"
+
+namespace arm_compute
+{
+namespace graph
+{
+namespace
+{
+template <typename T>
+void add_convolution_layer_data(DataLayerVisitor::LayerData &layer_data, T &node)
+{
+ PadStrideInfo ps_info = node.convolution_info();
+ DataLayout layout = node.output(0)->desc().layout;
+ // Add data layout
+ layer_data["data_layout"] = to_string(layout);
+ // Add padding info
+ std::ostringstream padding;
+ padding << "[" << to_string(ps_info.pad_left()) << "," << to_string(ps_info.pad_top()) << ","
+ << to_string(ps_info.pad_bottom()) << "," << to_string(ps_info.pad_right()) << "]";
+
+ layer_data["pad"] = padding.str();
+
+ // Add stride info
+ std::ostringstream stride;
+ stride << "[" << to_string(ps_info.stride().first) << "," << to_string(ps_info.stride().second) << "]";
+
+ layer_data["stride"] = stride.str();
+
+ // Add dilation info
+ // graph api does not support dilation > 1
+ layer_data["dilation"] = "[1,1]";
+
+ // Add bias enabled?
+ // Assumes three inputs (input, weights, bias)
+ std::string bias_enabled = node.input(2) == nullptr ? "0" : "1";
+ layer_data["bias_enabled"] = bias_enabled;
+
+ // Change input names for weights / bias (if applicable)
+ // Assumes input(1) is weights and input(2) is bias
+ if (layer_data.count("input_shape1"))
+ {
+ layer_data["weights_shape"] = layer_data["input_shape1"];
+ layer_data.erase("input_shape1");
+ }
+ if (layer_data.count("input_shape2"))
+ {
+ layer_data["bias_shape"] = layer_data["input_shape2"];
+ layer_data.erase("input_shape2");
+ }
+}
+
+template <typename T>
+void add_convolution_layer_method(DataLayerVisitor::LayerData &layer_data, T &node)
+{
+ std::ostringstream method;
+ method << node.convolution_method();
+ layer_data["convolution_method"] = method.str();
+}
+
+template <typename T>
+void add_generic_layer_data(DataLayerVisitor::LayerData &layer_data, T &node)
+{
+ // Loop over each input tensor
+ for (size_t tensor_no = 0; tensor_no < node.num_inputs(); ++tensor_no)
+ {
+ // Add input tensor shapes
+ if (node.input(tensor_no) != nullptr)
+ {
+ layer_data["input_shape" + to_string(tensor_no)] =
+ "[" + to_string(node.input(tensor_no)->desc().shape) + "]";
+ }
+ }
+ // Add output tensor shape
+ if (node.output(0) != nullptr)
+ {
+ layer_data["output_shape0"] = "[" + to_string(node.output(0)->desc().shape) + "]";
+ }
+}
+} // namespace
+
+void DataLayerVisitor::visit(ConvolutionLayerNode &n)
+{
+ _layer_data.clear();
+ add_generic_layer_data<ConvolutionLayerNode>(_layer_data, n);
+ add_convolution_layer_data<ConvolutionLayerNode>(_layer_data, n);
+ add_convolution_layer_method<ConvolutionLayerNode>(_layer_data, n);
+}
+
+void DataLayerVisitor::visit(DepthwiseConvolutionLayerNode &n)
+{
+ _layer_data.clear();
+ add_generic_layer_data<DepthwiseConvolutionLayerNode>(_layer_data, n);
+ add_convolution_layer_data<DepthwiseConvolutionLayerNode>(_layer_data, n);
+}
+
+void DataLayerVisitor::visit(FusedConvolutionBatchNormalizationNode &n)
+{
+ _layer_data.clear();
+ add_generic_layer_data<FusedConvolutionBatchNormalizationNode>(_layer_data, n);
+ add_convolution_layer_data<FusedConvolutionBatchNormalizationNode>(_layer_data, n);
+ add_convolution_layer_method<FusedConvolutionBatchNormalizationNode>(_layer_data, n);
+}
+
+void DataLayerVisitor::visit(FusedDepthwiseConvolutionBatchNormalizationNode &n)
+{
+ _layer_data.clear();
+ add_generic_layer_data<FusedDepthwiseConvolutionBatchNormalizationNode>(_layer_data, n);
+ add_convolution_layer_data<FusedDepthwiseConvolutionBatchNormalizationNode>(_layer_data, n);
+}
+
+void DataLayerVisitor::visit(OutputNode &n)
+{
+ _layer_data.clear();
+ ARM_COMPUTE_UNUSED(n);
+}
+
+void DataLayerVisitor::default_visit(INode &n)
+{
+ _layer_data.clear();
+ add_generic_layer_data<INode>(_layer_data, n);
+}
+
+const DataLayerVisitor::LayerData &DataLayerVisitor::layer_data() const
+{
+ return _layer_data;
+}
+} // namespace graph
+} // namespace arm_compute
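
The new visitor funnels per-layer attributes into a string-keyed LayerData map that callers read back through layer_data(). A minimal usage sketch, assuming the usual INode::accept(INodeVisitor &) dispatch from the graph API; the dump_layer_data helper below is hypothetical and not part of this patch:

#include "arm_compute/graph/DataLayerVisitor.h"
#include "arm_compute/graph/Graph.h"
#include <iostream>

// Hypothetical helper: drive the visitor over every node and print the
// key/value pairs it collected for that node.
void dump_layer_data(arm_compute::graph::Graph &g)
{
    arm_compute::graph::DataLayerVisitor visitor;
    for (auto &node : g.nodes())
    {
        if (node != nullptr)
        {
            node->accept(visitor); // dispatches to the matching visit() overload
            for (const auto &kv : visitor.layer_data())
            {
                std::cout << kv.first << "=" << kv.second << "\n";
            }
        }
    }
}
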
diff --git a/src/graph/Graph.cpp b/src/graph/Graph.cpp
index 4ce53589d4..3ae83f2e80 100644
--- a/src/graph/Graph.cpp
+++ b/src/graph/Graph.cpp
@@ -34,24 +34,24 @@ Graph::Graph(GraphID id, std::string name)
bool Graph::remove_node(NodeID nid)
{
- if(nid >= _nodes.size())
+ if (nid >= _nodes.size())
{
return false;
}
std::unique_ptr<INode> &node = _nodes[nid];
- if(node)
+ if (node)
{
// Remove input connections
- for(auto &input_eid : node->_input_edges)
+ for (auto &input_eid : node->_input_edges)
{
remove_connection(input_eid);
}
// Remove output connections
std::set<EdgeID> output_edges_copy = node->output_edges();
- for(auto &output_eid : output_edges_copy)
+ for (auto &output_eid : output_edges_copy)
{
remove_connection(output_eid);
}
@@ -71,8 +71,10 @@ EdgeID Graph::add_connection(NodeID source, size_t source_idx, NodeID sink, size
arm_compute::lock_guard<arm_compute::Mutex> lock(_mtx);
// Check if node index is valid, if node exists and finally if the connection index is valid
- ARM_COMPUTE_ERROR_ON((source >= _nodes.size()) || (_nodes[source] == nullptr) || (source_idx >= _nodes[source]->num_outputs()));
- ARM_COMPUTE_ERROR_ON((sink >= _nodes.size()) || (_nodes[sink] == nullptr) || (sink_idx >= _nodes[sink]->num_inputs()));
+ ARM_COMPUTE_ERROR_ON((source >= _nodes.size()) || (_nodes[source] == nullptr) ||
+ (source_idx >= _nodes[source]->num_outputs()));
+ ARM_COMPUTE_ERROR_ON((sink >= _nodes.size()) || (_nodes[sink] == nullptr) ||
+ (sink_idx >= _nodes[sink]->num_inputs()));
// Get nodes
std::unique_ptr<INode> &source_node = _nodes[source];
@@ -80,23 +82,25 @@ EdgeID Graph::add_connection(NodeID source, size_t source_idx, NodeID sink, size
// Check for duplicate connections (Check only sink node)
Edge *sink_node_edge = sink_node->input_edge(sink_idx);
- if((sink_node_edge != nullptr) && (sink_node_edge->producer_id() == source) && (sink_node_edge->producer_idx() == source_idx)
- && (sink_node_edge->consumer_id() == sink) && (sink_node_edge->consumer_idx() == sink_idx))
+ if ((sink_node_edge != nullptr) && (sink_node_edge->producer_id() == source) &&
+ (sink_node_edge->producer_idx() == source_idx) && (sink_node_edge->consumer_id() == sink) &&
+ (sink_node_edge->consumer_idx() == sink_idx))
{
return sink_node_edge->id();
}
// Check if there is already a tensor associated with output if not create one
TensorID tid = source_node->output_id(source_idx);
- if(tid == NullTensorID)
+ if (tid == NullTensorID)
{
tid = create_tensor();
}
std::unique_ptr<Tensor> &tensor = _tensors[tid];
// Create connections
- EdgeID eid = _edges.size();
- auto connection = std::make_unique<Edge>(eid, source_node.get(), source_idx, sink_node.get(), sink_idx, tensor.get());
+ EdgeID eid = _edges.size();
+ auto connection =
+ std::make_unique<Edge>(eid, source_node.get(), source_idx, sink_node.get(), sink_idx, tensor.get());
_edges.push_back(std::move(connection));
// Add connections to source and sink nodes
@@ -117,7 +121,7 @@ EdgeID Graph::add_connection(NodeID source, size_t source_idx, NodeID sink, size
bool Graph::remove_connection(EdgeID eid)
{
- if(eid >= _edges.size())
+ if (eid >= _edges.size())
{
return false;
}
@@ -125,22 +129,22 @@ bool Graph::remove_connection(EdgeID eid)
std::unique_ptr<Edge> &edge = _edges[eid];
// Remove node connections
- if(edge != nullptr)
+ if (edge != nullptr)
{
// Get tensor bound to the edge
- if(edge->tensor() != nullptr)
+ if (edge->tensor() != nullptr)
{
edge->tensor()->unbind_edge(eid);
}
// Remove edges from source node
- if(edge->producer() != nullptr)
+ if (edge->producer() != nullptr)
{
edge->producer()->_output_edges.erase(eid);
}
// Remove edges from sink node
- if((edge->consumer() != nullptr) && (edge->consumer_idx() < edge->consumer()->_input_edges.size()))
+ if ((edge->consumer() != nullptr) && (edge->consumer_idx() < edge->consumer()->_input_edges.size()))
{
edge->consumer()->_input_edges[edge->consumer_idx()] = EmptyEdgeID;
}
@@ -231,4 +235,4 @@ Tensor *Graph::tensor(TensorID id)
return (id >= _tensors.size()) ? nullptr : _tensors[id].get();
}
} // namespace graph
-} // namespace arm_compute
\ No newline at end of file
+} // namespace arm_compute
diff --git a/src/graph/GraphBuilder.cpp b/src/graph/GraphBuilder.cpp
index 01d35a15b9..eab91b2347 100644
--- a/src/graph/GraphBuilder.cpp
+++ b/src/graph/GraphBuilder.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2021 Arm Limited.
+ * Copyright (c) 2018-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -23,10 +23,11 @@
*/
#include "arm_compute/graph/GraphBuilder.h"
-#include "arm_compute/graph/Graph.h"
-#include "arm_compute/graph/Utils.h"
+#include "arm_compute/core/utils/DataTypeUtils.h"
#include "arm_compute/graph/algorithms/TopologicalSort.h"
+#include "arm_compute/graph/Graph.h"
#include "arm_compute/graph/nodes/Nodes.h"
+#include "arm_compute/graph/Utils.h"
#include "support/ToolchainSupport.h"
@@ -40,7 +41,8 @@ inline void check_nodeidx_pair(const NodeIdxPair &pair, const Graph &g)
{
ARM_COMPUTE_UNUSED(pair);
ARM_COMPUTE_UNUSED(g);
- ARM_COMPUTE_ERROR_ON((pair.node_id >= g.nodes().size()) || (g.node((pair).node_id) == nullptr) || (pair.index >= g.node(pair.node_id)->num_outputs()));
+ ARM_COMPUTE_ERROR_ON((pair.node_id >= g.nodes().size()) || (g.node((pair).node_id) == nullptr) ||
+ (pair.index >= g.node(pair.node_id)->num_outputs()));
}
Status set_node_params(Graph &g, NodeID nid, NodeParams &params)
@@ -66,7 +68,8 @@ Status set_accessor_on_node(Graph &g, NodeID nid, bool is_output, size_t idx, IT
return Status{};
}
-NodeID add_const_node_with_name(Graph &g, NodeParams params, const std::string &name, const TensorDescriptor &desc, ITensorAccessorUPtr accessor)
+NodeID add_const_node_with_name(
+ Graph &g, NodeParams params, const std::string &name, const TensorDescriptor &desc, ITensorAccessorUPtr accessor)
{
params.name = params.name.empty() ? "" : params.name + name;
auto nid = GraphBuilder::add_const_node(g, params, desc, std::move(accessor));
@@ -75,7 +78,7 @@ NodeID add_const_node_with_name(Graph &g, NodeParams params, const std::string &
}
template <typename NT, typename... Args>
-NodeID create_simple_single_input_output_node(Graph &g, NodeParams &params, NodeIdxPair input, Args &&... args)
+NodeID create_simple_single_input_output_node(Graph &g, NodeParams &params, NodeIdxPair input, Args &&...args)
{
check_nodeidx_pair(input, g);
@@ -87,14 +90,17 @@ NodeID create_simple_single_input_output_node(Graph &g, NodeParams &params, Node
}
template <typename NT, typename... Args>
-NodeID create_simple_multiple_input_single_output_node(Graph &g, NodeParams &params, const std::vector<NodeIdxPair> &inputs, Args &&... args)
+NodeID create_simple_multiple_input_single_output_node(Graph &g,
+ NodeParams &params,
+ const std::vector<NodeIdxPair> &inputs,
+ Args &&...args)
{
ARM_COMPUTE_ERROR_ON(inputs.size() == 0);
NodeID nid = g.add_node<NT>(std::forward<Args>(args)...);
unsigned int i = 0;
- for(const auto &input : inputs)
+ for (const auto &input : inputs)
{
check_nodeidx_pair(input, g);
g.add_connection(input.node_id, input.index, nid, i++);
@@ -105,7 +111,8 @@ NodeID create_simple_multiple_input_single_output_node(Graph &g, NodeParams &par
}
} // namespace
-NodeID GraphBuilder::add_const_node(Graph &g, NodeParams params, const TensorDescriptor &desc, ITensorAccessorUPtr accessor)
+NodeID
+GraphBuilder::add_const_node(Graph &g, NodeParams params, const TensorDescriptor &desc, ITensorAccessorUPtr accessor)
{
auto nid = g.add_node<ConstNode>(desc);
set_node_params(g, nid, params);
@@ -113,7 +120,8 @@ NodeID GraphBuilder::add_const_node(Graph &g, NodeParams params, const TensorDes
return nid;
}
-NodeID GraphBuilder::add_input_node(Graph &g, NodeParams params, const TensorDescriptor &desc, ITensorAccessorUPtr accessor)
+NodeID
+GraphBuilder::add_input_node(Graph &g, NodeParams params, const TensorDescriptor &desc, ITensorAccessorUPtr accessor)
{
auto nid = g.add_node<InputNode>(desc);
set_node_params(g, nid, params);
@@ -133,21 +141,35 @@ NodeID GraphBuilder::add_output_node(Graph &g, NodeParams params, NodeIdxPair in
return nid;
}
-NodeID GraphBuilder::add_activation_node(Graph &g, NodeParams params, NodeIdxPair input, ActivationLayerInfo act_info,
+NodeID GraphBuilder::add_activation_node(Graph &g,
+ NodeParams params,
+ NodeIdxPair input,
+ ActivationLayerInfo act_info,
const QuantizationInfo &out_quant_info)
{
return create_simple_single_input_output_node<ActivationLayerNode>(g, params, input, act_info, out_quant_info);
}
-NodeID GraphBuilder::add_arg_min_max_node(Graph &g, NodeParams params, NodeIdxPair input, ReductionOperation op, unsigned int axis,
- DataType out_data_type, const QuantizationInfo &out_quant_info)
+NodeID GraphBuilder::add_arg_min_max_node(Graph &g,
+ NodeParams params,
+ NodeIdxPair input,
+ ReductionOperation op,
+ unsigned int axis,
+ DataType out_data_type,
+ const QuantizationInfo &out_quant_info)
{
- return create_simple_single_input_output_node<ArgMinMaxLayerNode>(g, params, input, op, axis, out_data_type, out_quant_info);
+ return create_simple_single_input_output_node<ArgMinMaxLayerNode>(g, params, input, op, axis, out_data_type,
+ out_quant_info);
}
-NodeID GraphBuilder::add_batch_normalization_node(Graph &g, NodeParams params, NodeIdxPair input, float epsilon,
- ITensorAccessorUPtr mean_accessor, ITensorAccessorUPtr var_accessor,
- ITensorAccessorUPtr beta_accessor, ITensorAccessorUPtr gamma_accessor)
+NodeID GraphBuilder::add_batch_normalization_node(Graph &g,
+ NodeParams params,
+ NodeIdxPair input,
+ float epsilon,
+ ITensorAccessorUPtr mean_accessor,
+ ITensorAccessorUPtr var_accessor,
+ ITensorAccessorUPtr beta_accessor,
+ ITensorAccessorUPtr gamma_accessor)
{
check_nodeidx_pair(input, g);
@@ -167,14 +189,14 @@ NodeID GraphBuilder::add_batch_normalization_node(Graph &g, NodeParams params, N
// Create beta node
NodeID beta_nid = EmptyNodeID;
- if(has_beta)
+ if (has_beta)
{
beta_nid = add_const_node_with_name(g, params, "Beta", common_desc, std::move(beta_accessor));
}
// Create gamma node
NodeID gamma_nid = EmptyNodeID;
- if(has_gamma)
+ if (has_gamma)
{
gamma_nid = add_const_node_with_name(g, params, "Gamma", common_desc, std::move(gamma_accessor));
}
@@ -184,11 +206,11 @@ NodeID GraphBuilder::add_batch_normalization_node(Graph &g, NodeParams params, N
g.add_connection(input.node_id, input.index, batch_norm_nid, 0);
g.add_connection(mean_nid, 0, batch_norm_nid, 1);
g.add_connection(var_nid, 0, batch_norm_nid, 2);
- if(has_beta)
+ if (has_beta)
{
g.add_connection(beta_nid, 0, batch_norm_nid, 3);
}
- if(has_gamma)
+ if (has_gamma)
{
g.add_connection(gamma_nid, 0, batch_norm_nid, 4);
}
@@ -197,7 +219,8 @@ NodeID GraphBuilder::add_batch_normalization_node(Graph &g, NodeParams params, N
return batch_norm_nid;
}
-NodeID GraphBuilder::add_bounding_box_transform_node(Graph &g, NodeParams params, NodeIdxPair input, NodeIdxPair deltas, BoundingBoxTransformInfo info)
+NodeID GraphBuilder::add_bounding_box_transform_node(
+ Graph &g, NodeParams params, NodeIdxPair input, NodeIdxPair deltas, BoundingBoxTransformInfo info)
{
check_nodeidx_pair(input, g);
check_nodeidx_pair(deltas, g);
@@ -216,10 +239,17 @@ NodeID GraphBuilder::add_channel_shuffle_node(Graph &g, NodeParams params, NodeI
return create_simple_single_input_output_node<ChannelShuffleLayerNode>(g, params, input, num_groups);
}
-NodeID GraphBuilder::add_convolution_node(Graph &g, NodeParams params, NodeIdxPair input,
- Size2D kernel_spatial_extend, unsigned int depth, PadStrideInfo conv_info,
- unsigned int num_groups, ConvolutionMethod method, FastMathHint fast_math_hint,
- ITensorAccessorUPtr weights_accessor, ITensorAccessorUPtr bias_accessor,
+NodeID GraphBuilder::add_convolution_node(Graph &g,
+ NodeParams params,
+ NodeIdxPair input,
+ Size2D kernel_spatial_extend,
+ unsigned int depth,
+ PadStrideInfo conv_info,
+ unsigned int num_groups,
+ ConvolutionMethod method,
+ FastMathHint fast_math_hint,
+ ITensorAccessorUPtr weights_accessor,
+ ITensorAccessorUPtr bias_accessor,
const QuantizationInfo &weights_quant_info,
const QuantizationInfo &out_quant_info)
{
@@ -240,7 +270,7 @@ NodeID GraphBuilder::add_convolution_node(Graph &g, NodeParams params, NodeIdxPa
w_desc.shape.set(get_dimension_idx(input_data_layout, DataLayoutDimension::CHANNEL),
get_dimension_size(input_tensor_desc, DataLayoutDimension::CHANNEL) / num_groups);
w_desc.shape.set(get_dimension_idx(input_data_layout, DataLayoutDimension::BATCHES), depth);
- if(!weights_quant_info.empty())
+ if (!weights_quant_info.empty())
{
w_desc.quant_info = weights_quant_info;
}
@@ -249,11 +279,11 @@ NodeID GraphBuilder::add_convolution_node(Graph &g, NodeParams params, NodeIdxPa
// Create bias nodes
NodeID b_nid = EmptyNodeID;
- if(has_bias)
+ if (has_bias)
{
TensorDescriptor b_desc = input_tensor_desc;
b_desc.shape = TensorShape(depth);
- if(is_data_type_quantized_asymmetric(input_tensor_desc.data_type))
+ if (is_data_type_quantized_asymmetric(input_tensor_desc.data_type))
{
b_desc.data_type = DataType::S32;
}
@@ -264,7 +294,7 @@ NodeID GraphBuilder::add_convolution_node(Graph &g, NodeParams params, NodeIdxPa
NodeID conv_nid = g.add_node<ConvolutionLayerNode>(conv_info, num_groups, method, fast_math_hint, out_quant_info);
g.add_connection(input.node_id, input.index, conv_nid, 0);
g.add_connection(w_nid, 0, conv_nid, 1);
- if(has_bias)
+ if (has_bias)
{
g.add_connection(b_nid, 0, conv_nid, 2);
}
@@ -273,8 +303,12 @@ NodeID GraphBuilder::add_convolution_node(Graph &g, NodeParams params, NodeIdxPa
return conv_nid;
}
-NodeID GraphBuilder::add_deconvolution_node(Graph &g, NodeParams params, NodeIdxPair input,
- Size2D kernel_spatial_extend, unsigned int depth, PadStrideInfo deconv_info,
+NodeID GraphBuilder::add_deconvolution_node(Graph &g,
+ NodeParams params,
+ NodeIdxPair input,
+ Size2D kernel_spatial_extend,
+ unsigned int depth,
+ PadStrideInfo deconv_info,
ITensorAccessorUPtr weights_accessor,
ITensorAccessorUPtr bias_accessor)
{
@@ -300,11 +334,11 @@ NodeID GraphBuilder::add_deconvolution_node(Graph &g, NodeParams params, NodeIdx
// Create bias nodes
NodeID b_nid = EmptyNodeID;
- if(has_bias)
+ if (has_bias)
{
TensorDescriptor b_desc = input_tensor_desc;
b_desc.shape = TensorShape(depth);
- if(is_data_type_quantized_asymmetric(input_tensor_desc.data_type))
+ if (is_data_type_quantized_asymmetric(input_tensor_desc.data_type))
{
b_desc.data_type = DataType::S32;
}
@@ -312,10 +346,10 @@ NodeID GraphBuilder::add_deconvolution_node(Graph &g, NodeParams params, NodeIdx
}
// Create convolution node and connect
- NodeID deconv_nid = g.add_node<DeconvolutionLayerNode>(descriptors::DeconvolutionLayerDescriptor{ deconv_info });
+ NodeID deconv_nid = g.add_node<DeconvolutionLayerNode>(descriptors::DeconvolutionLayerDescriptor{deconv_info});
g.add_connection(input.node_id, input.index, deconv_nid, 0);
g.add_connection(w_nid, 0, deconv_nid, 1);
- if(has_bias)
+ if (has_bias)
{
g.add_connection(b_nid, 0, deconv_nid, 2);
}
@@ -324,14 +358,26 @@ NodeID GraphBuilder::add_deconvolution_node(Graph &g, NodeParams params, NodeIdx
return deconv_nid;
}
-NodeID GraphBuilder::add_concatenate_node(Graph &g, NodeParams params, const std::vector<NodeIdxPair> &inputs, const descriptors::ConcatLayerDescriptor &concat_descriptor)
+NodeID GraphBuilder::add_concatenate_node(Graph &g,
+ NodeParams params,
+ const std::vector<NodeIdxPair> &inputs,
+ const descriptors::ConcatLayerDescriptor &concat_descriptor)
{
- return create_simple_multiple_input_single_output_node<ConcatenateLayerNode>(g, params, inputs, inputs.size(), concat_descriptor);
+ return create_simple_multiple_input_single_output_node<ConcatenateLayerNode>(g, params, inputs, inputs.size(),
+ concat_descriptor);
}
-NodeID GraphBuilder::add_depthwise_convolution_node(Graph &g, NodeParams params, NodeIdxPair input, Size2D kernel_spatial_extend,
- PadStrideInfo conv_info, int depth_multiplier, DepthwiseConvolutionMethod method,
- ITensorAccessorUPtr weights_accessor, ITensorAccessorUPtr bias_accessor, const QuantizationInfo &quant_info, const QuantizationInfo &out_quant_info)
+NodeID GraphBuilder::add_depthwise_convolution_node(Graph &g,
+ NodeParams params,
+ NodeIdxPair input,
+ Size2D kernel_spatial_extend,
+ PadStrideInfo conv_info,
+ int depth_multiplier,
+ DepthwiseConvolutionMethod method,
+ ITensorAccessorUPtr weights_accessor,
+ ITensorAccessorUPtr bias_accessor,
+ const QuantizationInfo &quant_info,
+ const QuantizationInfo &out_quant_info)
{
check_nodeidx_pair(input, g);
ARM_COMPUTE_ERROR_ON((kernel_spatial_extend.width == 0) || (kernel_spatial_extend.height == 0));
@@ -348,7 +394,7 @@ NodeID GraphBuilder::add_depthwise_convolution_node(Graph &g, NodeParams params,
w_desc.shape.set(get_dimension_idx(input_data_layout, DataLayoutDimension::HEIGHT), kernel_spatial_extend.height);
w_desc.shape.set(get_dimension_idx(input_data_layout, DataLayoutDimension::CHANNEL),
get_dimension_size(input_tensor_desc, DataLayoutDimension::CHANNEL) * depth_multiplier);
- if(!quant_info.empty())
+ if (!quant_info.empty())
{
w_desc.quant_info = quant_info;
}
@@ -357,12 +403,13 @@ NodeID GraphBuilder::add_depthwise_convolution_node(Graph &g, NodeParams params,
// Create bias nodes
NodeID b_nid = EmptyNodeID;
- if(has_bias)
+ if (has_bias)
{
TensorDescriptor b_desc = input_tensor_desc;
- b_desc.shape = TensorShape(get_dimension_size(input_tensor_desc, DataLayoutDimension::CHANNEL) * depth_multiplier);
+ b_desc.shape =
+ TensorShape(get_dimension_size(input_tensor_desc, DataLayoutDimension::CHANNEL) * depth_multiplier);
- if(is_data_type_quantized_asymmetric(b_desc.data_type))
+ if (is_data_type_quantized_asymmetric(b_desc.data_type))
{
b_desc.data_type = DataType::S32;
}
@@ -374,7 +421,7 @@ NodeID GraphBuilder::add_depthwise_convolution_node(Graph &g, NodeParams params,
NodeID conv_nid = g.add_node<DepthwiseConvolutionLayerNode>(conv_info, depth_multiplier, method, out_quant_info);
g.add_connection(input.node_id, input.index, conv_nid, 0);
g.add_connection(w_nid, 0, conv_nid, 1);
- if(has_bias)
+ if (has_bias)
{
g.add_connection(b_nid, 0, conv_nid, 2);
}
@@ -393,7 +440,12 @@ NodeID GraphBuilder::add_dequantization_node(Graph &g, NodeParams params, NodeId
return create_simple_single_input_output_node<DequantizationLayerNode>(g, params, input);
}
-NodeID GraphBuilder::add_detection_output_node(Graph &g, NodeParams params, NodeIdxPair input_loc, NodeIdxPair input_conf, NodeIdxPair input_priorbox, const DetectionOutputLayerInfo &detect_info)
+NodeID GraphBuilder::add_detection_output_node(Graph &g,
+ NodeParams params,
+ NodeIdxPair input_loc,
+ NodeIdxPair input_conf,
+ NodeIdxPair input_priorbox,
+ const DetectionOutputLayerInfo &detect_info)
{
check_nodeidx_pair(input_loc, g);
check_nodeidx_pair(input_conf, g);
@@ -410,18 +462,24 @@ NodeID GraphBuilder::add_detection_output_node(Graph &g, NodeParams params, Node
return detect_nid;
}
-NodeID GraphBuilder::add_detection_post_process_node(Graph &g, NodeParams params, NodeIdxPair input_box_encoding, NodeIdxPair input_class_prediction, const DetectionPostProcessLayerInfo &detect_info,
- ITensorAccessorUPtr anchors_accessor, const QuantizationInfo &anchor_quant_info)
+NodeID GraphBuilder::add_detection_post_process_node(Graph &g,
+ NodeParams params,
+ NodeIdxPair input_box_encoding,
+ NodeIdxPair input_class_prediction,
+ const DetectionPostProcessLayerInfo &detect_info,
+ ITensorAccessorUPtr anchors_accessor,
+ const QuantizationInfo &anchor_quant_info)
{
check_nodeidx_pair(input_box_encoding, g);
check_nodeidx_pair(input_class_prediction, g);
// Get input tensor descriptor
- const TensorDescriptor input_box_encoding_tensor_desc = get_tensor_descriptor(g, g.node(input_box_encoding.node_id)->outputs()[0]);
+ const TensorDescriptor input_box_encoding_tensor_desc =
+ get_tensor_descriptor(g, g.node(input_box_encoding.node_id)->outputs()[0]);
// Calculate anchor descriptor
TensorDescriptor anchor_desc = input_box_encoding_tensor_desc;
- if(!anchor_quant_info.empty())
+ if (!anchor_quant_info.empty())
{
anchor_desc.quant_info = anchor_quant_info;
}
@@ -445,12 +503,13 @@ NodeID GraphBuilder::add_dummy_node(Graph &g, NodeParams params, NodeIdxPair inp
return create_simple_single_input_output_node<DummyNode>(g, params, input, shape);
}
-NodeID GraphBuilder::add_elementwise_node(Graph &g, NodeParams params, NodeIdxPair input0, NodeIdxPair input1, EltwiseOperation operation)
+NodeID GraphBuilder::add_elementwise_node(
+ Graph &g, NodeParams params, NodeIdxPair input0, NodeIdxPair input1, EltwiseOperation operation)
{
check_nodeidx_pair(input0, g);
check_nodeidx_pair(input1, g);
- NodeID nid = g.add_node<EltwiseLayerNode>(descriptors::EltwiseLayerDescriptor{ operation });
+ NodeID nid = g.add_node<EltwiseLayerNode>(descriptors::EltwiseLayerDescriptor{operation});
g.add_connection(input0.node_id, input0.index, nid, 0);
g.add_connection(input1.node_id, input1.index, nid, 1);
@@ -465,9 +524,15 @@ NodeID GraphBuilder::add_flatten_node(Graph &g, NodeParams params, NodeIdxPair i
return create_simple_single_input_output_node<FlattenLayerNode>(g, params, input);
}
-NodeID GraphBuilder::add_fully_connected_layer(Graph &g, NodeParams params, NodeIdxPair input, unsigned int num_outputs,
- NodeID weights_nid, NodeID bias_nid,
- const FullyConnectedLayerInfo fc_info, const QuantizationInfo &out_quant_info)
+NodeID GraphBuilder::add_fully_connected_layer(Graph &g,
+ NodeParams params,
+ NodeIdxPair input,
+ unsigned int num_outputs,
+ NodeID weights_nid,
+ NodeID bias_nid,
+ const FullyConnectedLayerInfo fc_info,
+ const QuantizationInfo &out_quant_info,
+ FastMathHint fast_math_hint)
{
check_nodeidx_pair(input, g);
ARM_COMPUTE_ERROR_ON(num_outputs == 0);
@@ -479,10 +544,10 @@ NodeID GraphBuilder::add_fully_connected_layer(Graph &g, NodeParams params, Node
const TensorDescriptor input_tensor_desc = get_tensor_descriptor(g, g.node(input.node_id)->outputs()[0]);
// Create fully connected node and connect
- NodeID fc_nid = g.add_node<FullyConnectedLayerNode>(num_outputs, out_quant_info, fc_info);
+ NodeID fc_nid = g.add_node<FullyConnectedLayerNode>(num_outputs, out_quant_info, fc_info, fast_math_hint);
g.add_connection(input.node_id, input.index, fc_nid, 0);
g.add_connection(weights_nid, 0, fc_nid, 1);
- if(has_bias)
+ if (has_bias)
{
g.add_connection(bias_nid, 0, fc_nid, 2);
}
@@ -492,10 +557,16 @@ NodeID GraphBuilder::add_fully_connected_layer(Graph &g, NodeParams params, Node
return fc_nid;
}
-NodeID GraphBuilder::add_fully_connected_layer(Graph &g, NodeParams params, NodeIdxPair input, unsigned int num_outputs,
- ITensorAccessorUPtr weights_accessor, ITensorAccessorUPtr bias_accessor,
+NodeID GraphBuilder::add_fully_connected_layer(Graph &g,
+ NodeParams params,
+ NodeIdxPair input,
+ unsigned int num_outputs,
+ ITensorAccessorUPtr weights_accessor,
+ ITensorAccessorUPtr bias_accessor,
const FullyConnectedLayerInfo fc_info,
- const QuantizationInfo &weights_quant_info, const QuantizationInfo &out_quant_info)
+ const QuantizationInfo &weights_quant_info,
+ const QuantizationInfo &out_quant_info,
+ FastMathHint fast_math_hint)
{
check_nodeidx_pair(input, g);
ARM_COMPUTE_ERROR_ON(num_outputs == 0);
@@ -506,16 +577,17 @@ NodeID GraphBuilder::add_fully_connected_layer(Graph &g, NodeParams params, Node
const TensorDescriptor input_tensor_desc = get_tensor_descriptor(g, g.node(input.node_id)->outputs()[0]);
// Create weights node
- TensorDescriptor w_desc = FullyConnectedLayerNode::compute_weights_descriptor(input_tensor_desc, num_outputs, fc_info, weights_quant_info);
+ TensorDescriptor w_desc = FullyConnectedLayerNode::compute_weights_descriptor(input_tensor_desc, num_outputs,
+ fc_info, weights_quant_info);
NodeID w_nid = add_const_node_with_name(g, params, "Weights", w_desc, std::move(weights_accessor));
// Create bias nodes
NodeID b_nid = EmptyNodeID;
- if(has_bias)
+ if (has_bias)
{
TensorDescriptor b_desc = input_tensor_desc;
b_desc.shape = TensorShape(num_outputs);
- if(is_data_type_quantized_asymmetric(input_tensor_desc.data_type))
+ if (is_data_type_quantized_asymmetric(input_tensor_desc.data_type))
{
b_desc.data_type = DataType::S32;
}
@@ -523,10 +595,10 @@ NodeID GraphBuilder::add_fully_connected_layer(Graph &g, NodeParams params, Node
}
// Create fully connected node and connect
- NodeID fc_nid = g.add_node<FullyConnectedLayerNode>(num_outputs, out_quant_info, fc_info);
+ NodeID fc_nid = g.add_node<FullyConnectedLayerNode>(num_outputs, out_quant_info, fc_info, fast_math_hint);
g.add_connection(input.node_id, input.index, fc_nid, 0);
g.add_connection(w_nid, 0, fc_nid, 1);
- if(has_bias)
+ if (has_bias)
{
g.add_connection(b_nid, 0, fc_nid, 2);
}
@@ -536,7 +608,12 @@ NodeID GraphBuilder::add_fully_connected_layer(Graph &g, NodeParams params, Node
return fc_nid;
}
-NodeID GraphBuilder::add_generate_proposals_node(Graph &g, NodeParams params, NodeIdxPair scores, NodeIdxPair deltas, NodeIdxPair anchors, GenerateProposalsInfo info)
+NodeID GraphBuilder::add_generate_proposals_node(Graph &g,
+ NodeParams params,
+ NodeIdxPair scores,
+ NodeIdxPair deltas,
+ NodeIdxPair anchors,
+ GenerateProposalsInfo info)
{
check_nodeidx_pair(scores, g);
check_nodeidx_pair(deltas, g);
@@ -557,13 +634,14 @@ NodeID GraphBuilder::add_l2_normalize_node(Graph &g, NodeParams params, NodeIdxP
return create_simple_single_input_output_node<L2NormalizeLayerNode>(g, params, input, axis, epsilon);
}
-NodeID GraphBuilder::add_normalization_node(Graph &g, NodeParams params, NodeIdxPair input, NormalizationLayerInfo norm_info)
+NodeID
+GraphBuilder::add_normalization_node(Graph &g, NodeParams params, NodeIdxPair input, NormalizationLayerInfo norm_info)
{
return create_simple_single_input_output_node<NormalizationLayerNode>(g, params, input, norm_info);
}
-NodeID GraphBuilder::add_normalize_planar_yuv_node(Graph &g, NodeParams params, NodeIdxPair input,
- ITensorAccessorUPtr mean_accessor, ITensorAccessorUPtr std_accessor)
+NodeID GraphBuilder::add_normalize_planar_yuv_node(
+ Graph &g, NodeParams params, NodeIdxPair input, ITensorAccessorUPtr mean_accessor, ITensorAccessorUPtr std_accessor)
{
check_nodeidx_pair(input, g);
@@ -588,12 +666,14 @@ NodeID GraphBuilder::add_normalize_planar_yuv_node(Graph &g, NodeParams params,
return norm_planar_yuv_nid;
}
-NodeID GraphBuilder::add_pad_node(Graph &g, NodeParams params, NodeIdxPair input, const PaddingList &paddings, PixelValue pad_value)
+NodeID GraphBuilder::add_pad_node(
+ Graph &g, NodeParams params, NodeIdxPair input, const PaddingList &paddings, PixelValue pad_value)
{
return create_simple_single_input_output_node<PadLayerNode>(g, params, input, paddings, pad_value);
}
-NodeID GraphBuilder::add_permute_node(Graph &g, NodeParams params, NodeIdxPair input, PermutationVector perm, DataLayout layout)
+NodeID GraphBuilder::add_permute_node(
+ Graph &g, NodeParams params, NodeIdxPair input, PermutationVector perm, DataLayout layout)
{
return create_simple_single_input_output_node<PermuteLayerNode>(g, params, input, perm, layout);
}
@@ -617,12 +697,18 @@ NodeID GraphBuilder::add_pooling_node(Graph &g, NodeParams params, NodeIdxPair i
return create_simple_single_input_output_node<PoolingLayerNode>(g, params, input, pool_info);
}
-NodeID GraphBuilder::add_print_node(Graph &g, NodeParams params, NodeIdxPair input, std::ostream &stream, const IOFormatInfo &format_info, const std::function<ITensor *(ITensor *)> transform)
+NodeID GraphBuilder::add_print_node(Graph &g,
+ NodeParams params,
+ NodeIdxPair input,
+ std::ostream &stream,
+ const IOFormatInfo &format_info,
+ const std::function<ITensor *(ITensor *)> transform)
{
return create_simple_single_input_output_node<PrintLayerNode>(g, params, input, stream, format_info, transform);
}
-NodeID GraphBuilder::add_priorbox_node(Graph &g, NodeParams params, NodeIdxPair input0, NodeIdxPair input1, const PriorBoxLayerInfo &prior_info)
+NodeID GraphBuilder::add_priorbox_node(
+ Graph &g, NodeParams params, NodeIdxPair input0, NodeIdxPair input1, const PriorBoxLayerInfo &prior_info)
{
check_nodeidx_pair(input0, g);
check_nodeidx_pair(input1, g);
@@ -637,12 +723,16 @@ NodeID GraphBuilder::add_priorbox_node(Graph &g, NodeParams params, NodeIdxPair
return prior_nid;
}
-NodeID GraphBuilder::add_quantization_node(Graph &g, NodeParams params, NodeIdxPair input, const QuantizationInfo &out_quant_info)
+NodeID GraphBuilder::add_quantization_node(Graph &g,
+ NodeParams params,
+ NodeIdxPair input,
+ const QuantizationInfo &out_quant_info)
{
return create_simple_single_input_output_node<QuantizationLayerNode>(g, params, input, out_quant_info);
}
-NodeID GraphBuilder::add_reduction_operation_node(Graph &g, NodeParams params, NodeIdxPair input, ReductionOperation op, int axis, bool keep_dims)
+NodeID GraphBuilder::add_reduction_operation_node(
+ Graph &g, NodeParams params, NodeIdxPair input, ReductionOperation op, int axis, bool keep_dims)
{
return create_simple_single_input_output_node<ReductionLayerNode>(g, params, input, op, axis, keep_dims);
}
@@ -657,13 +747,14 @@ NodeID GraphBuilder::add_reshape_node(Graph &g, NodeParams params, NodeIdxPair i
return create_simple_single_input_output_node<ReshapeLayerNode>(g, params, input, shape);
}
-NodeID GraphBuilder::add_resize_node(Graph &g, NodeParams params, NodeIdxPair input, InterpolationPolicy policy,
- float width_scale, float height_scale)
+NodeID GraphBuilder::add_resize_node(
+ Graph &g, NodeParams params, NodeIdxPair input, InterpolationPolicy policy, float width_scale, float height_scale)
{
return create_simple_single_input_output_node<ResizeLayerNode>(g, params, input, policy, width_scale, height_scale);
}
-NodeID GraphBuilder::add_roi_align_node(Graph &g, NodeParams params, NodeIdxPair input, NodeIdxPair rois, ROIPoolingLayerInfo pool_info)
+NodeID GraphBuilder::add_roi_align_node(
+ Graph &g, NodeParams params, NodeIdxPair input, NodeIdxPair rois, ROIPoolingLayerInfo pool_info)
{
check_nodeidx_pair(input, g);
check_nodeidx_pair(rois, g);
@@ -677,7 +768,11 @@ NodeID GraphBuilder::add_roi_align_node(Graph &g, NodeParams params, NodeIdxPair
return nid;
}
-NodeID GraphBuilder::add_scale_layer(Graph &g, const NodeParams &params, NodeIdxPair input, ITensorAccessorUPtr mul_accessor, ITensorAccessorUPtr add_accessor)
+NodeID GraphBuilder::add_scale_layer(Graph &g,
+ const NodeParams &params,
+ NodeIdxPair input,
+ ITensorAccessorUPtr mul_accessor,
+ ITensorAccessorUPtr add_accessor)
{
check_nodeidx_pair(input, g);
@@ -687,22 +782,23 @@ NodeID GraphBuilder::add_scale_layer(Graph &g, const NodeParams &params, NodeIdx
// Create mul node
TensorDescriptor mul_desc = input_tensor_desc;
- const size_t C = input_tensor_desc.shape[get_dimension_idx(input_data_layout, DataLayoutDimension::CHANNEL)];
+ const size_t C = input_tensor_desc.shape[get_dimension_idx(input_data_layout, DataLayoutDimension::CHANNEL)];
mul_desc.shape.set(get_dimension_idx(input_data_layout, DataLayoutDimension::WIDTH), 1);
mul_desc.shape.set(get_dimension_idx(input_data_layout, DataLayoutDimension::HEIGHT), 1);
mul_desc.shape.set(get_dimension_idx(input_data_layout, DataLayoutDimension::CHANNEL), C);
NodeID mul_const_nid = add_const_node_with_name(g, params, "Mul", mul_desc, std::move(mul_accessor));
- NodeIdxPair mul_const_nidxp = { mul_const_nid, 0 };
+ NodeIdxPair mul_const_nidxp = {mul_const_nid, 0};
// Create add node
TensorDescriptor add_desc = mul_desc;
NodeID add_const_nid = add_const_node_with_name(g, params, "Add", add_desc, std::move(add_accessor));
- NodeIdxPair add_const_nidxp = { add_const_nid, 0 };
+ NodeIdxPair add_const_nidxp = {add_const_nid, 0};
// Create node and connect
- NodeID mul_node = GraphBuilder::add_elementwise_node(g, params, input, mul_const_nidxp, EltwiseOperation::Mul);
- NodeIdxPair mulnode_nidxp = { mul_node, 0 };
- NodeID add_node = GraphBuilder::add_elementwise_node(g, params, mulnode_nidxp, add_const_nidxp, EltwiseOperation::Add);
+ NodeID mul_node = GraphBuilder::add_elementwise_node(g, params, input, mul_const_nidxp, EltwiseOperation::Mul);
+ NodeIdxPair mulnode_nidxp = {mul_node, 0};
+ NodeID add_node =
+ GraphBuilder::add_elementwise_node(g, params, mulnode_nidxp, add_const_nidxp, EltwiseOperation::Add);
return add_node;
}
@@ -712,17 +808,25 @@ NodeID GraphBuilder::add_softmax_node(Graph &g, NodeParams params, NodeIdxPair i
return create_simple_single_input_output_node<SoftmaxLayerNode>(g, params, input, beta);
}
-NodeID GraphBuilder::add_slice_node(Graph &g, NodeParams params, NodeIdxPair input, Coordinates &starts, Coordinates &ends)
+NodeID
+GraphBuilder::add_slice_node(Graph &g, NodeParams params, NodeIdxPair input, Coordinates &starts, Coordinates &ends)
{
return create_simple_single_input_output_node<SliceLayerNode>(g, params, input, starts, ends);
}
-NodeID GraphBuilder::add_split_node(Graph &g, NodeParams params, NodeIdxPair input, unsigned int num_splits, unsigned int axis)
+NodeID
+GraphBuilder::add_split_node(Graph &g, NodeParams params, NodeIdxPair input, unsigned int num_splits, unsigned int axis)
{
return create_simple_single_input_output_node<SplitLayerNode>(g, params, input, num_splits, axis);
}
-NodeID GraphBuilder::add_strided_slice_node(Graph &g, NodeParams params, NodeIdxPair input, Coordinates &starts, Coordinates &ends, BiStrides &strides, StridedSliceLayerInfo info)
+NodeID GraphBuilder::add_strided_slice_node(Graph &g,
+ NodeParams params,
+ NodeIdxPair input,
+ Coordinates &starts,
+ Coordinates &ends,
+ BiStrides &strides,
+ StridedSliceLayerInfo info)
{
return create_simple_single_input_output_node<StridedSliceLayerNode>(g, params, input, starts, ends, strides, info);
}
@@ -769,7 +873,8 @@ NodeID GraphBuilder::add_yolo_node(Graph &g, NodeParams params, NodeIdxPair inpu
g.add_connection(input.node_id, input.index, cls, 0);
g.add_connection(cls, 0, cls_act, 0);
- NodeID concat = g.add_node<ConcatenateLayerNode>(3, descriptors::ConcatLayerDescriptor(DataLayoutDimension::CHANNEL));
+ NodeID concat =
+ g.add_node<ConcatenateLayerNode>(3, descriptors::ConcatLayerDescriptor(DataLayoutDimension::CHANNEL));
set_node_params(g, concat, params);
g.add_connection(act_box, 0, concat, 0);
g.add_connection(imm, 0, concat, 1);
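
Nearly all of the churn in GraphBuilder.cpp is clang-format reflow (space after control keywords, one parameter per line, brace-init spacing); the one interface change is the FastMathHint argument now accepted by both add_fully_connected_layer overloads and forwarded into FullyConnectedLayerNode. A hedged call-site sketch of the accessor-based overload, where prev, weights_accessor and bias_accessor are placeholders:

// Sketch: only the trailing FastMathHint argument is new in this patch.
NodeID fc_nid = GraphBuilder::add_fully_connected_layer(
    g, params, NodeIdxPair{prev, 0}, /* num_outputs */ 1000,
    std::move(weights_accessor), std::move(bias_accessor),
    FullyConnectedLayerInfo{}, QuantizationInfo{}, QuantizationInfo{},
    FastMathHint::Enabled);
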
diff --git a/src/graph/GraphContext.cpp b/src/graph/GraphContext.cpp
index 7b74c2fe0e..10850aa259 100644
--- a/src/graph/GraphContext.cpp
+++ b/src/graph/GraphContext.cpp
@@ -24,15 +24,14 @@
#include "arm_compute/graph/GraphContext.h"
#include "arm_compute/graph.h"
-#include "arm_compute/graph/Utils.h"
#include "arm_compute/graph/backends/BackendRegistry.h"
+#include "arm_compute/graph/Utils.h"
namespace arm_compute
{
namespace graph
{
-GraphContext::GraphContext()
- : _config(), _memory_managers(), _weights_managers()
+GraphContext::GraphContext() : _config(), _memory_managers(), _weights_managers()
{
}
@@ -56,7 +55,7 @@ void GraphContext::set_config(const GraphConfig &config)
bool GraphContext::insert_memory_management_ctx(MemoryManagerContext &&memory_ctx)
{
Target target = memory_ctx.target;
- if(target == Target::UNSPECIFIED || _memory_managers.find(target) != std::end(_memory_managers))
+ if (target == Target::UNSPECIFIED || _memory_managers.find(target) != std::end(_memory_managers))
{
return false;
}
@@ -79,7 +78,7 @@ bool GraphContext::insert_weights_management_ctx(WeightsManagerContext &&weights
{
Target target = weights_managers.target;
- if(_weights_managers.find(target) != std::end(_weights_managers))
+ if (_weights_managers.find(target) != std::end(_weights_managers))
{
return false;
}
@@ -102,17 +101,17 @@ std::map<Target, WeightsManagerContext> &GraphContext::weights_managers()
void GraphContext::finalize()
{
const size_t num_pools = 1;
- for(auto &mm_obj : _memory_managers)
+ for (auto &mm_obj : _memory_managers)
{
ARM_COMPUTE_ERROR_ON(!mm_obj.second.allocator);
// Finalize intra layer memory manager
- if(mm_obj.second.intra_mm != nullptr)
+ if (mm_obj.second.intra_mm != nullptr)
{
mm_obj.second.intra_mm->populate(*mm_obj.second.allocator, num_pools);
}
// Finalize cross layer memory manager
- if(mm_obj.second.cross_mm != nullptr)
+ if (mm_obj.second.cross_mm != nullptr)
{
mm_obj.second.cross_mm->populate(*mm_obj.second.allocator, num_pools);
}
diff --git a/src/graph/GraphManager.cpp b/src/graph/GraphManager.cpp
index e357f10401..58ae60d4cc 100644
--- a/src/graph/GraphManager.cpp
+++ b/src/graph/GraphManager.cpp
@@ -23,30 +23,32 @@
*/
#include "arm_compute/graph/GraphManager.h"
+#include "arm_compute/graph/algorithms/TopologicalSort.h"
+#include "arm_compute/graph/detail/CrossLayerMemoryManagerHelpers.h"
+#include "arm_compute/graph/detail/ExecutionHelpers.h"
#include "arm_compute/graph/Graph.h"
#include "arm_compute/graph/GraphContext.h"
#include "arm_compute/graph/Logger.h"
#include "arm_compute/graph/PassManager.h"
#include "arm_compute/graph/TypePrinter.h"
#include "arm_compute/graph/Utils.h"
-#include "arm_compute/graph/detail/CrossLayerMemoryManagerHelpers.h"
-#include "arm_compute/graph/detail/ExecutionHelpers.h"
-#include "arm_compute/graph/algorithms/TopologicalSort.h"
+#include "src/common/utils/Log.h"
namespace arm_compute
{
namespace graph
{
-GraphManager::GraphManager()
- : _workloads()
+GraphManager::GraphManager() : _workloads()
{
}
void GraphManager::finalize_graph(Graph &graph, GraphContext &ctx, PassManager &pm, Target target)
{
+ ARM_COMPUTE_LOG_INFO_WITH_FUNCNAME_ACL("Initiate graph configuration!");
+
// Check if graph has been registered
- if(_workloads.find(graph.id()) != std::end(_workloads))
+ if (_workloads.find(graph.id()) != std::end(_workloads))
{
ARM_COMPUTE_ERROR("Graph is already registered!");
}
@@ -59,7 +61,7 @@ void GraphManager::finalize_graph(Graph &graph, GraphContext &ctx, PassManager &
// In case CLVK is selected, use the CL backend and
// update config
- if(target == Target::CLVK)
+ if (target == Target::CLVK)
{
forced_target = Target::CL;
GraphConfig config = ctx.config();
@@ -68,7 +70,7 @@ void GraphManager::finalize_graph(Graph &graph, GraphContext &ctx, PassManager &
ctx.set_config(config);
}
- if(!is_target_supported(target))
+ if (!is_target_supported(target))
{
forced_target = get_default_target();
ARM_COMPUTE_LOG_GRAPH_INFO("Switching target from " << target << " to " << forced_target << std::endl);
@@ -102,7 +104,7 @@ void GraphManager::finalize_graph(Graph &graph, GraphContext &ctx, PassManager &
detail::prepare_all_tasks(workload);
// Setup tensor memory (Allocate all tensors or setup transition manager)
- if(ctx.config().use_transition_memory_manager)
+ if (ctx.config().use_transition_memory_manager)
{
detail::configure_transition_manager(graph, ctx, workload);
}
@@ -121,14 +123,16 @@ void GraphManager::finalize_graph(Graph &graph, GraphContext &ctx, PassManager &
void GraphManager::execute_graph(Graph &graph)
{
+ ARM_COMPUTE_LOG_INFO_WITH_FUNCNAME_ACL("Initiate graph execution!");
+
// Check if graph is finalized
auto it = _workloads.find(graph.id());
ARM_COMPUTE_ERROR_ON_MSG(it == std::end(_workloads), "Graph is not registered!");
- while(true)
+ while (true)
{
// Call input accessors
- if(!detail::call_all_input_node_accessors(it->second))
+ if (!detail::call_all_input_node_accessors(it->second))
{
return;
}
@@ -137,7 +141,7 @@ void GraphManager::execute_graph(Graph &graph)
detail::call_all_tasks(it->second);
// Call output accessors
- if(!detail::call_all_output_node_accessors(it->second))
+ if (!detail::call_all_output_node_accessors(it->second))
{
return;
}
@@ -152,4 +156,4 @@ void GraphManager::invalidate_graph(Graph &graph)
_workloads.erase(it);
}
} // namespace graph
-} // namespace arm_compute
\ No newline at end of file
+} // namespace arm_compute
diff --git a/src/graph/INode.cpp b/src/graph/INode.cpp
index 93f085404a..83c3ef7e37 100644
--- a/src/graph/INode.cpp
+++ b/src/graph/INode.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 Arm Limited.
+ * Copyright (c) 2018,2021,2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -75,17 +75,17 @@ void INode::set_assigned_target(Target target)
void INode::set_output_tensor(TensorID tid, size_t idx)
{
- if(tid != NullTensorID && (idx < _outputs.size()) && (_graph->tensor(tid) != nullptr))
+ if (tid != NullTensorID && (idx < _outputs.size()) && (_graph->tensor(tid) != nullptr))
{
ARM_COMPUTE_ERROR_ON(_graph == nullptr);
Tensor *updated_tensor = _graph->tensor(tid);
_outputs[idx] = tid;
// Set tensor to all output edges of the node
- for(auto &output_edge_id : _output_edges)
+ for (auto &output_edge_id : _output_edges)
{
auto output_edge = _graph->edge(output_edge_id);
- if(output_edge != nullptr)
+ if (output_edge != nullptr)
{
// Unbind edge from current tensor
auto current_output_tensor = output_edge->tensor();
@@ -200,4 +200,4 @@ Target INode::assigned_target() const
return _assigned_target;
}
} // namespace graph
-} // namespace arm_compute
\ No newline at end of file
+} // namespace arm_compute
diff --git a/src/graph/INodeVisitor.cpp b/src/graph/INodeVisitor.cpp
new file mode 100644
index 0000000000..90b2e3327f
--- /dev/null
+++ b/src/graph/INodeVisitor.cpp
@@ -0,0 +1,147 @@
+/*
+ * Copyright (c) 2021, 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/graph/INodeVisitor.h"
+
+#include "arm_compute/graph/nodes/Nodes.h"
+
+namespace arm_compute
+{
+namespace graph
+{
+#ifndef DOXYGEN_SKIP_THIS
+void DefaultNodeVisitor::visit(INode &n)
+{
+ default_visit(n);
+}
+void DefaultNodeVisitor::visit(ActivationLayerNode &n)
+{
+ default_visit(n);
+}
+void DefaultNodeVisitor::visit(BatchNormalizationLayerNode &n)
+{
+ default_visit(n);
+}
+void DefaultNodeVisitor::visit(ConcatenateLayerNode &n)
+{
+ default_visit(n);
+}
+void DefaultNodeVisitor::visit(ConstNode &n)
+{
+ default_visit(n);
+}
+void DefaultNodeVisitor::visit(ConvolutionLayerNode &n)
+{
+ default_visit(n);
+}
+void DefaultNodeVisitor::visit(DequantizationLayerNode &n)
+{
+ default_visit(n);
+}
+void DefaultNodeVisitor::visit(DetectionOutputLayerNode &n)
+{
+ default_visit(n);
+}
+void DefaultNodeVisitor::visit(DetectionPostProcessLayerNode &n)
+{
+ default_visit(n);
+}
+void DefaultNodeVisitor::visit(DepthwiseConvolutionLayerNode &n)
+{
+ default_visit(n);
+}
+void DefaultNodeVisitor::visit(EltwiseLayerNode &n)
+{
+ default_visit(n);
+}
+void DefaultNodeVisitor::visit(FlattenLayerNode &n)
+{
+ default_visit(n);
+}
+void DefaultNodeVisitor::visit(FullyConnectedLayerNode &n)
+{
+ default_visit(n);
+}
+void DefaultNodeVisitor::visit(FusedConvolutionBatchNormalizationNode &n)
+{
+ default_visit(n);
+}
+void DefaultNodeVisitor::visit(FusedDepthwiseConvolutionBatchNormalizationNode &n)
+{
+ default_visit(n);
+}
+void DefaultNodeVisitor::visit(InputNode &n)
+{
+ default_visit(n);
+}
+void DefaultNodeVisitor::visit(NormalizationLayerNode &n)
+{
+ default_visit(n);
+}
+void DefaultNodeVisitor::visit(OutputNode &n)
+{
+ default_visit(n);
+}
+void DefaultNodeVisitor::visit(PermuteLayerNode &n)
+{
+ default_visit(n);
+}
+void DefaultNodeVisitor::visit(PoolingLayerNode &n)
+{
+ default_visit(n);
+}
+void DefaultNodeVisitor::visit(PReluLayerNode &n)
+{
+ default_visit(n);
+}
+void DefaultNodeVisitor::visit(PrintLayerNode &n)
+{
+ default_visit(n);
+}
+void DefaultNodeVisitor::visit(PriorBoxLayerNode &n)
+{
+ default_visit(n);
+}
+void DefaultNodeVisitor::visit(QuantizationLayerNode &n)
+{
+ default_visit(n);
+}
+void DefaultNodeVisitor::visit(ReshapeLayerNode &n)
+{
+ default_visit(n);
+}
+void DefaultNodeVisitor::visit(SoftmaxLayerNode &n)
+{
+ default_visit(n);
+}
+void DefaultNodeVisitor::visit(SplitLayerNode &n)
+{
+ default_visit(n);
+}
+void DefaultNodeVisitor::visit(StackLayerNode &n)
+{
+ default_visit(n);
+}
+#endif /* DOXYGEN_SKIP_THIS */
+} // namespace graph
+} // namespace arm_compute
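
Every overload of the newly out-of-lined DefaultNodeVisitor simply forwards to default_visit(), so a concrete visitor only needs to override default_visit() plus whichever node-specific visit() overloads it cares about. A sketch under that assumption; the NodeCounter class is hypothetical:

// Hypothetical visitor: counts every node dispatched to it by
// overriding only default_visit().
class NodeCounter final : public arm_compute::graph::DefaultNodeVisitor
{
public:
    void default_visit(arm_compute::graph::INode &n) override
    {
        ARM_COMPUTE_UNUSED(n);
        ++_count;
    }
    size_t count() const
    {
        return _count;
    }

private:
    size_t _count{0};
};
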
diff --git a/src/graph/PassManager.cpp b/src/graph/PassManager.cpp
index f7e214c1b4..9a889e1da3 100644
--- a/src/graph/PassManager.cpp
+++ b/src/graph/PassManager.cpp
@@ -29,8 +29,7 @@ namespace arm_compute
{
namespace graph
{
-PassManager::PassManager()
- : _passes()
+PassManager::PassManager() : _passes()
{
}
@@ -46,7 +45,7 @@ IGraphMutator *PassManager::pass(size_t index)
void PassManager::append(std::unique_ptr<IGraphMutator> pass, bool conditional)
{
- if(pass && conditional)
+ if (pass && conditional)
{
ARM_COMPUTE_LOG_GRAPH_VERBOSE("Appending mutating pass : " << pass->name() << std::endl);
_passes.push_back(std::move(pass));
@@ -60,9 +59,9 @@ void PassManager::clear()
void PassManager::run_all(Graph &g)
{
- for(auto &pass : _passes)
+ for (auto &pass : _passes)
{
- if(pass)
+ if (pass)
{
ARM_COMPUTE_LOG_GRAPH_INFO("Running mutating pass : " << pass->name() << std::endl);
pass->mutate(g);
@@ -72,9 +71,9 @@ void PassManager::run_all(Graph &g)
void PassManager::run_type(Graph &g, IGraphMutator::MutationType type)
{
- for(auto &pass : _passes)
+ for (auto &pass : _passes)
{
- if(pass && (pass->type() == type))
+ if (pass && (pass->type() == type))
{
ARM_COMPUTE_LOG_GRAPH_INFO("Running mutating pass : " << pass->name() << std::endl);
pass->mutate(g);
@@ -84,17 +83,17 @@ void PassManager::run_type(Graph &g, IGraphMutator::MutationType type)
void PassManager::run_index(Graph &g, size_t index)
{
- if(index >= _passes.size())
+ if (index >= _passes.size())
{
return;
}
auto &pass = _passes.at(index);
- if(pass != nullptr)
+ if (pass != nullptr)
{
ARM_COMPUTE_LOG_GRAPH_INFO("Running mutating pass : " << pass->name() << std::endl);
pass->mutate(g);
}
}
} // namespace graph
-} // namespace arm_compute
\ No newline at end of file
+} // namespace arm_compute
diff --git a/src/graph/Tensor.cpp b/src/graph/Tensor.cpp
index f69d49d016..72679c4ea4 100644
--- a/src/graph/Tensor.cpp
+++ b/src/graph/Tensor.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2019,2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -75,25 +75,33 @@ std::unique_ptr<ITensorAccessor> Tensor::extract_accessor()
bool Tensor::call_accessor()
{
// Early exit guard
- if(!_accessor || !_handle)
+ if (!_accessor || !_handle)
{
return false;
}
- // Map tensor
- _handle->map(true);
+ const bool access_data = _accessor->access_tensor_data();
- // Return in case of null backend buffer
- if(_handle->tensor().buffer() == nullptr)
+ if (access_data)
{
- return false;
+ // Map tensor
+ _handle->map(true);
+
+ // Return in case of null backend buffer
+ if (_handle->tensor().buffer() == nullptr)
+ {
+ return false;
+ }
}
// Call accessor
bool retval = _accessor->access_tensor(_handle->tensor());
- // Unmap tensor
- _handle->unmap();
+ if (access_data)
+ {
+ // Unmap tensor
+ _handle->unmap();
+ }
return retval;
}
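Annotation: call_accessor() above no longer maps the tensor unconditionally. The accessor is first asked, via access_tensor_data(), whether it needs the backing buffer at all, and the potentially blocking map()/unmap() pair is skipped when it does not. A sketch of an accessor that opts out, assuming ITensorAccessor gained a virtual access_tensor_data() defaulting to true, as this hunk implies:

    #include "arm_compute/graph/ITensorAccessor.h"
    #include <iostream>

    // Metadata-only accessor: never touches tensor.buffer(), so no mapping is
    // required. access_tensor_data() is the hook this patch introduces.
    class ShapeLogger final : public arm_compute::graph::ITensorAccessor
    {
    public:
        bool access_tensor(arm_compute::ITensor &tensor) override
        {
            std::cout << "elements: " << tensor.info()->tensor_shape().total_size() << "\n";
            return true;
        }
        bool access_tensor_data() override
        {
            return false; // tell the graph not to map the backend buffer
        }
    };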
diff --git a/src/graph/TypeLoader.cpp b/src/graph/TypeLoader.cpp
index 3c51289dba..e1248fbb6b 100644
--- a/src/graph/TypeLoader.cpp
+++ b/src/graph/TypeLoader.cpp
@@ -31,10 +31,9 @@ namespace arm_compute
{
arm_compute::DataLayout data_layout_from_name(const std::string &name)
{
- static const std::map<std::string, arm_compute::DataLayout> data_layouts =
- {
- { "nhwc", DataLayout::NHWC },
- { "nchw", DataLayout::NCHW },
+ static const std::map<std::string, arm_compute::DataLayout> data_layouts = {
+ {"nhwc", DataLayout::NHWC},
+ {"nchw", DataLayout::NCHW},
};
#ifndef ARM_COMPUTE_EXCEPTIONS_DISABLED
@@ -45,7 +44,7 @@ arm_compute::DataLayout data_layout_from_name(const std::string &name)
#ifndef ARM_COMPUTE_EXCEPTIONS_DISABLED
}
- catch(const std::out_of_range &)
+ catch (const std::out_of_range &)
{
throw std::invalid_argument(name);
}
@@ -55,11 +54,10 @@ namespace graph
{
Target target_from_name(const std::string &name)
{
- static const std::map<std::string, Target> targets =
- {
- { "neon", Target::NEON },
- { "cl", Target::CL },
- { "clvk", Target::CLVK },
+ static const std::map<std::string, Target> targets = {
+ {"neon", Target::NEON},
+ {"cl", Target::CL},
+ {"clvk", Target::CLVK},
};
#ifndef ARM_COMPUTE_EXCEPTIONS_DISABLED
@@ -70,7 +68,7 @@ Target target_from_name(const std::string &name)
#ifndef ARM_COMPUTE_EXCEPTIONS_DISABLED
}
- catch(const std::out_of_range &)
+ catch (const std::out_of_range &)
{
throw std::invalid_argument(name);
}
@@ -79,12 +77,11 @@ Target target_from_name(const std::string &name)
ConvolutionMethod Convolution_method_from_name(const std::string &name)
{
- static const std::map<std::string, ConvolutionMethod> methods =
- {
- { "default", ConvolutionMethod::Default },
- { "direct", ConvolutionMethod::Direct },
- { "gemm", ConvolutionMethod::GEMM },
- { "winograd", ConvolutionMethod::Winograd },
+ static const std::map<std::string, ConvolutionMethod> methods = {
+ {"default", ConvolutionMethod::Default},
+ {"direct", ConvolutionMethod::Direct},
+ {"gemm", ConvolutionMethod::GEMM},
+ {"winograd", ConvolutionMethod::Winograd},
};
#ifndef ARM_COMPUTE_EXCEPTIONS_DISABLED
@@ -95,7 +92,7 @@ ConvolutionMethod Convolution_method_from_name(const std::string &name)
#ifndef ARM_COMPUTE_EXCEPTIONS_DISABLED
}
- catch(const std::out_of_range &)
+ catch (const std::out_of_range &)
{
throw std::invalid_argument(name);
}
@@ -104,10 +101,9 @@ ConvolutionMethod Convolution_method_from_name(const std::string &name)
DepthwiseConvolutionMethod depthwise_convolution_method_from_name(const std::string &name)
{
- static const std::map<std::string, DepthwiseConvolutionMethod> methods =
- {
- { "default", DepthwiseConvolutionMethod::Default },
- { "optimized3x3", DepthwiseConvolutionMethod::Optimized3x3 },
+ static const std::map<std::string, DepthwiseConvolutionMethod> methods = {
+ {"default", DepthwiseConvolutionMethod::Default},
+ {"optimized3x3", DepthwiseConvolutionMethod::Optimized3x3},
};
#ifndef ARM_COMPUTE_EXCEPTIONS_DISABLED
@@ -118,7 +114,7 @@ DepthwiseConvolutionMethod depthwise_convolution_method_from_name(const std::str
#ifndef ARM_COMPUTE_EXCEPTIONS_DISABLED
}
- catch(const std::out_of_range &)
+ catch (const std::out_of_range &)
{
throw std::invalid_argument(name);
}
diff --git a/src/graph/Utils.cpp b/src/graph/Utils.cpp
index 37af1bff93..452d8ec7b2 100644
--- a/src/graph/Utils.cpp
+++ b/src/graph/Utils.cpp
@@ -23,8 +23,8 @@
*/
#include "arm_compute/graph/Utils.h"
-#include "arm_compute/graph/GraphContext.h"
#include "arm_compute/graph/backends/BackendRegistry.h"
+#include "arm_compute/graph/GraphContext.h"
#include "arm_compute/graph/mutators/GraphMutators.h"
namespace arm_compute
@@ -33,16 +33,17 @@ namespace graph
{
bool is_target_supported(Target target)
{
- return backends::BackendRegistry::get().contains(target) && backends::BackendRegistry::get().find_backend(target)->is_backend_supported();
+ return backends::BackendRegistry::get().contains(target) &&
+ backends::BackendRegistry::get().find_backend(target)->is_backend_supported();
}
Target get_default_target()
{
- if(is_target_supported(Target::NEON))
+ if (is_target_supported(Target::NEON))
{
return Target::NEON;
}
- if(is_target_supported(Target::CL))
+ if (is_target_supported(Target::CL))
{
return Target::CL;
}
@@ -52,18 +53,18 @@ Target get_default_target()
void force_target_to_graph(Graph &g, Target target)
{
auto &nodes = g.nodes();
- for(auto &node : nodes)
+ for (auto &node : nodes)
{
- if(node)
+ if (node)
{
node->set_assigned_target(target);
}
}
auto &tensors = g.tensors();
- for(auto &tensor : tensors)
+ for (auto &tensor : tensors)
{
- if(tensor)
+ if (tensor)
{
tensor->desc().target = target;
}
@@ -76,9 +77,9 @@ PassManager create_default_pass_manager(Target target, const GraphConfig &cfg)
PassManager pm;
// Passes that mutate graph IR
- if(cfg.use_synthetic_type)
+ if (cfg.use_synthetic_type)
{
- switch(cfg.synthetic_type)
+ switch (cfg.synthetic_type)
{
case DataType::QASYMM8:
case DataType::QASYMM8_SIGNED:
@@ -107,21 +108,32 @@ PassManager create_default_pass_manager(Target target, const GraphConfig &cfg)
void release_default_graph_context(GraphContext &ctx)
{
- for(const auto &backend : backends::BackendRegistry::get().backends())
+ for (const auto &backend : backends::BackendRegistry::get().backends())
{
- if(backend.second->is_backend_supported())
+ if (backend.second->is_backend_supported())
{
backend.second->release_backend_context(ctx);
}
}
}
+void sync_backends()
+{
+ for (const auto &backend : backends::BackendRegistry::get().backends())
+ {
+ if (backend.second->backend_allocator())
+ {
+ backend.second->sync();
+ }
+ }
+}
+
void setup_requested_backend_context(GraphContext &ctx, Target target)
{
- if(backends::BackendRegistry::get().contains(target))
+ if (backends::BackendRegistry::get().contains(target))
{
const auto &backend = backends::BackendRegistry::get().find_backend(target);
- if(backend->is_backend_supported())
+ if (backend->is_backend_supported())
{
backend->setup_backend_context(ctx);
}
@@ -130,20 +142,22 @@ void setup_requested_backend_context(GraphContext &ctx, Target target)
size_t get_dimension_size(const TensorDescriptor &descriptor, const DataLayoutDimension data_layout_dimension)
{
- ARM_COMPUTE_ERROR_ON_MSG(descriptor.layout == DataLayout::UNKNOWN, "Cannot retrieve the dimension index for an unknown layout!");
+ ARM_COMPUTE_ERROR_ON_MSG(descriptor.layout == DataLayout::UNKNOWN,
+ "Cannot retrieve the dimension index for an unknown layout!");
return descriptor.shape[get_dimension_idx(descriptor.layout, data_layout_dimension)];
}
size_t get_dimension_idx(DataLayout data_layout, const DataLayoutDimension data_layout_dimension)
{
- ARM_COMPUTE_ERROR_ON_MSG(data_layout == DataLayout::UNKNOWN, "Cannot retrieve the dimension index for an unknown layout!");
+ ARM_COMPUTE_ERROR_ON_MSG(data_layout == DataLayout::UNKNOWN,
+ "Cannot retrieve the dimension index for an unknown layout!");
/* Return the index based on the data layout
* [N C H W]
* [3 2 1 0]
* [N H W C]
*/
- switch(data_layout_dimension)
+ switch (data_layout_dimension)
{
case DataLayoutDimension::CHANNEL:
return (data_layout == DataLayout::NCHW) ? 2 : 0;
@@ -170,22 +184,42 @@ std::vector<NodeIdxPair> get_driving_nodes(const INode &node)
const Graph *g = node.graph();
ARM_COMPUTE_ERROR_ON(g == nullptr);
- for(auto &output_edge_id : node.output_edges())
+ for (auto &output_edge_id : node.output_edges())
{
auto output_edge = g->edge(output_edge_id);
- if(output_edge != nullptr)
+ if (output_edge != nullptr)
{
ARM_COMPUTE_ERROR_ON(output_edge->consumer() == nullptr);
- driving_nodes.push_back({ output_edge->consumer_id(), output_edge->consumer_idx() });
+ driving_nodes.push_back({output_edge->consumer_id(), output_edge->consumer_idx()});
}
}
return driving_nodes;
}
+std::vector<NodeIdxPair> get_driver_nodes(const INode &node)
+{
+ std::vector<NodeIdxPair> driver_nodes;
+
+ const Graph *g = node.graph();
+ ARM_COMPUTE_ERROR_ON(g == nullptr);
+
+ for (auto &input_edge_id : node.input_edges())
+ {
+ auto input_edge = g->edge(input_edge_id);
+ if (input_edge != nullptr)
+ {
+ ARM_COMPUTE_ERROR_ON(input_edge->producer() == nullptr);
+ driver_nodes.push_back({input_edge->producer_id(), input_edge->producer_idx()});
+ }
+ }
+
+ return driver_nodes;
+}
+
void configure_tensor(Tensor *tensor)
{
- if(tensor != nullptr && tensor->handle() == nullptr)
+ if (tensor != nullptr && tensor->handle() == nullptr)
{
Target target = tensor->desc().target;
backends::IDeviceBackend &backend = backends::BackendRegistry::get().get_backend(target);
@@ -194,5 +228,6 @@ void configure_tensor(Tensor *tensor)
tensor->set_handle(std::move(handle));
}
}
+
} // namespace graph
} // namespace arm_compute
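Annotation: two functional additions sit in Utils.cpp alongside the formatting churn. get_driver_nodes() is the input-side mirror of get_driving_nodes(), returning the producers feeding a node rather than the consumers it feeds, and sync_backends() walks the registry and calls the new IDeviceBackend::sync() on every backend that owns an allocator. A hedged usage sketch (inspect_producers_and_sync is illustrative, not a library function):

    #include "arm_compute/graph/INode.h"
    #include "arm_compute/graph/Utils.h"
    #include <vector>

    void inspect_producers_and_sync(const arm_compute::graph::INode &node)
    {
        // Each NodeIdxPair names the producing node and the output port it drives.
        std::vector<arm_compute::graph::NodeID> producers;
        for (const auto &driver : arm_compute::graph::get_driver_nodes(node))
        {
            producers.push_back(driver.node_id); // driver.index is the producer's output port
        }
        // Drain asynchronous backends: a no-op on the CPU backend, a queue
        // flush-and-wait on OpenCL (see CLDeviceBackend::sync() further down).
        arm_compute::graph::sync_backends();
    }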
diff --git a/src/graph/Workload.cpp b/src/graph/Workload.cpp
index b9d57295b0..9dddad7cbd 100644
--- a/src/graph/Workload.cpp
+++ b/src/graph/Workload.cpp
@@ -40,12 +40,12 @@ void ExecutionTask::operator()()
void execute_task(ExecutionTask &task)
{
- if(task.task)
+ if (task.task)
{
task.task->run();
}
#ifdef ARM_COMPUTE_ASSERTS_ENABLED
- else if(task.node->type() == NodeType::PrintLayer)
+ else if (task.node->type() == NodeType::PrintLayer)
{
auto print_node = utils::cast::polymorphic_downcast<PrintLayerNode *>(task.node);
auto input_handle = print_node->input(0)->handle();
@@ -61,14 +61,13 @@ void execute_task(ExecutionTask &task)
void ExecutionTask::prepare()
{
- if(task)
+ if (task)
{
task->prepare();
}
}
-TaskExecutor::TaskExecutor()
- : execute_function(execute_task)
+TaskExecutor::TaskExecutor() : execute_function(execute_task)
{
}
@@ -78,4 +77,4 @@ TaskExecutor &TaskExecutor::get()
return executor;
}
} // namespace graph
-} // namespace arm_compute
\ No newline at end of file
+} // namespace arm_compute
diff --git a/src/graph/algorithms/TopologicalSort.cpp b/src/graph/algorithms/TopologicalSort.cpp
index 3a69352471..08e14e1657 100644
--- a/src/graph/algorithms/TopologicalSort.cpp
+++ b/src/graph/algorithms/TopologicalSort.cpp
@@ -50,14 +50,14 @@ inline bool all_inputs_are_visited(const INode *node, const std::vector<bool> &v
ARM_COMPUTE_ERROR_ON(graph == nullptr);
bool are_all_visited = true;
- for(const auto &input_edge_id : node->input_edges())
+ for (const auto &input_edge_id : node->input_edges())
{
- if(input_edge_id != EmptyNodeID)
+ if (input_edge_id != EmptyNodeID)
{
const Edge *input_edge = graph->edge(input_edge_id);
ARM_COMPUTE_ERROR_ON(input_edge == nullptr);
ARM_COMPUTE_ERROR_ON(input_edge->producer() == nullptr);
- if(!visited[input_edge->producer_id()])
+ if (!visited[input_edge->producer_id()])
{
are_all_visited = false;
break;
@@ -80,9 +80,9 @@ std::vector<NodeID> bfs(Graph &g)
std::list<NodeID> queue;
// Push inputs and mark as visited
- for(auto &input : g.nodes(NodeType::Input))
+ for (auto &input : g.nodes(NodeType::Input))
{
- if(input != EmptyNodeID)
+ if (input != EmptyNodeID)
{
visited[input] = true;
queue.push_back(input);
@@ -90,9 +90,9 @@ std::vector<NodeID> bfs(Graph &g)
}
// Push const nodes and mark as visited
- for(auto &const_node : g.nodes(NodeType::Const))
+ for (auto &const_node : g.nodes(NodeType::Const))
{
- if(const_node != EmptyNodeID)
+ if (const_node != EmptyNodeID)
{
visited[const_node] = true;
queue.push_back(const_node);
@@ -100,7 +100,7 @@ std::vector<NodeID> bfs(Graph &g)
}
// Iterate over vector and edges
- while(!queue.empty())
+ while (!queue.empty())
{
// Dequeue a node from queue and process
NodeID n = queue.front();
@@ -109,11 +109,11 @@ std::vector<NodeID> bfs(Graph &g)
const INode *node = g.node(n);
ARM_COMPUTE_ERROR_ON(node == nullptr);
- for(const auto &eid : node->output_edges())
+ for (const auto &eid : node->output_edges())
{
const Edge *e = g.edge(eid);
ARM_COMPUTE_ERROR_ON(e == nullptr);
- if(!visited[e->consumer_id()] && detail::all_inputs_are_visited(e->consumer(), visited))
+ if (!visited[e->consumer_id()] && detail::all_inputs_are_visited(e->consumer(), visited))
{
visited[e->consumer_id()] = true;
queue.push_back(e->consumer_id());
@@ -135,9 +135,9 @@ std::vector<NodeID> dfs(Graph &g)
std::stack<NodeID> stack;
// Push inputs and mark as visited
- for(auto &input : g.nodes(NodeType::Input))
+ for (auto &input : g.nodes(NodeType::Input))
{
- if(input != EmptyNodeID)
+ if (input != EmptyNodeID)
{
visited[input] = true;
stack.push(input);
@@ -145,9 +145,9 @@ std::vector<NodeID> dfs(Graph &g)
}
// Push const nodes and mark as visited
- for(auto &const_node : g.nodes(NodeType::Const))
+ for (auto &const_node : g.nodes(NodeType::Const))
{
- if(const_node != EmptyNodeID)
+ if (const_node != EmptyNodeID)
{
visited[const_node] = true;
stack.push(const_node);
@@ -155,7 +155,7 @@ std::vector<NodeID> dfs(Graph &g)
}
// Iterate over vector and edges
- while(!stack.empty())
+ while (!stack.empty())
{
// Pop a node from stack and process
NodeID n = stack.top();
@@ -163,7 +163,7 @@ std::vector<NodeID> dfs(Graph &g)
stack.pop();
// Mark node as visited
- if(!visited[n])
+ if (!visited[n])
{
visited[n] = true;
}
@@ -171,11 +171,11 @@ std::vector<NodeID> dfs(Graph &g)
const INode *node = g.node(n);
ARM_COMPUTE_ERROR_ON(node == nullptr);
// Reverse iterate to push branches from right to left and pop on the opposite order
- for(const auto &eid : arm_compute::utils::iterable::reverse_iterate(node->output_edges()))
+ for (const auto &eid : arm_compute::utils::iterable::reverse_iterate(node->output_edges()))
{
const Edge *e = g.edge(eid);
ARM_COMPUTE_ERROR_ON(e == nullptr);
- if(!visited[e->consumer_id()] && detail::all_inputs_are_visited(e->consumer(), visited))
+ if (!visited[e->consumer_id()] && detail::all_inputs_are_visited(e->consumer(), visited))
{
stack.push(e->consumer_id());
}
diff --git a/src/graph/backends/BackendRegistry.cpp b/src/graph/backends/BackendRegistry.cpp
index 46b4f99e23..bb6af79f8b 100644
--- a/src/graph/backends/BackendRegistry.cpp
+++ b/src/graph/backends/BackendRegistry.cpp
@@ -31,8 +31,7 @@ namespace graph
{
namespace backends
{
-BackendRegistry::BackendRegistry()
- : _registered_backends()
+BackendRegistry::BackendRegistry() : _registered_backends()
{
}
diff --git a/src/graph/backends/CL/CLDeviceBackend.cpp b/src/graph/backends/CL/CLDeviceBackend.cpp
index b6b25cc7d0..e27a4109d1 100644
--- a/src/graph/backends/CL/CLDeviceBackend.cpp
+++ b/src/graph/backends/CL/CLDeviceBackend.cpp
@@ -23,18 +23,17 @@
*/
#include "arm_compute/graph/backends/CL/CLDeviceBackend.h"
-#include "arm_compute/graph/Graph.h"
-#include "arm_compute/graph/GraphContext.h"
-#include "arm_compute/graph/INode.h"
-#include "arm_compute/graph/Logger.h"
-#include "arm_compute/graph/Tensor.h"
+#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/graph/backends/BackendRegistrar.h"
#include "arm_compute/graph/backends/CL/CLFunctionFactory.h"
#include "arm_compute/graph/backends/CL/CLNodeValidator.h"
#include "arm_compute/graph/backends/CL/CLSubTensorHandle.h"
#include "arm_compute/graph/backends/CL/CLTensorHandle.h"
-
-#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/graph/Graph.h"
+#include "arm_compute/graph/GraphContext.h"
+#include "arm_compute/graph/INode.h"
+#include "arm_compute/graph/Logger.h"
+#include "arm_compute/graph/Tensor.h"
#include "arm_compute/runtime/BlobLifetimeManager.h"
#include "arm_compute/runtime/CL/CLBufferAllocator.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
@@ -64,7 +63,12 @@ bool file_exists(const std::string &filename)
static detail::BackendRegistrar<CLDeviceBackend> CLDeviceBackend_registrar(Target::CL);
CLDeviceBackend::CLDeviceBackend()
- : _context_count(0), _tuner(), _gemm_heuristics(), _allocator(nullptr), _tuner_file(), _backend_type(CLBackendType::Native)
+ : _context_count(0),
+ _tuner(),
+ _gemm_heuristics(),
+ _allocator(nullptr),
+ _tuner_file(),
+ _backend_type(CLBackendType::Native)
{
}
@@ -95,7 +99,7 @@ void CLDeviceBackend::release_backend_context(GraphContext &ctx)
{
ARM_COMPUTE_UNUSED(ctx);
_context_count--;
- if(_context_count == 0) // No more context using the backend: free resources
+ if (_context_count == 0) // No more context using the backend: free resources
{
_allocator = nullptr;
}
@@ -105,7 +109,7 @@ void CLDeviceBackend::setup_backend_context(GraphContext &ctx)
{
// Force backend initialization
_context_count++;
- if(_context_count == 1)
+ if (_context_count == 1)
{
_backend_type = ctx.config().backend_type;
initialize_backend();
@@ -115,7 +119,7 @@ void CLDeviceBackend::setup_backend_context(GraphContext &ctx)
_tuner_file = ctx.config().tuner_file;
// Load tuner data if available
- if(file_exists(_tuner_file))
+ if (file_exists(_tuner_file))
{
_tuner.load_from_file(_tuner_file);
}
@@ -128,7 +132,7 @@ void CLDeviceBackend::setup_backend_context(GraphContext &ctx)
CLScheduler::get().gemm_heuristics()->reload_from_file(ctx.config().mlgo_file);
// Setup a management backend
- if(ctx.memory_management_ctx(Target::CL) == nullptr)
+ if (ctx.memory_management_ctx(Target::CL) == nullptr)
{
MemoryManagerContext mm_ctx;
mm_ctx.target = Target::CL;
@@ -141,7 +145,7 @@ void CLDeviceBackend::setup_backend_context(GraphContext &ctx)
}
// Create function level weights manager
- if(ctx.weights_management_ctx(Target::CL) == nullptr)
+ if (ctx.weights_management_ctx(Target::CL) == nullptr)
{
WeightsManagerContext wm_ctx;
wm_ctx.target = Target::CL;
@@ -174,9 +178,10 @@ std::unique_ptr<ITensorHandle> CLDeviceBackend::create_tensor(const Tensor &tens
return std::make_unique<CLTensorHandle>(info);
}
-std::unique_ptr<ITensorHandle> CLDeviceBackend::create_subtensor(ITensorHandle *parent, TensorShape shape, Coordinates coords, bool extend_parent)
+std::unique_ptr<ITensorHandle>
+CLDeviceBackend::create_subtensor(ITensorHandle *parent, TensorShape shape, Coordinates coords, bool extend_parent)
{
- if(parent == nullptr)
+ if (parent == nullptr)
{
return nullptr;
}
@@ -203,7 +208,7 @@ arm_compute::Status CLDeviceBackend::validate_node(INode &node)
std::shared_ptr<arm_compute::IMemoryManager> CLDeviceBackend::create_memory_manager(MemoryManagerAffinity affinity)
{
- if(affinity == MemoryManagerAffinity::Offset)
+ if (affinity == MemoryManagerAffinity::Offset)
{
ARM_COMPUTE_LOG_GRAPH_WARNING("CL Backend does not support offset affinity memory management!");
return nullptr;
@@ -221,6 +226,11 @@ std::shared_ptr<arm_compute::IWeightsManager> CLDeviceBackend::create_weights_ma
auto weights_mgr = std::make_shared<IWeightsManager>();
return weights_mgr;
}
+
+void CLDeviceBackend::sync()
+{
+ CLScheduler::get().sync();
+}
} // namespace backends
} // namespace graph
} // namespace arm_compute
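Annotation: the new CLDeviceBackend::sync() simply forwards to the scheduler. CLScheduler::get().sync() is a blocking wait until the backend's command queue has drained, which matters because OpenCL work is asynchronous, unlike the CPU path. Code that already depends on the CL runtime can do the same directly:

    #include "arm_compute/runtime/CL/CLScheduler.h"

    // Equivalent direct call; what graph::sync_backends() reaches on the CL backend.
    arm_compute::CLScheduler::get().sync();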
diff --git a/src/graph/backends/CL/CLFunctionsFactory.cpp b/src/graph/backends/CL/CLFunctionsFactory.cpp
index 1cd3f9f9c7..d4e1aa880f 100644
--- a/src/graph/backends/CL/CLFunctionsFactory.cpp
+++ b/src/graph/backends/CL/CLFunctionsFactory.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2021 Arm Limited.
+ * Copyright (c) 2018-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -22,12 +22,12 @@
* SOFTWARE.
*/
#include "arm_compute/graph/backends/CL/CLFunctionFactory.h"
-
+#include "arm_compute/graph/backends/FunctionHelpers.h"
#include "arm_compute/graph/Graph.h"
#include "arm_compute/graph/GraphContext.h"
-#include "arm_compute/graph/backends/FunctionHelpers.h"
#include "arm_compute/runtime/CL/CLFunctions.h"
#include "arm_compute/runtime/CPP/CPPFunctions.h"
+
#include "src/core/CL/CLKernels.h"
#include "support/Cast.h"
@@ -81,6 +81,7 @@ struct CLFusedLayerTypes
using ConvolutionLayer = CLConvolutionLayer;
using DepthwiseConvolutionLayer = CLDepthwiseConvolutionLayer;
using FuseBatchNormalization = CLFuseBatchNormalization;
+ using GEMMConvolutionLayer = CLGEMMConvolutionLayer;
};
/** Wrapper for the CPP Function in the OpenCL backend **/
@@ -88,20 +89,19 @@ class CPPWrapperFunction : public IFunction
{
public:
/* Default constructor */
- CPPWrapperFunction()
- : _tensors(), _func(nullptr)
+ CPPWrapperFunction() : _tensors(), _func(nullptr)
{
}
void run() override
{
- for(auto &tensor : _tensors)
+ for (auto &tensor : _tensors)
{
tensor->map(CLScheduler::get().queue());
}
_func->run();
- for(auto &tensor : _tensors)
+ for (auto &tensor : _tensors)
{
tensor->unmap(CLScheduler::get().queue());
}
@@ -126,7 +126,8 @@ namespace detail
{
// Specialized functions
template <>
-std::unique_ptr<IFunction> create_detection_output_layer<CPPDetectionOutputLayer, CLTargetInfo>(DetectionOutputLayerNode &node)
+std::unique_ptr<IFunction>
+create_detection_output_layer<CPPDetectionOutputLayer, CLTargetInfo>(DetectionOutputLayerNode &node)
{
validate_node<CLTargetInfo>(node, 3 /* expected inputs */, 1 /* expected outputs */);
@@ -148,16 +149,12 @@ std::unique_ptr<IFunction> create_detection_output_layer<CPPDetectionOutputLayer
// Log info
ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated "
- << node.name()
- << " Type: " << node.type()
- << " Target: " << CLTargetInfo::TargetType
- << " Data Type: " << input0->info()->data_type()
- << " Input0 shape: " << input0->info()->tensor_shape()
- << " Input1 shape: " << input1->info()->tensor_shape()
+ << node.name() << " Type: " << node.type() << " Target: " << CLTargetInfo::TargetType
+ << " Data Type: " << input0->info()->data_type() << " Input0 shape: "
+ << input0->info()->tensor_shape() << " Input1 shape: " << input1->info()->tensor_shape()
<< " Input2 shape: " << input2->info()->tensor_shape()
<< " Output shape: " << output->info()->tensor_shape()
- << " DetectionOutputLayer info: " << detect_info
- << std::endl);
+ << " DetectionOutputLayer info: " << detect_info << std::endl);
auto wrap_function = std::make_unique<CPPWrapperFunction>();
@@ -170,7 +167,8 @@ std::unique_ptr<IFunction> create_detection_output_layer<CPPDetectionOutputLayer
return std::move(wrap_function);
}
template <>
-std::unique_ptr<IFunction> create_detection_post_process_layer<CPPDetectionPostProcessLayer, CLTargetInfo>(DetectionPostProcessLayerNode &node)
+std::unique_ptr<IFunction>
+create_detection_post_process_layer<CPPDetectionPostProcessLayer, CLTargetInfo>(DetectionPostProcessLayerNode &node)
{
validate_node<CLTargetInfo>(node, 3 /* expected inputs */, 4 /* expected outputs */);
@@ -198,19 +196,15 @@ std::unique_ptr<IFunction> create_detection_post_process_layer<CPPDetectionPostP
// Log info
ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated "
- << node.name()
- << " Type: " << node.type()
- << " Target: " << CLTargetInfo::TargetType
- << " Data Type: " << input0->info()->data_type()
- << " Input0 shape: " << input0->info()->tensor_shape()
- << " Input1 shape: " << input1->info()->tensor_shape()
+ << node.name() << " Type: " << node.type() << " Target: " << CLTargetInfo::TargetType
+ << " Data Type: " << input0->info()->data_type() << " Input0 shape: "
+ << input0->info()->tensor_shape() << " Input1 shape: " << input1->info()->tensor_shape()
<< " Input2 shape: " << input2->info()->tensor_shape()
<< " Output0 shape: " << output0->info()->tensor_shape()
<< " Output1 shape: " << output1->info()->tensor_shape()
<< " Output2 shape: " << output2->info()->tensor_shape()
<< " Output3 shape: " << output3->info()->tensor_shape()
- << " DetectionPostProcessLayer info: " << detect_info
- << std::endl);
+ << " DetectionPostProcessLayer info: " << detect_info << std::endl);
auto wrap_function = std::make_unique<CPPWrapperFunction>();
@@ -229,92 +223,128 @@ std::unique_ptr<IFunction> create_detection_post_process_layer<CPPDetectionPostP
std::unique_ptr<IFunction> CLFunctionFactory::create(INode *node, GraphContext &ctx)
{
- if(node == nullptr)
+ if (node == nullptr)
{
return nullptr;
}
NodeType type = node->type();
- switch(type)
+ switch (type)
{
case NodeType::ActivationLayer:
- return detail::create_activation_layer<CLActivationLayer, CLTargetInfo>(*polymorphic_downcast<ActivationLayerNode *>(node));
+ return detail::create_activation_layer<CLActivationLayer, CLTargetInfo>(
+ *polymorphic_downcast<ActivationLayerNode *>(node));
case NodeType::ArgMinMaxLayer:
- return detail::create_arg_min_max_layer<CLArgMinMaxLayer, CLTargetInfo>(*polymorphic_downcast<ArgMinMaxLayerNode *>(node));
+ return detail::create_arg_min_max_layer<CLArgMinMaxLayer, CLTargetInfo>(
+ *polymorphic_downcast<ArgMinMaxLayerNode *>(node));
case NodeType::BatchNormalizationLayer:
- return detail::create_batch_normalization_layer<CLBatchNormalizationLayer, CLTargetInfo>(*polymorphic_downcast<BatchNormalizationLayerNode *>(node));
+ return detail::create_batch_normalization_layer<CLBatchNormalizationLayer, CLTargetInfo>(
+ *polymorphic_downcast<BatchNormalizationLayerNode *>(node));
case NodeType::BoundingBoxTransformLayer:
- return detail::create_bounding_box_transform_layer<CLBoundingBoxTransform, CLTargetInfo>(*polymorphic_downcast<BoundingBoxTransformLayerNode *>(node));
+ return detail::create_bounding_box_transform_layer<CLBoundingBoxTransform, CLTargetInfo>(
+ *polymorphic_downcast<BoundingBoxTransformLayerNode *>(node));
case NodeType::ChannelShuffleLayer:
- return detail::create_channel_shuffle_layer<CLChannelShuffleLayer, CLTargetInfo>(*polymorphic_downcast<ChannelShuffleLayerNode *>(node));
+ return detail::create_channel_shuffle_layer<CLChannelShuffleLayer, CLTargetInfo>(
+ *polymorphic_downcast<ChannelShuffleLayerNode *>(node));
case NodeType::ConvolutionLayer:
- return detail::create_convolution_layer<CLConvolutionLayerFunctions, CLTargetInfo>(*polymorphic_downcast<ConvolutionLayerNode *>(node), ctx);
+ return detail::create_convolution_layer<CLConvolutionLayerFunctions, CLTargetInfo>(
+ *polymorphic_downcast<ConvolutionLayerNode *>(node), ctx);
case NodeType::DeconvolutionLayer:
- return detail::create_deconvolution_layer<CLDeconvolutionLayer, CLTargetInfo>(*polymorphic_downcast<DeconvolutionLayerNode *>(node), ctx);
+ return detail::create_deconvolution_layer<CLDeconvolutionLayer, CLTargetInfo>(
+ *polymorphic_downcast<DeconvolutionLayerNode *>(node), ctx);
case NodeType::ConcatenateLayer:
- return detail::create_concatenate_layer<CLConcatenateLayer, CLTargetInfo>(*polymorphic_downcast<ConcatenateLayerNode *>(node));
+ return detail::create_concatenate_layer<CLConcatenateLayer, CLTargetInfo>(
+ *polymorphic_downcast<ConcatenateLayerNode *>(node));
case NodeType::DepthToSpaceLayer:
- return detail::create_depth_to_space_layer<CLDepthToSpaceLayer, CLTargetInfo>(*polymorphic_downcast<DepthToSpaceLayerNode *>(node));
+ return detail::create_depth_to_space_layer<CLDepthToSpaceLayer, CLTargetInfo>(
+ *polymorphic_downcast<DepthToSpaceLayerNode *>(node));
case NodeType::DepthwiseConvolutionLayer:
- return detail::create_depthwise_convolution_layer<CLDepthwiseConvolutionLayer, CLTargetInfo>(*polymorphic_downcast<DepthwiseConvolutionLayerNode *>(node));
+ return detail::create_depthwise_convolution_layer<CLDepthwiseConvolutionLayer, CLTargetInfo>(
+ *polymorphic_downcast<DepthwiseConvolutionLayerNode *>(node));
case NodeType::DequantizationLayer:
- return detail::create_dequantization_layer<CLDequantizationLayer, CLTargetInfo>(*polymorphic_downcast<DequantizationLayerNode *>(node));
+ return detail::create_dequantization_layer<CLDequantizationLayer, CLTargetInfo>(
+ *polymorphic_downcast<DequantizationLayerNode *>(node));
case NodeType::DetectionOutputLayer:
- return detail::create_detection_output_layer<CPPDetectionOutputLayer, CLTargetInfo>(*polymorphic_downcast<DetectionOutputLayerNode *>(node));
+ return detail::create_detection_output_layer<CPPDetectionOutputLayer, CLTargetInfo>(
+ *polymorphic_downcast<DetectionOutputLayerNode *>(node));
case NodeType::DetectionPostProcessLayer:
- return detail::create_detection_post_process_layer<CPPDetectionPostProcessLayer, CLTargetInfo>(*polymorphic_downcast<DetectionPostProcessLayerNode *>(node));
+ return detail::create_detection_post_process_layer<CPPDetectionPostProcessLayer, CLTargetInfo>(
+ *polymorphic_downcast<DetectionPostProcessLayerNode *>(node));
case NodeType::EltwiseLayer:
- return detail::create_eltwise_layer<CLEltwiseFunctions, CLTargetInfo>(*polymorphic_downcast<EltwiseLayerNode *>(node));
+ return detail::create_eltwise_layer<CLEltwiseFunctions, CLTargetInfo>(
+ *polymorphic_downcast<EltwiseLayerNode *>(node));
case NodeType::UnaryEltwiseLayer:
- return detail::create_unary_eltwise_layer<CLUnaryEltwiseFunctions, CLTargetInfo>(*polymorphic_downcast<UnaryEltwiseLayerNode *>(node));
+ return detail::create_unary_eltwise_layer<CLUnaryEltwiseFunctions, CLTargetInfo>(
+ *polymorphic_downcast<UnaryEltwiseLayerNode *>(node));
case NodeType::FlattenLayer:
- return detail::create_flatten_layer<CLFlattenLayer, CLTargetInfo>(*polymorphic_downcast<FlattenLayerNode *>(node));
+ return detail::create_flatten_layer<CLFlattenLayer, CLTargetInfo>(
+ *polymorphic_downcast<FlattenLayerNode *>(node));
case NodeType::FullyConnectedLayer:
- return detail::create_fully_connected_layer<CLFullyConnectedLayer, CLTargetInfo>(*polymorphic_downcast<FullyConnectedLayerNode *>(node), ctx);
+ return detail::create_fully_connected_layer<CLFullyConnectedLayer, CLTargetInfo>(
+ *polymorphic_downcast<FullyConnectedLayerNode *>(node), ctx);
case NodeType::FusedConvolutionBatchNormalizationLayer:
- return detail::create_fused_convolution_batch_normalization_layer<CLFusedLayerTypes, CLTargetInfo>(*polymorphic_downcast<FusedConvolutionBatchNormalizationNode *>(node), ctx);
+ return detail::create_fused_convolution_batch_normalization_layer<CLFusedLayerTypes, CLTargetInfo>(
+ *polymorphic_downcast<FusedConvolutionBatchNormalizationNode *>(node), ctx);
case NodeType::FusedDepthwiseConvolutionBatchNormalizationLayer:
- return detail::create_fused_depthwise_convolution_batch_normalization_layer<CLFusedLayerTypes, CLTargetInfo>(*polymorphic_downcast<FusedDepthwiseConvolutionBatchNormalizationNode *>(node), ctx);
+ return detail::create_fused_depthwise_convolution_batch_normalization_layer<CLFusedLayerTypes,
+ CLTargetInfo>(
+ *polymorphic_downcast<FusedDepthwiseConvolutionBatchNormalizationNode *>(node), ctx);
case NodeType::GenerateProposalsLayer:
- return detail::create_generate_proposals_layer<CLGenerateProposalsLayer, CLTargetInfo>(*polymorphic_downcast<GenerateProposalsLayerNode *>(node), ctx);
+ return detail::create_generate_proposals_layer<CLGenerateProposalsLayer, CLTargetInfo>(
+ *polymorphic_downcast<GenerateProposalsLayerNode *>(node), ctx);
case NodeType::L2NormalizeLayer:
- return detail::create_l2_normalize_layer<CLL2NormalizeLayer, CLTargetInfo>(*polymorphic_downcast<L2NormalizeLayerNode *>(node), ctx);
+ return detail::create_l2_normalize_layer<CLL2NormalizeLayer, CLTargetInfo>(
+ *polymorphic_downcast<L2NormalizeLayerNode *>(node), ctx);
case NodeType::NormalizationLayer:
- return detail::create_normalization_layer<CLNormalizationLayer, CLTargetInfo>(*polymorphic_downcast<NormalizationLayerNode *>(node), ctx);
+ return detail::create_normalization_layer<CLNormalizationLayer, CLTargetInfo>(
+ *polymorphic_downcast<NormalizationLayerNode *>(node), ctx);
case NodeType::NormalizePlanarYUVLayer:
- return detail::create_normalize_planar_yuv_layer<CLNormalizePlanarYUVLayer, CLTargetInfo>(*polymorphic_downcast<NormalizePlanarYUVLayerNode *>(node));
+ return detail::create_normalize_planar_yuv_layer<CLNormalizePlanarYUVLayer, CLTargetInfo>(
+ *polymorphic_downcast<NormalizePlanarYUVLayerNode *>(node));
case NodeType::PadLayer:
return detail::create_pad_layer<CLPadLayer, CLTargetInfo>(*polymorphic_downcast<PadLayerNode *>(node));
case NodeType::PermuteLayer:
- return detail::create_permute_layer<CLPermute, CLTargetInfo>(*polymorphic_downcast<PermuteLayerNode *>(node));
+ return detail::create_permute_layer<CLPermute, CLTargetInfo>(
+ *polymorphic_downcast<PermuteLayerNode *>(node));
case NodeType::PoolingLayer:
- return detail::create_pooling_layer<CLPoolingLayer, CLTargetInfo>(*polymorphic_downcast<PoolingLayerNode *>(node));
+ return detail::create_pooling_layer<CLPoolingLayer, CLTargetInfo>(
+ *polymorphic_downcast<PoolingLayerNode *>(node));
case NodeType::PReluLayer:
- return detail::create_prelu_layer<CLPReluLayer, CLTargetInfo>(*polymorphic_downcast<PReluLayerNode *>(node));
+ return detail::create_prelu_layer<CLPReluLayer, CLTargetInfo>(
+ *polymorphic_downcast<PReluLayerNode *>(node));
case NodeType::PrintLayer:
return detail::create_print_layer<CLTargetInfo>(*polymorphic_downcast<PrintLayerNode *>(node));
case NodeType::PriorBoxLayer:
- return detail::create_priorbox_layer<CLPriorBoxLayer, CLTargetInfo>(*polymorphic_downcast<PriorBoxLayerNode *>(node));
+ return detail::create_priorbox_layer<CLPriorBoxLayer, CLTargetInfo>(
+ *polymorphic_downcast<PriorBoxLayerNode *>(node));
case NodeType::QuantizationLayer:
- return detail::create_quantization_layer<CLQuantizationLayer, CLTargetInfo>(*polymorphic_downcast<QuantizationLayerNode *>(node));
+ return detail::create_quantization_layer<CLQuantizationLayer, CLTargetInfo>(
+ *polymorphic_downcast<QuantizationLayerNode *>(node));
case NodeType::ReductionOperationLayer:
- return detail::create_reduction_operation_layer<CLReductionOperation, CLTargetInfo>(*polymorphic_downcast<ReductionLayerNode *>(node), ctx);
+ return detail::create_reduction_operation_layer<CLReductionOperation, CLTargetInfo>(
+ *polymorphic_downcast<ReductionLayerNode *>(node), ctx);
case NodeType::ReorgLayer:
- return detail::create_reorg_layer<CLReorgLayer, CLTargetInfo>(*polymorphic_downcast<ReorgLayerNode *>(node));
+ return detail::create_reorg_layer<CLReorgLayer, CLTargetInfo>(
+ *polymorphic_downcast<ReorgLayerNode *>(node));
case NodeType::ReshapeLayer:
- return detail::create_reshape_layer<CLReshapeLayer, CLTargetInfo>(*polymorphic_downcast<ReshapeLayerNode *>(node));
+ return detail::create_reshape_layer<CLReshapeLayer, CLTargetInfo>(
+ *polymorphic_downcast<ReshapeLayerNode *>(node));
case NodeType::ResizeLayer:
return detail::create_resize_layer<CLScale, CLTargetInfo>(*polymorphic_downcast<ResizeLayerNode *>(node));
case NodeType::ROIAlignLayer:
- return detail::create_roi_align_layer<CLROIAlignLayer, CLTargetInfo>(*polymorphic_downcast<ROIAlignLayerNode *>(node));
+ return detail::create_roi_align_layer<CLROIAlignLayer, CLTargetInfo>(
+ *polymorphic_downcast<ROIAlignLayerNode *>(node));
case NodeType::SliceLayer:
return detail::create_slice_layer<CLSlice, CLTargetInfo>(*polymorphic_downcast<SliceLayerNode *>(node));
case NodeType::SoftmaxLayer:
- return detail::create_softmax_layer<CLSoftmaxLayer, CLTargetInfo>(*polymorphic_downcast<SoftmaxLayerNode *>(node), ctx);
+ return detail::create_softmax_layer<CLSoftmaxLayer, CLTargetInfo>(
+ *polymorphic_downcast<SoftmaxLayerNode *>(node), ctx);
case NodeType::StackLayer:
- return detail::create_stack_layer<CLStackLayer, CLTargetInfo>(*polymorphic_downcast<StackLayerNode *>(node));
+ return detail::create_stack_layer<CLStackLayer, CLTargetInfo>(
+ *polymorphic_downcast<StackLayerNode *>(node));
case NodeType::StridedSliceLayer:
- return detail::create_strided_slice_layer<CLStridedSlice, CLTargetInfo>(*polymorphic_downcast<StridedSliceLayerNode *>(node));
+ return detail::create_strided_slice_layer<CLStridedSlice, CLTargetInfo>(
+ *polymorphic_downcast<StridedSliceLayerNode *>(node));
default:
return nullptr;
}
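Annotation: the one functional change in this factory is the GEMMConvolutionLayer alias added to CLFusedLayerTypes; everything else is mechanical re-wrapping of long lines. The struct acts as a compile-time traits table that the shared templates in FunctionHelpers.h instantiate against, presumably so the fused convolution/batch-normalization helpers can select a GEMM-based convolution. A toy sketch of the pattern (make_via_traits is hypothetical, not a library helper):

    #include "arm_compute/runtime/IFunction.h"
    #include <memory>

    // Generic code never names CLGEMMConvolutionLayer directly; it goes through the alias.
    template <typename FusedLayerTypes>
    std::unique_ptr<arm_compute::IFunction> make_via_traits()
    {
        using Conv = typename FusedLayerTypes::GEMMConvolutionLayer; // CLGEMMConvolutionLayer here
        return std::make_unique<Conv>();
    }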
diff --git a/src/graph/backends/CL/CLNodeValidator.cpp b/src/graph/backends/CL/CLNodeValidator.cpp
index 8e3b4c8705..510eda7935 100644
--- a/src/graph/backends/CL/CLNodeValidator.cpp
+++ b/src/graph/backends/CL/CLNodeValidator.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2021 Arm Limited.
+ * Copyright (c) 2018-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -25,7 +25,6 @@
#include "arm_compute/graph/backends/ValidateHelpers.h"
#include "arm_compute/graph/nodes/Nodes.h"
-
#include "arm_compute/runtime/CL/CLFunctions.h"
#include "arm_compute/runtime/CPP/CPPFunctions.h"
@@ -57,41 +56,51 @@ struct CLUnaryEltwiseLayerFunctions
Status CLNodeValidator::validate(INode *node)
{
- if(node == nullptr)
+ if (node == nullptr)
{
return Status{};
}
NodeType type = node->type();
- switch(type)
+ switch (type)
{
case NodeType::ArgMinMaxLayer:
- return detail::validate_arg_min_max_layer<CLArgMinMaxLayer>(*polymorphic_downcast<ArgMinMaxLayerNode *>(node));
+ return detail::validate_arg_min_max_layer<CLArgMinMaxLayer>(
+ *polymorphic_downcast<ArgMinMaxLayerNode *>(node));
case NodeType::BoundingBoxTransformLayer:
- return detail::validate_bounding_box_transform_layer<CLBoundingBoxTransform>(*polymorphic_downcast<BoundingBoxTransformLayerNode *>(node));
+ return detail::validate_bounding_box_transform_layer<CLBoundingBoxTransform>(
+ *polymorphic_downcast<BoundingBoxTransformLayerNode *>(node));
case NodeType::ChannelShuffleLayer:
- return detail::validate_channel_shuffle_layer<CLChannelShuffleLayer>(*polymorphic_downcast<ChannelShuffleLayerNode *>(node));
+ return detail::validate_channel_shuffle_layer<CLChannelShuffleLayer>(
+ *polymorphic_downcast<ChannelShuffleLayerNode *>(node));
case NodeType::ConvolutionLayer:
- return detail::validate_convolution_layer<CLConvolutionLayer,
- CLDirectConvolutionLayer,
- CLGEMMConvolutionLayer,
- CLWinogradConvolutionLayer>(*polymorphic_downcast<ConvolutionLayerNode *>(node));
+ return detail::validate_convolution_layer<CLConvolutionLayer, CLDirectConvolutionLayer,
+ CLGEMMConvolutionLayer, CLWinogradConvolutionLayer>(
+ *polymorphic_downcast<ConvolutionLayerNode *>(node));
case NodeType::DepthToSpaceLayer:
- return detail::validate_depth_to_space_layer<CLDepthToSpaceLayer>(*polymorphic_downcast<DepthToSpaceLayerNode *>(node));
+ return detail::validate_depth_to_space_layer<CLDepthToSpaceLayer>(
+ *polymorphic_downcast<DepthToSpaceLayerNode *>(node));
case NodeType::DepthwiseConvolutionLayer:
- return detail::validate_depthwise_convolution_layer<CLDepthwiseConvolutionLayer>(*polymorphic_downcast<DepthwiseConvolutionLayerNode *>(node));
+ return detail::validate_depthwise_convolution_layer<CLDepthwiseConvolutionLayer>(
+ *polymorphic_downcast<DepthwiseConvolutionLayerNode *>(node));
case NodeType::DequantizationLayer:
- return detail::validate_dequantization_layer<CLDequantizationLayer>(*polymorphic_downcast<DequantizationLayerNode *>(node));
+ return detail::validate_dequantization_layer<CLDequantizationLayer>(
+ *polymorphic_downcast<DequantizationLayerNode *>(node));
case NodeType::DetectionOutputLayer:
- return detail::validate_detection_output_layer<CPPDetectionOutputLayer>(*polymorphic_downcast<DetectionOutputLayerNode *>(node));
+ return detail::validate_detection_output_layer<CPPDetectionOutputLayer>(
+ *polymorphic_downcast<DetectionOutputLayerNode *>(node));
case NodeType::DetectionPostProcessLayer:
- return detail::validate_detection_post_process_layer<CPPDetectionPostProcessLayer>(*polymorphic_downcast<DetectionPostProcessLayerNode *>(node));
+ return detail::validate_detection_post_process_layer<CPPDetectionPostProcessLayer>(
+ *polymorphic_downcast<DetectionPostProcessLayerNode *>(node));
case NodeType::GenerateProposalsLayer:
- return detail::validate_generate_proposals_layer<CLGenerateProposalsLayer>(*polymorphic_downcast<GenerateProposalsLayerNode *>(node));
+ return detail::validate_generate_proposals_layer<CLGenerateProposalsLayer>(
+ *polymorphic_downcast<GenerateProposalsLayerNode *>(node));
case NodeType::L2NormalizeLayer:
- return detail::validate_l2_normalize_layer<CLL2NormalizeLayer>(*polymorphic_downcast<L2NormalizeLayerNode *>(node));
+ return detail::validate_l2_normalize_layer<CLL2NormalizeLayer>(
+ *polymorphic_downcast<L2NormalizeLayerNode *>(node));
case NodeType::NormalizePlanarYUVLayer:
- return detail::validate_normalize_planar_yuv_layer<CLNormalizePlanarYUVLayer>(*polymorphic_downcast<NormalizePlanarYUVLayerNode *>(node));
+ return detail::validate_normalize_planar_yuv_layer<CLNormalizePlanarYUVLayer>(
+ *polymorphic_downcast<NormalizePlanarYUVLayerNode *>(node));
case NodeType::PadLayer:
return detail::validate_pad_layer<CLPadLayer>(*polymorphic_downcast<PadLayerNode *>(node));
case NodeType::PermuteLayer:
@@ -101,9 +110,11 @@ Status CLNodeValidator::validate(INode *node)
case NodeType::PriorBoxLayer:
return detail::validate_priorbox_layer<CLPriorBoxLayer>(*polymorphic_downcast<PriorBoxLayerNode *>(node));
case NodeType::QuantizationLayer:
- return detail::validate_quantization_layer<CLQuantizationLayer>(*polymorphic_downcast<QuantizationLayerNode *>(node));
+ return detail::validate_quantization_layer<CLQuantizationLayer>(
+ *polymorphic_downcast<QuantizationLayerNode *>(node));
case NodeType::ReductionOperationLayer:
- return detail::validate_reduction_operation_layer<CLReductionOperation>(*polymorphic_downcast<ReductionLayerNode *>(node));
+ return detail::validate_reduction_operation_layer<CLReductionOperation>(
+ *polymorphic_downcast<ReductionLayerNode *>(node));
case NodeType::ReorgLayer:
return detail::validate_reorg_layer<CLReorgLayer>(*polymorphic_downcast<ReorgLayerNode *>(node));
case NodeType::ReshapeLayer:
@@ -113,11 +124,14 @@ Status CLNodeValidator::validate(INode *node)
case NodeType::SliceLayer:
return detail::validate_slice_layer<CLSlice>(*polymorphic_downcast<SliceLayerNode *>(node));
case NodeType::StridedSliceLayer:
- return detail::validate_strided_slice_layer<CLStridedSlice>(*polymorphic_downcast<StridedSliceLayerNode *>(node));
+ return detail::validate_strided_slice_layer<CLStridedSlice>(
+ *polymorphic_downcast<StridedSliceLayerNode *>(node));
case NodeType::EltwiseLayer:
- return detail::validate_eltwise_Layer<CLEltwiseLayerFunctions>(*polymorphic_downcast<EltwiseLayerNode *>(node));
+ return detail::validate_eltwise_Layer<CLEltwiseLayerFunctions>(
+ *polymorphic_downcast<EltwiseLayerNode *>(node));
case NodeType::UnaryEltwiseLayer:
- return detail::validate_unary_eltwise_layer<CLUnaryEltwiseLayerFunctions>(*polymorphic_downcast<UnaryEltwiseLayerNode *>(node));
+ return detail::validate_unary_eltwise_layer<CLUnaryEltwiseLayerFunctions>(
+ *polymorphic_downcast<UnaryEltwiseLayerNode *>(node));
default:
return Status{};
}
diff --git a/src/graph/backends/CL/CLSubTensorHandle.cpp b/src/graph/backends/CL/CLSubTensorHandle.cpp
index b97d25890a..ccdc877a18 100644
--- a/src/graph/backends/CL/CLSubTensorHandle.cpp
+++ b/src/graph/backends/CL/CLSubTensorHandle.cpp
@@ -31,7 +31,10 @@ namespace graph
{
namespace backends
{
-CLSubTensorHandle::CLSubTensorHandle(ITensorHandle *parent_handle, const TensorShape &shape, const Coordinates &coords, bool extend_parent)
+CLSubTensorHandle::CLSubTensorHandle(ITensorHandle *parent_handle,
+ const TensorShape &shape,
+ const Coordinates &coords,
+ bool extend_parent)
: _sub_tensor(), _parent_handle(nullptr)
{
ARM_COMPUTE_ERROR_ON(!parent_handle);
@@ -98,4 +101,4 @@ Target CLSubTensorHandle::target() const
}
} // namespace backends
} // namespace graph
-} // namespace arm_compute
\ No newline at end of file
+} // namespace arm_compute
diff --git a/src/graph/backends/CL/CLTensorHandle.cpp b/src/graph/backends/CL/CLTensorHandle.cpp
index a496c2ce47..1b69f9dede 100644
--- a/src/graph/backends/CL/CLTensorHandle.cpp
+++ b/src/graph/backends/CL/CLTensorHandle.cpp
@@ -31,8 +31,7 @@ namespace graph
{
namespace backends
{
-CLTensorHandle::CLTensorHandle(const ITensorInfo &info)
- : _tensor()
+CLTensorHandle::CLTensorHandle(const ITensorInfo &info) : _tensor()
{
_tensor.allocator()->init(info);
}
@@ -49,7 +48,7 @@ void CLTensorHandle::free()
void CLTensorHandle::manage(IMemoryGroup *mg)
{
- if(mg != nullptr)
+ if (mg != nullptr)
{
mg->manage(&_tensor);
}
@@ -68,7 +67,7 @@ void CLTensorHandle::unmap()
void CLTensorHandle::release_if_unused()
{
// TODO (geopin01): Release tensor only if all sub-tensors are marked as not used
- if(!_tensor.is_used())
+ if (!_tensor.is_used())
{
_tensor.allocator()->free();
}
@@ -100,4 +99,4 @@ Target CLTensorHandle::target() const
}
} // namespace backends
} // namespace graph
-} // namespace arm_compute
\ No newline at end of file
+} // namespace arm_compute
diff --git a/src/graph/backends/NEON/NEDeviceBackend.cpp b/src/graph/backends/NEON/NEDeviceBackend.cpp
index 9efa3ac0c8..fc7b309803 100644
--- a/src/graph/backends/NEON/NEDeviceBackend.cpp
+++ b/src/graph/backends/NEON/NEDeviceBackend.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2021 Arm Limited.
+ * Copyright (c) 2018-2021,2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -23,18 +23,17 @@
*/
#include "arm_compute/graph/backends/NEON/NEDeviceBackend.h"
-#include "arm_compute/graph/Graph.h"
-#include "arm_compute/graph/GraphContext.h"
-#include "arm_compute/graph/INode.h"
-#include "arm_compute/graph/Logger.h"
-#include "arm_compute/graph/Tensor.h"
+#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/graph/backends/BackendRegistrar.h"
#include "arm_compute/graph/backends/NEON/NEFunctionFactory.h"
#include "arm_compute/graph/backends/NEON/NENodeValidator.h"
#include "arm_compute/graph/backends/NEON/NESubTensorHandle.h"
#include "arm_compute/graph/backends/NEON/NETensorHandle.h"
-
-#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/graph/Graph.h"
+#include "arm_compute/graph/GraphContext.h"
+#include "arm_compute/graph/INode.h"
+#include "arm_compute/graph/Logger.h"
+#include "arm_compute/graph/Tensor.h"
#include "arm_compute/runtime/Allocator.h"
#include "arm_compute/runtime/BlobLifetimeManager.h"
#include "arm_compute/runtime/IWeightsManager.h"
@@ -44,8 +43,6 @@
#include "arm_compute/runtime/PoolManager.h"
#include "arm_compute/runtime/Scheduler.h"
-#include "support/ToolchainSupport.h"
-
namespace arm_compute
{
namespace graph
@@ -55,8 +52,7 @@ namespace backends
/** Register CPU backend */
static detail::BackendRegistrar<NEDeviceBackend> NEDeviceBackend_registrar(Target::NEON);
-NEDeviceBackend::NEDeviceBackend()
- : _allocator()
+NEDeviceBackend::NEDeviceBackend() : _allocator()
{
}
@@ -74,13 +70,13 @@ void NEDeviceBackend::release_backend_context(GraphContext &ctx)
void NEDeviceBackend::setup_backend_context(GraphContext &ctx)
{
// Set number of threads
- if(ctx.config().num_threads >= 0)
+ if (ctx.config().num_threads >= 0)
{
Scheduler::get().set_num_threads(ctx.config().num_threads);
}
// Create function level memory manager
- if(ctx.memory_management_ctx(Target::NEON) == nullptr)
+ if (ctx.memory_management_ctx(Target::NEON) == nullptr)
{
MemoryManagerContext mm_ctx;
mm_ctx.target = Target::NEON;
@@ -93,7 +89,7 @@ void NEDeviceBackend::setup_backend_context(GraphContext &ctx)
}
// Create function level weights manager
- if(ctx.weights_management_ctx(Target::NEON) == nullptr)
+ if (ctx.weights_management_ctx(Target::NEON) == nullptr)
{
WeightsManagerContext wm_ctx;
wm_ctx.target = Target::NEON;
@@ -126,9 +122,10 @@ std::unique_ptr<ITensorHandle> NEDeviceBackend::create_tensor(const Tensor &tens
return std::make_unique<NETensorHandle>(info);
}
-std::unique_ptr<ITensorHandle> NEDeviceBackend::create_subtensor(ITensorHandle *parent, TensorShape shape, Coordinates coords, bool extend_parent)
+std::unique_ptr<ITensorHandle>
+NEDeviceBackend::create_subtensor(ITensorHandle *parent, TensorShape shape, Coordinates coords, bool extend_parent)
{
- if(parent == nullptr)
+ if (parent == nullptr)
{
return nullptr;
}
@@ -156,7 +153,7 @@ arm_compute::Status NEDeviceBackend::validate_node(INode &node)
std::shared_ptr<arm_compute::IMemoryManager> NEDeviceBackend::create_memory_manager(MemoryManagerAffinity affinity)
{
std::shared_ptr<ILifetimeManager> lifetime_mgr = nullptr;
- if(affinity == MemoryManagerAffinity::Buffer)
+ if (affinity == MemoryManagerAffinity::Buffer)
{
lifetime_mgr = std::make_shared<BlobLifetimeManager>();
}
@@ -175,6 +172,11 @@ std::shared_ptr<arm_compute::IWeightsManager> NEDeviceBackend::create_weights_ma
auto weights_mgr = std::make_shared<IWeightsManager>();
return weights_mgr;
}
+
+void NEDeviceBackend::sync()
+{
+ // nop
+}
} // namespace backends
} // namespace graph
} // namespace arm_compute
diff --git a/src/graph/backends/NEON/NEFunctionFactory.cpp b/src/graph/backends/NEON/NEFunctionFactory.cpp
index 0fc5291648..fe15d4cec1 100644
--- a/src/graph/backends/NEON/NEFunctionFactory.cpp
+++ b/src/graph/backends/NEON/NEFunctionFactory.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2021 Arm Limited.
+ * Copyright (c) 2018-2021,2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -23,17 +23,15 @@
*/
#include "arm_compute/graph/backends/NEON/NEFunctionFactory.h"
+#include "arm_compute/graph/backends/FunctionHelpers.h"
+#include "arm_compute/graph/backends/Utils.h"
#include "arm_compute/graph/Graph.h"
#include "arm_compute/graph/GraphContext.h"
#include "arm_compute/graph/Logger.h"
-#include "arm_compute/graph/TypePrinter.h"
-#include "arm_compute/graph/backends/FunctionHelpers.h"
-#include "arm_compute/graph/backends/Utils.h"
#include "arm_compute/graph/nodes/Nodes.h"
+#include "arm_compute/graph/TypePrinter.h"
#include "arm_compute/runtime/CPP/CPPFunctions.h"
#include "arm_compute/runtime/NEON/NEFunctions.h"
-#include "support/Cast.h"
-#include "support/ToolchainSupport.h"
using namespace arm_compute::utils::cast;
@@ -90,7 +88,8 @@ struct NEFusedLayerTypes
namespace detail
{
template <>
-std::unique_ptr<IFunction> create_normalization_layer<NENormalizationLayer, NETargetInfo>(NormalizationLayerNode &node, GraphContext &ctx)
+std::unique_ptr<IFunction> create_normalization_layer<NENormalizationLayer, NETargetInfo>(NormalizationLayerNode &node,
+ GraphContext &ctx)
{
validate_node<NETargetInfo>(node, 1 /* expected inputs */, 1 /* expected outputs */);
@@ -107,99 +106,127 @@ std::unique_ptr<IFunction> create_normalization_layer<NENormalizationLayer, NETa
// Log info
ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated "
- << node.name()
- << " Type: " << node.type()
- << " Target: " << NETargetInfo::TargetType
- << " Data Type: " << input->info()->data_type()
- << " Input shape: " << input->info()->tensor_shape()
- << " Output shape: " << output->info()->tensor_shape()
- << " Normalization info: " << norm_info.type()
- << std::endl);
+ << node.name() << " Type: " << node.type() << " Target: " << NETargetInfo::TargetType
+ << " Data Type: " << input->info()->data_type() << " Input shape: "
+ << input->info()->tensor_shape() << " Output shape: " << output->info()->tensor_shape()
+ << " Normalization info: " << norm_info.type() << std::endl);
- return std::move(func);
+ return func;
}
} // namespace detail
std::unique_ptr<IFunction> NEFunctionFactory::create(INode *node, GraphContext &ctx)
{
- if(node == nullptr)
+ if (node == nullptr)
{
return nullptr;
}
NodeType type = node->type();
- switch(type)
+ switch (type)
{
case NodeType::ActivationLayer:
- return detail::create_activation_layer<NEActivationLayer, NETargetInfo>(*polymorphic_downcast<ActivationLayerNode *>(node));
+ return detail::create_activation_layer<NEActivationLayer, NETargetInfo>(
+ *polymorphic_downcast<ActivationLayerNode *>(node));
case NodeType::ArgMinMaxLayer:
- return detail::create_arg_min_max_layer<NEArgMinMaxLayer, NETargetInfo>(*polymorphic_downcast<ArgMinMaxLayerNode *>(node));
+ return detail::create_arg_min_max_layer<NEArgMinMaxLayer, NETargetInfo>(
+ *polymorphic_downcast<ArgMinMaxLayerNode *>(node));
case NodeType::BatchNormalizationLayer:
- return detail::create_batch_normalization_layer<NEBatchNormalizationLayer, NETargetInfo>(*polymorphic_downcast<BatchNormalizationLayerNode *>(node));
+ return detail::create_batch_normalization_layer<NEBatchNormalizationLayer, NETargetInfo>(
+ *polymorphic_downcast<BatchNormalizationLayerNode *>(node));
case NodeType::ChannelShuffleLayer:
- return detail::create_channel_shuffle_layer<NEChannelShuffleLayer, NETargetInfo>(*polymorphic_downcast<ChannelShuffleLayerNode *>(node));
+ return detail::create_channel_shuffle_layer<NEChannelShuffleLayer, NETargetInfo>(
+ *polymorphic_downcast<ChannelShuffleLayerNode *>(node));
case NodeType::ConvolutionLayer:
- return detail::create_convolution_layer<NEConvolutionLayerFunctions, NETargetInfo>(*polymorphic_downcast<ConvolutionLayerNode *>(node), ctx);
+ return detail::create_convolution_layer<NEConvolutionLayerFunctions, NETargetInfo>(
+ *polymorphic_downcast<ConvolutionLayerNode *>(node), ctx);
case NodeType::DepthToSpaceLayer:
- return detail::create_depth_to_space_layer<NEDepthToSpaceLayer, NETargetInfo>(*polymorphic_downcast<DepthToSpaceLayerNode *>(node));
+ return detail::create_depth_to_space_layer<NEDepthToSpaceLayer, NETargetInfo>(
+ *polymorphic_downcast<DepthToSpaceLayerNode *>(node));
case NodeType::DeconvolutionLayer:
- return detail::create_deconvolution_layer<NEDeconvolutionLayer, NETargetInfo>(*polymorphic_downcast<DeconvolutionLayerNode *>(node), ctx);
+ return detail::create_deconvolution_layer<NEDeconvolutionLayer, NETargetInfo>(
+ *polymorphic_downcast<DeconvolutionLayerNode *>(node), ctx);
case NodeType::ConcatenateLayer:
- return detail::create_concatenate_layer<NEConcatenateLayer, NETargetInfo>(*polymorphic_downcast<ConcatenateLayerNode *>(node));
+ return detail::create_concatenate_layer<NEConcatenateLayer, NETargetInfo>(
+ *polymorphic_downcast<ConcatenateLayerNode *>(node));
case NodeType::DepthwiseConvolutionLayer:
- return detail::create_depthwise_convolution_layer<NEDepthwiseConvolutionLayer, NETargetInfo>(*polymorphic_downcast<DepthwiseConvolutionLayerNode *>(node));
+ return detail::create_depthwise_convolution_layer<NEDepthwiseConvolutionLayer, NETargetInfo>(
+ *polymorphic_downcast<DepthwiseConvolutionLayerNode *>(node));
case NodeType::DequantizationLayer:
- return detail::create_dequantization_layer<NEDequantizationLayer, NETargetInfo>(*polymorphic_downcast<DequantizationLayerNode *>(node));
+ return detail::create_dequantization_layer<NEDequantizationLayer, NETargetInfo>(
+ *polymorphic_downcast<DequantizationLayerNode *>(node));
case NodeType::DetectionOutputLayer:
- return detail::create_detection_output_layer<CPPDetectionOutputLayer, NETargetInfo>(*polymorphic_downcast<DetectionOutputLayerNode *>(node));
+ return detail::create_detection_output_layer<CPPDetectionOutputLayer, NETargetInfo>(
+ *polymorphic_downcast<DetectionOutputLayerNode *>(node));
case NodeType::DetectionPostProcessLayer:
- return detail::create_detection_post_process_layer<NEDetectionPostProcessLayer, NETargetInfo>(*polymorphic_downcast<DetectionPostProcessLayerNode *>(node));
+ return detail::create_detection_post_process_layer<NEDetectionPostProcessLayer, NETargetInfo>(
+ *polymorphic_downcast<DetectionPostProcessLayerNode *>(node));
case NodeType::EltwiseLayer:
- return detail::create_eltwise_layer<NEEltwiseFunctions, NETargetInfo>(*polymorphic_downcast<EltwiseLayerNode *>(node));
+ return detail::create_eltwise_layer<NEEltwiseFunctions, NETargetInfo>(
+ *polymorphic_downcast<EltwiseLayerNode *>(node));
case NodeType::UnaryEltwiseLayer:
- return detail::create_unary_eltwise_layer<NEUnaryEltwiseFunctions, NETargetInfo>(*polymorphic_downcast<UnaryEltwiseLayerNode *>(node));
+ return detail::create_unary_eltwise_layer<NEUnaryEltwiseFunctions, NETargetInfo>(
+ *polymorphic_downcast<UnaryEltwiseLayerNode *>(node));
case NodeType::FlattenLayer:
- return detail::create_flatten_layer<NEFlattenLayer, NETargetInfo>(*polymorphic_downcast<FlattenLayerNode *>(node));
+ return detail::create_flatten_layer<NEFlattenLayer, NETargetInfo>(
+ *polymorphic_downcast<FlattenLayerNode *>(node));
case NodeType::FullyConnectedLayer:
- return detail::create_fully_connected_layer<NEFullyConnectedLayer, NETargetInfo>(*polymorphic_downcast<FullyConnectedLayerNode *>(node), ctx);
+ return detail::create_fully_connected_layer<NEFullyConnectedLayer, NETargetInfo>(
+ *polymorphic_downcast<FullyConnectedLayerNode *>(node), ctx);
case NodeType::FusedConvolutionBatchNormalizationLayer:
- return detail::create_fused_convolution_batch_normalization_layer<NEFusedLayerTypes, NETargetInfo>(*polymorphic_downcast<FusedConvolutionBatchNormalizationNode *>(node), ctx);
+ return detail::create_fused_convolution_batch_normalization_layer<NEFusedLayerTypes, NETargetInfo>(
+ *polymorphic_downcast<FusedConvolutionBatchNormalizationNode *>(node), ctx);
case NodeType::FusedDepthwiseConvolutionBatchNormalizationLayer:
- return detail::create_fused_depthwise_convolution_batch_normalization_layer<NEFusedLayerTypes, NETargetInfo>(*polymorphic_downcast<FusedDepthwiseConvolutionBatchNormalizationNode *>(node), ctx);
+ return detail::create_fused_depthwise_convolution_batch_normalization_layer<NEFusedLayerTypes,
+ NETargetInfo>(
+ *polymorphic_downcast<FusedDepthwiseConvolutionBatchNormalizationNode *>(node), ctx);
case NodeType::L2NormalizeLayer:
- return detail::create_l2_normalize_layer<NEL2NormalizeLayer, NETargetInfo>(*polymorphic_downcast<L2NormalizeLayerNode *>(node), ctx);
+ return detail::create_l2_normalize_layer<NEL2NormalizeLayer, NETargetInfo>(
+ *polymorphic_downcast<L2NormalizeLayerNode *>(node), ctx);
case NodeType::NormalizationLayer:
- return detail::create_normalization_layer<NENormalizationLayer, NETargetInfo>(*polymorphic_downcast<NormalizationLayerNode *>(node), ctx);
+ return detail::create_normalization_layer<NENormalizationLayer, NETargetInfo>(
+ *polymorphic_downcast<NormalizationLayerNode *>(node), ctx);
case NodeType::PadLayer:
return detail::create_pad_layer<NEPadLayer, NETargetInfo>(*polymorphic_downcast<PadLayerNode *>(node));
case NodeType::PermuteLayer:
- return detail::create_permute_layer<NEPermute, NETargetInfo>(*polymorphic_downcast<PermuteLayerNode *>(node));
+ return detail::create_permute_layer<NEPermute, NETargetInfo>(
+ *polymorphic_downcast<PermuteLayerNode *>(node));
case NodeType::PoolingLayer:
- return detail::create_pooling_layer<NEPoolingLayer, NETargetInfo>(*polymorphic_downcast<PoolingLayerNode *>(node));
+ return detail::create_pooling_layer<NEPoolingLayer, NETargetInfo>(
+ *polymorphic_downcast<PoolingLayerNode *>(node));
case NodeType::PReluLayer:
- return detail::create_prelu_layer<NEPReluLayer, NETargetInfo>(*polymorphic_downcast<PReluLayerNode *>(node));
+ return detail::create_prelu_layer<NEPReluLayer, NETargetInfo>(
+ *polymorphic_downcast<PReluLayerNode *>(node));
case NodeType::PrintLayer:
return detail::create_print_layer<NETargetInfo>(*polymorphic_downcast<PrintLayerNode *>(node));
case NodeType::PriorBoxLayer:
- return detail::create_priorbox_layer<NEPriorBoxLayer, NETargetInfo>(*polymorphic_downcast<PriorBoxLayerNode *>(node));
+ return detail::create_priorbox_layer<NEPriorBoxLayer, NETargetInfo>(
+ *polymorphic_downcast<PriorBoxLayerNode *>(node));
case NodeType::QuantizationLayer:
- return detail::create_quantization_layer<NEQuantizationLayer, NETargetInfo>(*polymorphic_downcast<QuantizationLayerNode *>(node));
+ return detail::create_quantization_layer<NEQuantizationLayer, NETargetInfo>(
+ *polymorphic_downcast<QuantizationLayerNode *>(node));
case NodeType::ReductionOperationLayer:
- return detail::create_reduction_operation_layer<NEReductionOperation, NETargetInfo>(*polymorphic_downcast<ReductionLayerNode *>(node), ctx);
+ return detail::create_reduction_operation_layer<NEReductionOperation, NETargetInfo>(
+ *polymorphic_downcast<ReductionLayerNode *>(node), ctx);
case NodeType::ReorgLayer:
- return detail::create_reorg_layer<NEReorgLayer, NETargetInfo>(*polymorphic_downcast<ReorgLayerNode *>(node));
+ return detail::create_reorg_layer<NEReorgLayer, NETargetInfo>(
+ *polymorphic_downcast<ReorgLayerNode *>(node));
case NodeType::ReshapeLayer:
- return detail::create_reshape_layer<NEReshapeLayer, NETargetInfo>(*polymorphic_downcast<ReshapeLayerNode *>(node));
+ return detail::create_reshape_layer<NEReshapeLayer, NETargetInfo>(
+ *polymorphic_downcast<ReshapeLayerNode *>(node));
case NodeType::ResizeLayer:
return detail::create_resize_layer<NEScale, NETargetInfo>(*polymorphic_downcast<ResizeLayerNode *>(node));
case NodeType::SliceLayer:
return detail::create_slice_layer<NESlice, NETargetInfo>(*polymorphic_downcast<SliceLayerNode *>(node));
case NodeType::SoftmaxLayer:
- return detail::create_softmax_layer<NESoftmaxLayer, NETargetInfo>(*polymorphic_downcast<SoftmaxLayerNode *>(node), ctx);
+ return detail::create_softmax_layer<NESoftmaxLayer, NETargetInfo>(
+ *polymorphic_downcast<SoftmaxLayerNode *>(node), ctx);
case NodeType::StackLayer:
- return detail::create_stack_layer<NEStackLayer, NETargetInfo>(*polymorphic_downcast<StackLayerNode *>(node));
+ return detail::create_stack_layer<NEStackLayer, NETargetInfo>(
+ *polymorphic_downcast<StackLayerNode *>(node));
case NodeType::StridedSliceLayer:
- return detail::create_strided_slice_layer<NEStridedSlice, NETargetInfo>(*polymorphic_downcast<StridedSliceLayerNode *>(node));
+ return detail::create_strided_slice_layer<NEStridedSlice, NETargetInfo>(
+ *polymorphic_downcast<StridedSliceLayerNode *>(node));
default:
return nullptr;
}
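
Every case above downcasts the generic INode through utils::cast::polymorphic_downcast before handing it to a factory helper. A rough, self-contained sketch of what that cast amounts to (the real implementation lives in support/Cast.h; the types here are stand-ins):

    #include <cassert>

    struct INode
    {
        virtual ~INode() = default;
    };
    struct ReshapeLayerNode : INode
    {
    };

    // Illustrative stand-in: a static_cast whose result is checked against
    // dynamic_cast in debug builds, so it stays cheap in release builds.
    template <typename Target, typename Source>
    Target polymorphic_downcast(Source *v)
    {
        assert(dynamic_cast<Target>(v) == static_cast<Target>(v));
        return static_cast<Target>(v);
    }

    int main()
    {
        ReshapeLayerNode node;
        INode           *base     = &node;
        auto            *concrete = polymorphic_downcast<ReshapeLayerNode *>(base);
        (void)concrete;
    }
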
diff --git a/src/graph/backends/NEON/NENodeValidator.cpp b/src/graph/backends/NEON/NENodeValidator.cpp
index a485e5d235..a97806f92c 100644
--- a/src/graph/backends/NEON/NENodeValidator.cpp
+++ b/src/graph/backends/NEON/NENodeValidator.cpp
@@ -25,9 +25,9 @@
#include "arm_compute/graph/backends/ValidateHelpers.h"
#include "arm_compute/graph/nodes/Nodes.h"
-
#include "arm_compute/runtime/CPP/CPPFunctions.h"
#include "arm_compute/runtime/NEON/NEFunctions.h"
+
#include "support/Cast.h"
using namespace arm_compute::utils::cast;
@@ -56,41 +56,51 @@ struct NEUnaryEltwiseLayerFunctions
Status NENodeValidator::validate(INode *node)
{
- if(node == nullptr)
+ if (node == nullptr)
{
return Status{};
}
NodeType type = node->type();
- switch(type)
+ switch (type)
{
case NodeType::ArgMinMaxLayer:
- return detail::validate_arg_min_max_layer<NEArgMinMaxLayer>(*polymorphic_downcast<ArgMinMaxLayerNode *>(node));
+ return detail::validate_arg_min_max_layer<NEArgMinMaxLayer>(
+ *polymorphic_downcast<ArgMinMaxLayerNode *>(node));
case NodeType::BoundingBoxTransformLayer:
- return ARM_COMPUTE_CREATE_ERROR(arm_compute::ErrorCode::RUNTIME_ERROR, "Unsupported operation : BoundingBoxTransformLayer");
+ return ARM_COMPUTE_CREATE_ERROR(arm_compute::ErrorCode::RUNTIME_ERROR,
+ "Unsupported operation : BoundingBoxTransformLayer");
case NodeType::ChannelShuffleLayer:
- return detail::validate_channel_shuffle_layer<NEChannelShuffleLayer>(*polymorphic_downcast<ChannelShuffleLayerNode *>(node));
+ return detail::validate_channel_shuffle_layer<NEChannelShuffleLayer>(
+ *polymorphic_downcast<ChannelShuffleLayerNode *>(node));
case NodeType::ConvolutionLayer:
- return detail::validate_convolution_layer<NEConvolutionLayer,
- NEDirectConvolutionLayer,
- NEGEMMConvolutionLayer,
- NEWinogradConvolutionLayer>(*polymorphic_downcast<ConvolutionLayerNode *>(node));
+ return detail::validate_convolution_layer<NEConvolutionLayer, NEDirectConvolutionLayer,
+ NEGEMMConvolutionLayer, NEWinogradConvolutionLayer>(
+ *polymorphic_downcast<ConvolutionLayerNode *>(node));
case NodeType::DepthToSpaceLayer:
- return detail::validate_depth_to_space_layer<NEDepthToSpaceLayer>(*polymorphic_downcast<DepthToSpaceLayerNode *>(node));
+ return detail::validate_depth_to_space_layer<NEDepthToSpaceLayer>(
+ *polymorphic_downcast<DepthToSpaceLayerNode *>(node));
case NodeType::DepthwiseConvolutionLayer:
- return detail::validate_depthwise_convolution_layer<NEDepthwiseConvolutionLayer>(*polymorphic_downcast<DepthwiseConvolutionLayerNode *>(node));
+ return detail::validate_depthwise_convolution_layer<NEDepthwiseConvolutionLayer>(
+ *polymorphic_downcast<DepthwiseConvolutionLayerNode *>(node));
case NodeType::DequantizationLayer:
- return detail::validate_dequantization_layer<NEDequantizationLayer>(*polymorphic_downcast<DequantizationLayerNode *>(node));
+ return detail::validate_dequantization_layer<NEDequantizationLayer>(
+ *polymorphic_downcast<DequantizationLayerNode *>(node));
case NodeType::DetectionOutputLayer:
- return detail::validate_detection_output_layer<CPPDetectionOutputLayer>(*polymorphic_downcast<DetectionOutputLayerNode *>(node));
+ return detail::validate_detection_output_layer<CPPDetectionOutputLayer>(
+ *polymorphic_downcast<DetectionOutputLayerNode *>(node));
case NodeType::DetectionPostProcessLayer:
- return detail::validate_detection_post_process_layer<NEDetectionPostProcessLayer>(*polymorphic_downcast<DetectionPostProcessLayerNode *>(node));
+ return detail::validate_detection_post_process_layer<NEDetectionPostProcessLayer>(
+ *polymorphic_downcast<DetectionPostProcessLayerNode *>(node));
case NodeType::GenerateProposalsLayer:
- return ARM_COMPUTE_CREATE_ERROR(arm_compute::ErrorCode::RUNTIME_ERROR, "Unsupported operation : GenerateProposalsLayer");
+ return ARM_COMPUTE_CREATE_ERROR(arm_compute::ErrorCode::RUNTIME_ERROR,
+ "Unsupported operation : GenerateProposalsLayer");
case NodeType::L2NormalizeLayer:
- return detail::validate_l2_normalize_layer<NEL2NormalizeLayer>(*polymorphic_downcast<L2NormalizeLayerNode *>(node));
+ return detail::validate_l2_normalize_layer<NEL2NormalizeLayer>(
+ *polymorphic_downcast<L2NormalizeLayerNode *>(node));
case NodeType::NormalizePlanarYUVLayer:
- return ARM_COMPUTE_CREATE_ERROR(arm_compute::ErrorCode::RUNTIME_ERROR, "Unsupported operation : NormalizePlanarYUVLayer");
+ return ARM_COMPUTE_CREATE_ERROR(arm_compute::ErrorCode::RUNTIME_ERROR,
+ "Unsupported operation : NormalizePlanarYUVLayer");
case NodeType::PadLayer:
return detail::validate_pad_layer<NEPadLayer>(*polymorphic_downcast<PadLayerNode *>(node));
case NodeType::PermuteLayer:
@@ -100,23 +110,29 @@ Status NENodeValidator::validate(INode *node)
case NodeType::PriorBoxLayer:
return detail::validate_priorbox_layer<NEPriorBoxLayer>(*polymorphic_downcast<PriorBoxLayerNode *>(node));
case NodeType::QuantizationLayer:
- return detail::validate_quantization_layer<NEQuantizationLayer>(*polymorphic_downcast<QuantizationLayerNode *>(node));
+ return detail::validate_quantization_layer<NEQuantizationLayer>(
+ *polymorphic_downcast<QuantizationLayerNode *>(node));
case NodeType::ReductionOperationLayer:
- return detail::validate_reduction_operation_layer<NEReductionOperation>(*polymorphic_downcast<ReductionLayerNode *>(node));
+ return detail::validate_reduction_operation_layer<NEReductionOperation>(
+ *polymorphic_downcast<ReductionLayerNode *>(node));
case NodeType::ReorgLayer:
return detail::validate_reorg_layer<NEReorgLayer>(*polymorphic_downcast<ReorgLayerNode *>(node));
case NodeType::ReshapeLayer:
return detail::validate_reshape_layer<NEReshapeLayer>(*polymorphic_downcast<ReshapeLayerNode *>(node));
case NodeType::ROIAlignLayer:
- return ARM_COMPUTE_CREATE_ERROR(arm_compute::ErrorCode::RUNTIME_ERROR, "Unsupported operation : ROIAlignLayer");
+ return ARM_COMPUTE_CREATE_ERROR(arm_compute::ErrorCode::RUNTIME_ERROR,
+ "Unsupported operation : ROIAlignLayer");
case NodeType::SliceLayer:
return detail::validate_slice_layer<NESlice>(*polymorphic_downcast<SliceLayerNode *>(node));
case NodeType::StridedSliceLayer:
- return detail::validate_strided_slice_layer<NEStridedSlice>(*polymorphic_downcast<StridedSliceLayerNode *>(node));
+ return detail::validate_strided_slice_layer<NEStridedSlice>(
+ *polymorphic_downcast<StridedSliceLayerNode *>(node));
case NodeType::EltwiseLayer:
- return detail::validate_eltwise_Layer<NEEltwiseLayerFunctions>(*polymorphic_downcast<EltwiseLayerNode *>(node));
+ return detail::validate_eltwise_Layer<NEEltwiseLayerFunctions>(
+ *polymorphic_downcast<EltwiseLayerNode *>(node));
case NodeType::UnaryEltwiseLayer:
- return detail::validate_unary_eltwise_layer<NEUnaryEltwiseLayerFunctions>(*polymorphic_downcast<UnaryEltwiseLayerNode *>(node));
+ return detail::validate_unary_eltwise_layer<NEUnaryEltwiseLayerFunctions>(
+ *polymorphic_downcast<UnaryEltwiseLayerNode *>(node));
default:
return Status{};
}
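
The validator returns a Status that converts to false on error; callers test it with `!bool(status)` and fall back, as the mutators later in this patch do. A minimal, runnable sketch of that idiom using a stand-in Status type (not the library's class):

    #include <iostream>
    #include <string>

    struct Status
    {
        std::string error; // empty string means success
        explicit operator bool() const { return error.empty(); }
    };

    Status validate_node_stub()
    {
        return Status{"Unsupported operation : ROIAlignLayer"};
    }

    int main()
    {
        Status status = validate_node_stub();
        if (!bool(status)) // the same test the mutators use before falling back
        {
            std::cerr << status.error << "\n";
        }
    }
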
diff --git a/src/graph/backends/NEON/NESubTensorHandle.cpp b/src/graph/backends/NEON/NESubTensorHandle.cpp
index 36f29d0d10..8964a00c5e 100644
--- a/src/graph/backends/NEON/NESubTensorHandle.cpp
+++ b/src/graph/backends/NEON/NESubTensorHandle.cpp
@@ -29,7 +29,10 @@ namespace graph
{
namespace backends
{
-NESubTensorHandle::NESubTensorHandle(ITensorHandle *parent_handle, const TensorShape &shape, const Coordinates &coords, bool extend_parent)
+NESubTensorHandle::NESubTensorHandle(ITensorHandle *parent_handle,
+ const TensorShape &shape,
+ const Coordinates &coords,
+ bool extend_parent)
: _sub_tensor(), _parent_handle(nullptr)
{
ARM_COMPUTE_ERROR_ON(!parent_handle);
@@ -95,4 +98,4 @@ Target NESubTensorHandle::target() const
}
} // namespace backends
} // namespace graph
-} // namespace arm_compute \ No newline at end of file
+} // namespace arm_compute
diff --git a/src/graph/backends/NEON/NETensorHandle.cpp b/src/graph/backends/NEON/NETensorHandle.cpp
index 4393156e8a..dabf67060d 100644
--- a/src/graph/backends/NEON/NETensorHandle.cpp
+++ b/src/graph/backends/NEON/NETensorHandle.cpp
@@ -24,6 +24,7 @@
#include "arm_compute/graph/backends/NEON/NETensorHandle.h"
#include "arm_compute/runtime/MemoryGroup.h"
+
#include "support/Cast.h"
namespace arm_compute
@@ -32,8 +33,7 @@ namespace graph
{
namespace backends
{
-NETensorHandle::NETensorHandle(const ITensorInfo &info)
- : _tensor()
+NETensorHandle::NETensorHandle(const ITensorInfo &info) : _tensor()
{
_tensor.allocator()->init(info);
}
@@ -50,7 +50,7 @@ void NETensorHandle::free()
void NETensorHandle::manage(IMemoryGroup *mg)
{
- if(mg != nullptr)
+ if (mg != nullptr)
{
mg->manage(&_tensor);
}
@@ -68,7 +68,7 @@ void NETensorHandle::unmap()
void NETensorHandle::release_if_unused()
{
// TODO (geopin01): Release tensor only if all sub-tensors are marked as not used
- if(!_tensor.is_used())
+ if (!_tensor.is_used())
{
_tensor.allocator()->free();
}
@@ -100,4 +100,4 @@ Target NETensorHandle::target() const
}
} // namespace backends
} // namespace graph
-} // namespace arm_compute \ No newline at end of file
+} // namespace arm_compute
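
As the TODO above notes, release_if_unused() frees the backing allocation only once the tensor has been marked unused. A small stand-in sketch of that contract (plain struct, not the library's tensor):

    #include <cassert>

    struct StubTensor
    {
        bool used      = true;
        bool allocated = true;
        bool is_used() const { return used; }
        void mark_as_unused() { used = false; }
        void free() { allocated = false; }
    };

    void release_if_unused(StubTensor &t)
    {
        if (!t.is_used()) // mirrors the check in NETensorHandle::release_if_unused()
        {
            t.free();
        }
    }

    int main()
    {
        StubTensor t;
        release_if_unused(t); // still in use: nothing is freed
        t.mark_as_unused();
        release_if_unused(t); // no users left: backing memory is freed
        assert(!t.allocated);
    }
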
diff --git a/src/graph/detail/CrossLayerMemoryManagerHelpers.cpp b/src/graph/detail/CrossLayerMemoryManagerHelpers.cpp
index b45f453f23..1e813dc678 100644
--- a/src/graph/detail/CrossLayerMemoryManagerHelpers.cpp
+++ b/src/graph/detail/CrossLayerMemoryManagerHelpers.cpp
@@ -23,6 +23,8 @@
*/
#include "arm_compute/graph/detail/CrossLayerMemoryManagerHelpers.h"
+#include "arm_compute/core/ITensor.h"
+#include "arm_compute/graph/backends/BackendRegistry.h"
#include "arm_compute/graph/Graph.h"
#include "arm_compute/graph/GraphContext.h"
#include "arm_compute/graph/GraphManager.h"
@@ -30,9 +32,7 @@
#include "arm_compute/graph/Tensor.h"
#include "arm_compute/graph/Types.h"
#include "arm_compute/graph/Utils.h"
-#include "arm_compute/graph/backends/BackendRegistry.h"
-#include "arm_compute/core/ITensor.h"
#include "support/Cast.h"
#include <algorithm>
@@ -78,28 +78,28 @@ IMemoryGroup *get_memory_group_from_handle(GraphContext &ctx, ITensorHandle *han
*/
std::set<ITensorHandle *> get_const_handles(const Graph &g)
{
- std::set<NodeType> const_node_types = { NodeType::Input, NodeType::Output, NodeType::Const };
+ std::set<NodeType> const_node_types = {NodeType::Input, NodeType::Output, NodeType::Const};
std::set<ITensorHandle *> const_tensors;
auto &nodes = g.nodes();
- for(auto &node : nodes)
+ for (auto &node : nodes)
{
        // If it's a const node:
- if(node != nullptr && const_node_types.find(node->type()) != std::end(const_node_types))
+ if (node != nullptr && const_node_types.find(node->type()) != std::end(const_node_types))
{
// TODO (geopin01) : Create IO iterator wrappers
// Add all its inputs / outputs to the list of constant handles
- for(unsigned int i = 0; i < node->num_inputs(); ++i)
+ for (unsigned int i = 0; i < node->num_inputs(); ++i)
{
- if(node->input(i) != nullptr)
+ if (node->input(i) != nullptr)
{
const_tensors.insert(node->input(i)->handle()->parent_handle());
}
}
- for(unsigned int i = 0; i < node->num_outputs(); ++i)
+ for (unsigned int i = 0; i < node->num_outputs(); ++i)
{
- if(node->output(i) != nullptr)
+ if (node->output(i) != nullptr)
{
const_tensors.insert(node->output(i)->handle()->parent_handle());
}
@@ -118,9 +118,8 @@ std::set<ITensorHandle *> get_const_handles(const Graph &g)
*
* @return List of transition handles
*/
-TaskHandles get_transition_handles(GraphContext &ctx,
- ExecutionTask &task,
- const std::set<ITensorHandle *> &const_tensors)
+TaskHandles
+get_transition_handles(GraphContext &ctx, ExecutionTask &task, const std::set<ITensorHandle *> &const_tensors)
{
ARM_COMPUTE_ERROR_ON(task.node == nullptr || (task.task == nullptr && !is_utility_node(task.node)));
INode &node = *task.node;
@@ -128,28 +127,30 @@ TaskHandles get_transition_handles(GraphContext &ctx,
TaskHandles transition_handles;
// Add input handles
- for(unsigned int i = 0; i < node.input_edges().size(); ++i)
+ for (unsigned int i = 0; i < node.input_edges().size(); ++i)
{
Edge *input_edge = node.input_edge(i);
// If this input is the output of another node
- if(input_edge != nullptr && input_edge->tensor() != nullptr && const_tensors.find(input_edge->tensor()->handle()->parent_handle()) == std::end(const_tensors))
+ if (input_edge != nullptr && input_edge->tensor() != nullptr &&
+ const_tensors.find(input_edge->tensor()->handle()->parent_handle()) == std::end(const_tensors))
{
// Then add it to the list of transition buffers
ITensorHandle *tensor_handle = input_edge->tensor()->handle()->parent_handle();
- IMemoryGroup *mm_group = get_memory_group_from_handle(ctx, tensor_handle);
+ IMemoryGroup *mm_group = get_memory_group_from_handle(ctx, tensor_handle);
transition_handles.input_handles.emplace_back(std::make_pair(tensor_handle, mm_group));
}
}
// Add output handles
- for(unsigned int i = 0; i < node.num_outputs(); ++i)
+ for (unsigned int i = 0; i < node.num_outputs(); ++i)
{
Tensor *output_tensor = node.output(i);
// If this output is used as an input for another node
- if(output_tensor != nullptr && const_tensors.find(output_tensor->handle()->parent_handle()) == std::end(const_tensors))
+ if (output_tensor != nullptr &&
+ const_tensors.find(output_tensor->handle()->parent_handle()) == std::end(const_tensors))
{
ITensorHandle *tensor_handle = output_tensor->handle()->parent_handle();
- IMemoryGroup *mm_group = get_memory_group_from_handle(ctx, tensor_handle);
+ IMemoryGroup *mm_group = get_memory_group_from_handle(ctx, tensor_handle);
transition_handles.output_handles.emplace_back(std::make_pair(tensor_handle, mm_group));
}
}
@@ -164,11 +165,11 @@ TaskHandles get_transition_handles(GraphContext &ctx,
*/
void count_input_handles_per_target(const TaskHandles &task_handles, TargetHandleCounter &handle_counter)
{
- for(const auto &handle : task_handles.input_handles)
+ for (const auto &handle : task_handles.input_handles)
{
ITensorHandle *key = handle.first;
HandleCounter &target_counter = handle_counter[key->target()];
- if(target_counter.find(key) == std::end(target_counter))
+ if (target_counter.find(key) == std::end(target_counter))
{
target_counter.emplace(std::make_pair(key, 1));
}
@@ -192,12 +193,12 @@ void configure_handle_lifetime(std::vector<TaskHandles> &tasks_handles, const Ha
// Acquires the given handles and sets them as in flight if they aren't already
auto acquire = [&](std::vector<std::pair<ITensorHandle *, IMemoryGroup *>> &handles)
{
- for(auto &handle : handles)
+ for (auto &handle : handles)
{
ITensorHandle *parent_handle = handle.first;
ARM_COMPUTE_ERROR_ON(parent_handle == nullptr);
// If the tensor is not already in flight:
- if(tensors_in_flight.find(parent_handle) == std::end(tensors_in_flight))
+ if (tensors_in_flight.find(parent_handle) == std::end(tensors_in_flight))
{
ARM_COMPUTE_ERROR_ON(hc.find(parent_handle) == std::end(hc));
// Then add it to the list of in flight tensors
@@ -208,20 +209,20 @@ void configure_handle_lifetime(std::vector<TaskHandles> &tasks_handles, const Ha
}
};
- for(auto &task_handle : tasks_handles)
+ for (auto &task_handle : tasks_handles)
{
// Marking all the input and output tensors of the task as in flight
acquire(task_handle.input_handles);
acquire(task_handle.output_handles);
// Releasing the input tensors
- for(auto &input_handle : task_handle.input_handles)
+ for (auto &input_handle : task_handle.input_handles)
{
ITensorHandle *ihandle = input_handle.first;
ARM_COMPUTE_ERROR_ON(ihandle == nullptr);
ARM_COMPUTE_ERROR_ON(tensors_in_flight.find(ihandle) == std::end(tensors_in_flight));
--tensors_in_flight[ihandle];
- if(tensors_in_flight[ihandle] <= 0)
+ if (tensors_in_flight[ihandle] <= 0)
{
                // Remove tensor from tensors in flight
tensors_in_flight.erase(ihandle);
@@ -242,7 +243,7 @@ void configure_transition_manager(Graph &g, GraphContext &ctx, ExecutionWorkload
TargetHandleCounter target_handle_count;
// Count handles
- for(auto &task : workload.tasks)
+ for (auto &task : workload.tasks)
{
// Populates IO handles
tasks_handles.push_back(get_transition_handles(ctx, task, const_tensors));
@@ -252,12 +253,12 @@ void configure_transition_manager(Graph &g, GraphContext &ctx, ExecutionWorkload
}
// Setup memory managers
- for(auto &hc : target_handle_count)
+ for (auto &hc : target_handle_count)
{
MemoryManagerContext *mm_ctx = ctx.memory_management_ctx(hc.first);
- if(mm_ctx != nullptr)
+ if (mm_ctx != nullptr)
{
- if(mm_ctx->cross_mm != nullptr && mm_ctx->cross_group != nullptr)
+ if (mm_ctx->cross_mm != nullptr && mm_ctx->cross_group != nullptr)
{
// Manage and allocate tensors
configure_handle_lifetime(tasks_handles, hc.second);
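
configure_handle_lifetime() effectively reference-counts each transition buffer: a handle is acquired the first time a task touches it and released once its last consumer has run. A simplified, runnable sketch of that counting, with plain maps standing in for the handle and counter types:

    #include <cassert>
    #include <map>
    #include <string>
    #include <vector>

    int main()
    {
        // Total number of consuming tasks per transition tensor (cf. HandleCounter)
        std::map<std::string, int> handle_count = {{"t0", 2}, {"t1", 1}};
        std::map<std::string, int> in_flight;

        // Tasks in execution order, each with the tensors it reads
        std::vector<std::vector<std::string>> task_inputs = {{"t0"}, {"t0", "t1"}};

        for (const auto &inputs : task_inputs)
        {
            for (const auto &t : inputs)
            {
                if (in_flight.find(t) == std::end(in_flight))
                {
                    in_flight[t] = handle_count[t]; // acquire on first use
                }
            }
            for (const auto &t : inputs)
            {
                if (--in_flight[t] <= 0)
                {
                    in_flight.erase(t); // release after the last consumer has run
                }
            }
        }
        assert(in_flight.empty()); // every transition buffer was released
    }
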
diff --git a/src/graph/detail/ExecutionHelpers.cpp b/src/graph/detail/ExecutionHelpers.cpp
index 5be3706cfe..870d24a6c7 100644
--- a/src/graph/detail/ExecutionHelpers.cpp
+++ b/src/graph/detail/ExecutionHelpers.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -23,12 +23,12 @@
*/
#include "arm_compute/graph/detail/ExecutionHelpers.h"
+#include "arm_compute/graph/backends/BackendRegistry.h"
#include "arm_compute/graph/Graph.h"
#include "arm_compute/graph/GraphContext.h"
#include "arm_compute/graph/GraphManager.h"
#include "arm_compute/graph/Tensor.h"
#include "arm_compute/graph/Utils.h"
-#include "arm_compute/graph/backends/BackendRegistry.h"
namespace arm_compute
{
@@ -41,9 +41,9 @@ void validate_all_nodes(Graph &g)
auto &nodes = g.nodes();
// Create tasks
- for(auto &node : nodes)
+ for (auto &node : nodes)
{
- if(node != nullptr)
+ if (node != nullptr)
{
Target assigned_target = node->assigned_target();
backends::IDeviceBackend &backend = backends::BackendRegistry::get().get_backend(assigned_target);
@@ -57,9 +57,9 @@ void configure_all_tensors(Graph &g)
{
auto &tensors = g.tensors();
- for(auto &tensor : tensors)
+ for (auto &tensor : tensors)
{
- if(tensor && tensor->handle() == nullptr)
+ if (tensor && tensor->handle() == nullptr)
{
Target target = tensor->desc().target;
backends::IDeviceBackend &backend = backends::BackendRegistry::get().get_backend(target);
@@ -72,10 +72,10 @@ void configure_all_tensors(Graph &g)
void allocate_all_input_tensors(INode &node)
{
- for(unsigned int i = 0; i < node.num_inputs(); ++i)
+ for (unsigned int i = 0; i < node.num_inputs(); ++i)
{
Tensor *tensor = node.input(i);
- if(tensor != nullptr && !tensor->bound_edges().empty())
+ if (tensor != nullptr && !tensor->bound_edges().empty())
{
ARM_COMPUTE_ERROR_ON_MSG(!tensor->handle(), "Tensor handle is not configured!");
tensor->handle()->allocate();
@@ -85,10 +85,10 @@ void allocate_all_input_tensors(INode &node)
void allocate_all_output_tensors(INode &node)
{
- for(unsigned int i = 0; i < node.num_outputs(); ++i)
+ for (unsigned int i = 0; i < node.num_outputs(); ++i)
{
Tensor *tensor = node.output(i);
- if(tensor != nullptr && !tensor->bound_edges().empty())
+ if (tensor != nullptr && !tensor->bound_edges().empty())
{
ARM_COMPUTE_ERROR_ON_MSG(!tensor->handle(), "Tensor handle is not configured!");
tensor->handle()->allocate();
@@ -98,11 +98,11 @@ void allocate_all_output_tensors(INode &node)
void allocate_const_tensors(Graph &g)
{
- for(auto &node : g.nodes())
+ for (auto &node : g.nodes())
{
- if(node != nullptr)
+ if (node != nullptr)
{
- switch(node->type())
+ switch (node->type())
{
case NodeType::Const:
case NodeType::Input:
@@ -121,9 +121,10 @@ void allocate_all_tensors(Graph &g)
{
auto &tensors = g.tensors();
- for(auto &tensor : tensors)
+ for (auto &tensor : tensors)
{
- if(tensor && !tensor->bound_edges().empty() && tensor->handle() != nullptr && tensor->handle()->tensor().info()->is_resizable() && tensor->handle()->tensor().is_used())
+ if (tensor && !tensor->bound_edges().empty() && tensor->handle() != nullptr &&
+ tensor->handle()->tensor().info()->is_resizable() && tensor->handle()->tensor().is_used())
{
tensor->handle()->allocate();
}
@@ -140,15 +141,15 @@ ExecutionWorkload configure_all_nodes(Graph &g, GraphContext &ctx, const std::ve
workload.tasks.reserve(node_order.size());
// Create tasks
- for(auto &node_id : node_order)
+ for (auto &node_id : node_order)
{
auto node = g.node(node_id);
- if(node != nullptr)
+ if (node != nullptr)
{
Target assigned_target = node->assigned_target();
- backends::IDeviceBackend &backend = backends::BackendRegistry::get().get_backend(assigned_target);
+ backends::IDeviceBackend &backend = backends::BackendRegistry::get().get_backend(assigned_target);
std::unique_ptr<IFunction> func = backend.configure_node(*node, ctx);
- if(func != nullptr || is_utility_node(node))
+ if (func != nullptr || is_utility_node(node))
{
workload.tasks.emplace_back(ExecutionTask(std::move(func), node));
}
@@ -156,14 +157,14 @@ ExecutionWorkload configure_all_nodes(Graph &g, GraphContext &ctx, const std::ve
}
// Add inputs and outputs
- for(auto &node : g.nodes())
+ for (auto &node : g.nodes())
{
- if(node != nullptr && node->type() == NodeType::Input)
+ if (node != nullptr && node->type() == NodeType::Input)
{
workload.inputs.push_back(node->output(0));
}
- if(node != nullptr && node->type() == NodeType::Output)
+ if (node != nullptr && node->type() == NodeType::Output)
{
workload.outputs.push_back(node->input(0));
continue;
@@ -175,9 +176,9 @@ ExecutionWorkload configure_all_nodes(Graph &g, GraphContext &ctx, const std::ve
void release_unused_tensors(Graph &g)
{
- for(auto &tensor : g.tensors())
+ for (auto &tensor : g.tensors())
{
- if(tensor != nullptr && tensor->handle() != nullptr)
+ if (tensor != nullptr && tensor->handle() != nullptr)
{
tensor->handle()->release_if_unused();
}
@@ -194,11 +195,11 @@ void call_all_const_node_accessors(Graph &g)
{
auto &nodes = g.nodes();
- for(auto &node : nodes)
+ for (auto &node : nodes)
{
- if(node != nullptr && node->type() == NodeType::Const && node->num_outputs())
+ if (node != nullptr && node->type() == NodeType::Const && node->num_outputs())
{
- if(!node->output(0)->bound_edges().empty())
+ if (!node->output(0)->bound_edges().empty())
{
call_tensor_accessor(node->output(0));
}
@@ -209,18 +210,19 @@ void call_all_const_node_accessors(Graph &g)
bool call_all_input_node_accessors(ExecutionWorkload &workload)
{
bool is_valid = true;
- std::for_each(std::begin(workload.inputs), std::end(workload.inputs), [&](Tensor * input_tensor)
- {
- bool valid_input = (input_tensor != nullptr) && input_tensor->call_accessor();
- is_valid = is_valid && valid_input;
- });
+ std::for_each(std::begin(workload.inputs), std::end(workload.inputs),
+ [&](Tensor *input_tensor)
+ {
+ bool valid_input = (input_tensor != nullptr) && input_tensor->call_accessor();
+ is_valid = is_valid && valid_input;
+ });
return is_valid;
}
void prepare_all_tasks(ExecutionWorkload &workload)
{
ARM_COMPUTE_ERROR_ON(workload.graph == nullptr);
- for(auto &task : workload.tasks)
+ for (auto &task : workload.tasks)
{
task.prepare();
release_unused_tensors(*workload.graph);
@@ -232,24 +234,24 @@ void call_all_tasks(ExecutionWorkload &workload)
ARM_COMPUTE_ERROR_ON(workload.ctx == nullptr);
// Acquire memory for the transition buffers
- for(auto &mm_ctx : workload.ctx->memory_managers())
+ for (auto &mm_ctx : workload.ctx->memory_managers())
{
- if(mm_ctx.second.cross_group != nullptr)
+ if (mm_ctx.second.cross_group != nullptr)
{
mm_ctx.second.cross_group->acquire();
}
}
// Execute tasks
- for(auto &task : workload.tasks)
+ for (auto &task : workload.tasks)
{
task();
}
// Release memory for the transition buffers
- for(auto &mm_ctx : workload.ctx->memory_managers())
+ for (auto &mm_ctx : workload.ctx->memory_managers())
{
- if(mm_ctx.second.cross_group != nullptr)
+ if (mm_ctx.second.cross_group != nullptr)
{
mm_ctx.second.cross_group->release();
}
@@ -259,11 +261,14 @@ void call_all_tasks(ExecutionWorkload &workload)
bool call_all_output_node_accessors(ExecutionWorkload &workload)
{
bool is_valid = true;
- std::for_each(std::begin(workload.outputs), std::end(workload.outputs), [&](Tensor * output_tensor)
- {
- bool valid_output = (output_tensor != nullptr) && output_tensor->call_accessor();
- is_valid = is_valid && valid_output;
- });
+ std::for_each(std::begin(workload.outputs), std::end(workload.outputs),
+ [&](Tensor *output_tensor)
+ {
+ bool valid_output = (output_tensor != nullptr) && output_tensor->call_accessor();
+ is_valid = is_valid && valid_output;
+ });
+
+ sync_backends();
return is_valid;
}
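
Taken together, these helpers execute one inference step as: input accessors first, then the task list (with transition buffers acquired and released around it), then the output accessors, which this patch extends with a backend sync. A stand-in sketch of that sequence (the Workload struct here is illustrative, not the library's):

    #include <functional>
    #include <iostream>
    #include <vector>

    struct Workload
    {
        std::vector<std::function<void()>> tasks;
        std::function<bool()>              read_inputs;
        std::function<bool()>              write_outputs;
    };

    bool execute_once(Workload &w)
    {
        if (!w.read_inputs()) // call_all_input_node_accessors
        {
            return false;
        }
        for (auto &task : w.tasks) // call_all_tasks
        {
            task();
        }
        return w.write_outputs(); // call_all_output_node_accessors (now also syncs backends)
    }

    int main()
    {
        Workload w{{[] { std::cout << "conv\n"; }, [] { std::cout << "softmax\n"; }},
                   [] { return true; },
                   [] { return true; }};
        std::cout << (execute_once(w) ? "ok" : "failed") << "\n";
    }
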
diff --git a/src/graph/frontend/Stream.cpp b/src/graph/frontend/Stream.cpp
index 44c8400874..383a6dc67f 100644
--- a/src/graph/frontend/Stream.cpp
+++ b/src/graph/frontend/Stream.cpp
@@ -23,8 +23,8 @@
*/
#include "arm_compute/graph/frontend/Stream.h"
-#include "arm_compute/graph/Utils.h"
#include "arm_compute/graph/frontend/ILayer.h"
+#include "arm_compute/graph/Utils.h"
namespace arm_compute
{
@@ -32,8 +32,7 @@ namespace graph
{
namespace frontend
{
-Stream::Stream(size_t id, std::string name)
- : _ctx(), _manager(), _g(id, std::move(name))
+Stream::Stream(size_t id, std::string name) : _ctx(), _manager(), _g(id, std::move(name))
{
}
diff --git a/src/graph/frontend/SubStream.cpp b/src/graph/frontend/SubStream.cpp
index 4b42207e80..8596aaa1a3 100644
--- a/src/graph/frontend/SubStream.cpp
+++ b/src/graph/frontend/SubStream.cpp
@@ -23,8 +23,8 @@
*/
#include "arm_compute/graph/frontend/SubStream.h"
-#include "arm_compute/graph/Graph.h"
#include "arm_compute/graph/frontend/ILayer.h"
+#include "arm_compute/graph/Graph.h"
namespace arm_compute
{
@@ -32,8 +32,7 @@ namespace graph
{
namespace frontend
{
-SubStream::SubStream(IStream &s)
- : _s(s)
+SubStream::SubStream(IStream &s) : _s(s)
{
_hints = s.hints();
_tail_node = s.tail_node();
diff --git a/src/graph/mutators/DepthConcatSubTensorMutator.cpp b/src/graph/mutators/DepthConcatSubTensorMutator.cpp
index 963b948432..1b7ee3c4a4 100644
--- a/src/graph/mutators/DepthConcatSubTensorMutator.cpp
+++ b/src/graph/mutators/DepthConcatSubTensorMutator.cpp
@@ -23,12 +23,12 @@
*/
#include "arm_compute/graph/mutators/DepthConcatSubTensorMutator.h"
-#include "arm_compute/graph/Graph.h"
-#include "arm_compute/graph/Logger.h"
-#include "arm_compute/graph/Utils.h"
#include "arm_compute/graph/algorithms/TopologicalSort.h"
#include "arm_compute/graph/backends/BackendRegistry.h"
+#include "arm_compute/graph/Graph.h"
+#include "arm_compute/graph/Logger.h"
#include "arm_compute/graph/nodes/ConcatenateLayerNode.h"
+#include "arm_compute/graph/Utils.h"
#include "support/Cast.h"
#include "support/Iterable.h"
@@ -50,7 +50,7 @@ IGraphMutator::MutationType DepthConcatSubTensorMutator::type() const
void DepthConcatSubTensorMutator::mutate(Graph &g)
{
// Early exit if no Concatenation layers exist in graph
- if(g.nodes(NodeType::ConcatenateLayer).empty())
+ if (g.nodes(NodeType::ConcatenateLayer).empty())
{
return;
}
@@ -59,43 +59,48 @@ void DepthConcatSubTensorMutator::mutate(Graph &g)
std::vector<NodeID> topological_sorted_node_ids = dfs(g);
// Should be in reverse order of execution
- for(auto &node_id : arm_compute::utils::iterable::reverse_iterate(topological_sorted_node_ids))
+ for (auto &node_id : arm_compute::utils::iterable::reverse_iterate(topological_sorted_node_ids))
{
INode *node = g.node(node_id);
- if(node != nullptr && node->type() == NodeType::ConcatenateLayer && node->output(0) != nullptr)
+ if (node != nullptr && node->type() == NodeType::ConcatenateLayer && node->output(0) != nullptr)
{
// Get output tensor
auto output_tensor = node->output(0);
// Check concatenation axis (Sub-tensor optimization is supported for concatenation axis >=2)
auto *concat_node = arm_compute::utils::cast::polymorphic_downcast<ConcatenateLayerNode *>(node);
- if(output_tensor == nullptr || get_dimension_idx(output_tensor->desc().layout, concat_node->concatenation_axis()) < 2)
+ if (output_tensor == nullptr ||
+ get_dimension_idx(output_tensor->desc().layout, concat_node->concatenation_axis()) < 2)
{
continue;
}
            // Check that all tensors have the same target, valid inputs and the same quantization info
- bool is_valid = std::all_of(node->input_edges().cbegin(), node->input_edges().cend(),
- [&](const EdgeID & eid)
- {
- return (g.edge(eid) != nullptr) && (g.edge(eid)->tensor() != nullptr) && (g.edge(eid)->tensor()->desc().target == output_tensor->desc().target)
- && (g.edge(eid)->tensor()->desc().quant_info == output_tensor->desc().quant_info);
- });
+ bool is_valid =
+ std::all_of(node->input_edges().cbegin(), node->input_edges().cend(),
+ [&](const EdgeID &eid)
+ {
+ return (g.edge(eid) != nullptr) && (g.edge(eid)->tensor() != nullptr) &&
+ (g.edge(eid)->tensor()->desc().target == output_tensor->desc().target) &&
+ (g.edge(eid)->tensor()->desc().quant_info == output_tensor->desc().quant_info);
+ });
// Create subtensors
- if(is_valid && is_target_supported(output_tensor->desc().target))
+ if (is_valid && is_target_supported(output_tensor->desc().target))
{
ARM_COMPUTE_LOG_GRAPH_VERBOSE("Using sub-tensors for the node with ID : "
<< node->id() << " and name : " << node->name() << std::endl);
// Create sub-tensor handles
unsigned depth = 0;
- for(unsigned int i = 0; i < node->input_edges().size(); ++i)
+ for (unsigned int i = 0; i < node->input_edges().size(); ++i)
{
auto input_tensor = node->input(i);
const auto input_shape = input_tensor->desc().shape;
- backends::IDeviceBackend &backend = backends::BackendRegistry::get().get_backend(input_tensor->desc().target);
- std::unique_ptr<ITensorHandle> handle = backend.create_subtensor(output_tensor->handle(), input_shape, Coordinates(0, 0, depth), false);
+ backends::IDeviceBackend &backend =
+ backends::BackendRegistry::get().get_backend(input_tensor->desc().target);
+ std::unique_ptr<ITensorHandle> handle =
+ backend.create_subtensor(output_tensor->handle(), input_shape, Coordinates(0, 0, depth), false);
input_tensor->set_handle(std::move(handle));
depth += input_shape.z();
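
The mutator turns the concatenation into sub-tensor views: each input is backed by a slice of the output tensor at a growing z-offset, so the inputs write straight into the concatenated buffer and no copy kernel is needed. A worked sketch of the offsets, with illustrative depths:

    #include <cstdio>
    #include <vector>

    int main()
    {
        // z-extents of three inputs concatenated along the depth axis
        std::vector<unsigned> input_depths = {32, 64, 32};

        unsigned depth = 0;
        for (unsigned i = 0; i < input_depths.size(); ++i)
        {
            // Equivalent of create_subtensor(output, shape_i, Coordinates(0, 0, depth), false)
            std::printf("input %u -> sub-tensor view at z-offset %u\n", i, depth);
            depth += input_depths[i]; // matches `depth += input_shape.z()` above
        }
        // The output depth is 128; each input writes directly into its slice.
    }
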
diff --git a/src/graph/mutators/GroupedConvolutionMutator.cpp b/src/graph/mutators/GroupedConvolutionMutator.cpp
index b7c551ce8b..31efba6bb1 100644
--- a/src/graph/mutators/GroupedConvolutionMutator.cpp
+++ b/src/graph/mutators/GroupedConvolutionMutator.cpp
@@ -23,15 +23,14 @@
*/
#include "arm_compute/graph/mutators/GroupedConvolutionMutator.h"
+#include "arm_compute/graph/backends/BackendRegistry.h"
#include "arm_compute/graph/Graph.h"
#include "arm_compute/graph/GraphBuilder.h"
#include "arm_compute/graph/Logger.h"
-#include "arm_compute/graph/Utils.h"
-#include "arm_compute/graph/backends/BackendRegistry.h"
#include "arm_compute/graph/nodes/Nodes.h"
+#include "arm_compute/graph/Utils.h"
#include "support/Cast.h"
-
#include "support/StringSupport.h"
#include <set>
@@ -42,43 +41,51 @@ namespace graph
{
namespace
{
-NodeID create_grouped_convolution(Graph &g, const NodeParams &params, NodeIdxPair input, NodeID weights, NodeID bias,
- PadStrideInfo conv_info, ConvolutionMethod method, ActivationLayerInfo fused_act, FastMathHint fast_math_hint, unsigned int num_groups)
+NodeID create_grouped_convolution(Graph &g,
+ const NodeParams &params,
+ NodeIdxPair input,
+ NodeID weights,
+ NodeID bias,
+ PadStrideInfo conv_info,
+ ConvolutionMethod method,
+ ActivationLayerInfo fused_act,
+ FastMathHint fast_math_hint,
+ unsigned int num_groups)
{
bool has_bias = (bias != EmptyNodeID);
// Split input
const TensorDescriptor input_tensor_desc = get_tensor_descriptor(g, g.node(input.node_id)->outputs()[0]);
- const unsigned int input_idx = get_dimension_idx(input_tensor_desc.layout, DataLayoutDimension::CHANNEL);
- NodeID input_split = GraphBuilder::add_split_node(g, params, input, num_groups, input_idx);
+ const unsigned int input_idx = get_dimension_idx(input_tensor_desc.layout, DataLayoutDimension::CHANNEL);
+ NodeID input_split = GraphBuilder::add_split_node(g, params, input, num_groups, input_idx);
// Split weights
const TensorDescriptor weights_tensor_desc = get_tensor_descriptor(g, g.node(weights)->outputs()[0]);
- const unsigned int batch_idx = get_dimension_idx(weights_tensor_desc.layout, DataLayoutDimension::BATCHES);
- NodeID weights_split = GraphBuilder::add_split_node(g, params, { weights, 0 }, num_groups, batch_idx);
+ const unsigned int batch_idx = get_dimension_idx(weights_tensor_desc.layout, DataLayoutDimension::BATCHES);
+ NodeID weights_split = GraphBuilder::add_split_node(g, params, {weights, 0}, num_groups, batch_idx);
// Split bias
NodeID bias_split = EmptyNodeID;
- if(has_bias)
+ if (has_bias)
{
// Split bias
- bias_split = GraphBuilder::add_split_node(g, params, { bias, 0 }, num_groups, 0);
+ bias_split = GraphBuilder::add_split_node(g, params, {bias, 0}, num_groups, 0);
}
std::vector<NodeIdxPair> convolution_outputs;
- for(unsigned int i = 0; i < num_groups; ++i)
+ for (unsigned int i = 0; i < num_groups; ++i)
{
NodeParams group_params = params;
NodeID conv_nid = g.add_node<ConvolutionLayerNode>(conv_info, 1, method, fast_math_hint);
g.add_connection(input_split, i, conv_nid, 0);
g.add_connection(weights_split, i, conv_nid, 1);
- if(has_bias)
+ if (has_bias)
{
g.add_connection(bias_split, i, conv_nid, 2);
}
// Add group name
- if(!group_params.name.empty())
+ if (!group_params.name.empty())
{
group_params.name.append("_g" + arm_compute::support::cpp11::to_string(i));
}
@@ -92,7 +99,7 @@ NodeID create_grouped_convolution(Graph &g, const NodeParams &params, NodeIdxPai
auto *conv_node = arm_compute::utils::cast::polymorphic_downcast<ConvolutionLayerNode *>(node);
conv_node->set_fused_activation(fused_act);
- convolution_outputs.push_back({ conv_nid, 0 });
+ convolution_outputs.push_back({conv_nid, 0});
}
// Depth concatenate output
@@ -113,7 +120,7 @@ IGraphMutator::MutationType GroupedConvolutionMutator::type() const
void GroupedConvolutionMutator::mutate(Graph &g)
{
// Early exit if no Convolution layers exist in graph
- if(g.nodes(NodeType::ConvolutionLayer).empty())
+ if (g.nodes(NodeType::ConvolutionLayer).empty())
{
return;
}
@@ -122,17 +129,18 @@ void GroupedConvolutionMutator::mutate(Graph &g)
size_t total_nodes = g.nodes().size();
// Iterate over convolution nodes
- for(unsigned int i = 0; i < total_nodes; ++i)
+ for (unsigned int i = 0; i < total_nodes; ++i)
{
INode *node = g.node(i);
- if(node != nullptr && node->type() == NodeType::ConvolutionLayer && arm_compute::utils::cast::polymorphic_downcast<ConvolutionLayerNode *>(node)->num_groups() != 1)
+ if (node != nullptr && node->type() == NodeType::ConvolutionLayer &&
+ arm_compute::utils::cast::polymorphic_downcast<ConvolutionLayerNode *>(node)->num_groups() != 1)
{
// Validate node
backends::IDeviceBackend &backend = backends::BackendRegistry::get().get_backend(node->assigned_target());
Status status = backend.validate_node(*node);
// If grouped convolution is not supported
- if(!bool(status))
+ if (!bool(status))
{
// Down-cast node
auto *conv_node = arm_compute::utils::cast::polymorphic_downcast<ConvolutionLayerNode *>(node);
@@ -151,7 +159,8 @@ void GroupedConvolutionMutator::mutate(Graph &g)
ARM_COMPUTE_ERROR_ON(conv_node->input_edge(0) == nullptr || conv_node->input_edge(1) == nullptr);
const NodeID input_id = conv_node->input_edge(0)->producer()->id();
const NodeID weights_id = conv_node->input_edge(1)->producer()->id();
- const NodeID bias_id = (conv_node->input_edge(2) != nullptr) ? conv_node->input_edge(2)->producer()->id() : EmptyNodeID;
+ const NodeID bias_id =
+ (conv_node->input_edge(2) != nullptr) ? conv_node->input_edge(2)->producer()->id() : EmptyNodeID;
// Get driving nodes
std::vector<NodeIdxPair> driving_nodes = get_driving_nodes(*node);
@@ -164,14 +173,15 @@ void GroupedConvolutionMutator::mutate(Graph &g)
NodeID latest_nid = g.nodes().size();
// Create grouped convolution node
- NodeID grouped_conv_id = create_grouped_convolution(g, params, { input_id, 0 }, weights_id, bias_id,
- conv_info, conv_method, fused_act_info, fast_math_hint, num_groups);
+ NodeID grouped_conv_id =
+ create_grouped_convolution(g, params, {input_id, 0}, weights_id, bias_id, conv_info, conv_method,
+ fused_act_info, fast_math_hint, num_groups);
// Remove convolution node
g.remove_node(node->id());
                // Reconnect the driving nodes to the grouped convolution output
- for(auto &driving_node : driving_nodes)
+ for (auto &driving_node : driving_nodes)
{
g.add_connection(grouped_conv_id, 0, driving_node.node_id, driving_node.index);
}
@@ -180,17 +190,16 @@ void GroupedConvolutionMutator::mutate(Graph &g)
g.node(grouped_conv_id)->output(0)->set_accessor(std::move(node_accessor));
// Configure new tensors and nodes
- std::for_each(g.tensors().begin() + latest_tid, g.tensors().end(), [](std::unique_ptr<Tensor> &t)
- {
- configure_tensor(t.get());
- });
- std::for_each(g.nodes().begin() + latest_nid, g.nodes().end(), [&assigned_target](std::unique_ptr<INode> &n)
- {
- if(n != nullptr)
- {
- n->set_assigned_target(assigned_target);
- }
- });
+ std::for_each(g.tensors().begin() + latest_tid, g.tensors().end(),
+ [](std::unique_ptr<Tensor> &t) { configure_tensor(t.get()); });
+ std::for_each(g.nodes().begin() + latest_nid, g.nodes().end(),
+ [&assigned_target](std::unique_ptr<INode> &n)
+ {
+ if (n != nullptr)
+ {
+ n->set_assigned_target(assigned_target);
+ }
+ });
}
}
}
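
create_grouped_convolution() splits the input along the channel axis and the weights along the batch axis, runs one convolution per group, and depth-concatenates the per-group outputs. A worked sketch of the resulting shapes for a hypothetical grouped convolution (the values are illustrative):

    #include <cstdio>

    int main()
    {
        const unsigned in_channels = 64;  // input channel count
        const unsigned num_filters = 128; // weight batch dimension
        const unsigned num_groups  = 4;

        const unsigned ch_per_group  = in_channels / num_groups; // 16 channels per split
        const unsigned flt_per_group = num_filters / num_groups; // 32 filters per split

        for (unsigned g = 0; g < num_groups; ++g)
        {
            std::printf("group %u: conv(%u input channels, %u filters)\n", g, ch_per_group, flt_per_group);
        }
        // The per-group outputs (32 channels each) are concatenated back to 128.
    }
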
diff --git a/src/graph/mutators/InPlaceOperationMutator.cpp b/src/graph/mutators/InPlaceOperationMutator.cpp
index 61639a8f6f..a51dcc4f42 100644
--- a/src/graph/mutators/InPlaceOperationMutator.cpp
+++ b/src/graph/mutators/InPlaceOperationMutator.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -23,8 +23,16 @@
*/
#include "arm_compute/graph/mutators/InPlaceOperationMutator.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/Validate.h"
#include "arm_compute/graph/Graph.h"
#include "arm_compute/graph/Logger.h"
+#include "arm_compute/graph/nodes/DepthwiseConvolutionLayerNode.h"
+#include "arm_compute/graph/nodes/FusedDepthwiseConvolutionBatchNormalizationNode.h"
+
+#include "support/Cast.h"
+
+using namespace arm_compute::utils::cast;
namespace arm_compute
{
@@ -41,7 +49,7 @@ bool output_edges_are_separate_tensors(Graph &g, const Edge *input_edge)
const auto input_tensor = input_edge->tensor();
const auto input_edge_id = input_edge->id();
- if(parent_node == nullptr)
+ if (parent_node == nullptr)
{
return false;
}
@@ -50,24 +58,155 @@ bool output_edges_are_separate_tensors(Graph &g, const Edge *input_edge)
// If the output is connected to only one edge, then computations can
// be done in-place.
- if(output_edges.size() == 1)
+ if (output_edges.size() == 1)
{
return true;
}
- return std::all_of(output_edges.begin(),
- output_edges.end(),
- [&](const EdgeID & edge_id)
+ return std::all_of(output_edges.begin(), output_edges.end(),
+ [&](const EdgeID &edge_id)
+ {
+ // Skip check on current input edge
+ if (edge_id == input_edge_id)
+ {
+ return true;
+ }
+
+ auto edge = g.edge(edge_id);
+ return edge->tensor() != input_tensor;
+ });
+}
+
+// When computing in place, the node must use the new output and inherit the original output's accessor
+void set_new_output_and_inherit_accessor(std::unique_ptr<INode> &node, Tensor *orig_output, Tensor *new_output)
+{
+ ARM_COMPUTE_LOG_GRAPH_INFO("Switching to in-place computation for the node with ID : "
+ << node->id() << " and name : " << node->name() << std::endl);
+ // Update accessor
+ new_output->set_accessor(orig_output->extract_accessor());
+ // Update output
+ node->set_output_tensor(new_output->id(), 0);
+}
+
+// Try to mutate the node to perform the depthwise in-place calculation
+void try_in_place_depthwiseconv(std::unique_ptr<INode> &node)
+{
+ // Get input edge
+ Edge *input_edge = node->input_edge(0);
+ Edge *weight_edge = node->input_edge(1);
+ ARM_COMPUTE_ERROR_ON(input_edge == nullptr || weight_edge == nullptr);
+
+ auto input_tensor = input_edge->tensor();
+ auto weight_tensor = weight_edge->tensor();
+ ARM_COMPUTE_ERROR_ON(input_tensor == nullptr || weight_tensor == nullptr);
+
+ const auto input_shape = input_tensor->desc().shape;
+ const auto qinfo_input = input_tensor->desc().quant_info;
+
+ const auto weight_shape = weight_tensor->desc().shape;
+ const auto weight_layout = weight_tensor->desc().layout;
+
+ // Extract PadStrideInfo and depth multiplier
+ PadStrideInfo conv_info{};
+ unsigned int depth_multiplier{};
+ if (node->type() == NodeType::FusedDepthwiseConvolutionBatchNormalizationLayer)
{
- // Skip check on current input edge
- if(edge_id == input_edge_id)
- {
- return true;
- }
+ conv_info =
+ polymorphic_downcast<FusedDepthwiseConvolutionBatchNormalizationNode *>(node.get())->convolution_info();
+ depth_multiplier =
+ polymorphic_downcast<FusedDepthwiseConvolutionBatchNormalizationNode *>(node.get())->depth_multiplier();
+ }
+ else if (node->type() == NodeType::DepthwiseConvolutionLayer)
+ {
+ conv_info = polymorphic_downcast<DepthwiseConvolutionLayerNode *>(node.get())->convolution_info();
+ depth_multiplier = polymorphic_downcast<DepthwiseConvolutionLayerNode *>(node.get())->depth_multiplier();
+ }
+
+ // Get current output tensor
+ auto current_output_tensor = node->output(0);
+ ARM_COMPUTE_ERROR_ON(current_output_tensor == nullptr);
+ const auto out_shape = current_output_tensor->desc().shape;
+ const auto qinfo_out = current_output_tensor->desc().quant_info;
+
+ bool input_can_in_place = !arm_compute::detail::have_different_dimensions(out_shape, input_shape, 0) &&
+ (qinfo_input == qinfo_out) && (input_tensor->accessor() == nullptr);
- auto edge = g.edge(edge_id);
- return edge->tensor() != input_tensor;
- });
+    // Additional conditions under which the input can be reused in place
+ input_can_in_place &= weight_layout == input_tensor->desc().layout && weight_layout == DataLayout::NHWC;
+
+ const int weights_width_idx = get_data_layout_dimension_index(weight_layout, DataLayoutDimension::WIDTH);
+ const int weights_height_idx = get_data_layout_dimension_index(weight_layout, DataLayoutDimension::HEIGHT);
+ const bool is_1x1 = weight_shape[weights_width_idx] == 1U && weight_shape[weights_height_idx] == 1U;
+ input_can_in_place &= is_1x1;
+
+ input_can_in_place &= depth_multiplier == 1;
+ input_can_in_place &= conv_info.stride() == std::make_pair(1U, 1U);
+ input_can_in_place &= !conv_info.has_padding();
+    // NOTE: Dilation should also be (1, 1). However, dilation is currently not supported in the depthwise conv node
+
+ if (input_can_in_place)
+ {
+ set_new_output_and_inherit_accessor(node, current_output_tensor, input_tensor);
+ }
+ else
+ {
+ ARM_COMPUTE_LOG_GRAPH_VERBOSE("Prevented in-place operation as there is an accessor bound to the input tensor "
+ "or the quantization info are different.\n");
+ }
+}
+
+// Try to mutate the node to perform the elementwise in-place calculation
+void try_in_place_elementwise(std::unique_ptr<INode> &node)
+{
+ // Get input edge
+ Edge *input0_edge = node->input_edge(0);
+ Edge *input1_edge = node->input_edge(1);
+ ARM_COMPUTE_ERROR_ON(input0_edge == nullptr || input1_edge == nullptr);
+
+ auto input0_tensor = input0_edge->tensor();
+ auto input1_tensor = input1_edge->tensor();
+ ARM_COMPUTE_ERROR_ON(input0_tensor == nullptr || input1_tensor == nullptr);
+
+ const auto shape0 = input0_tensor->desc().shape;
+ const auto shape1 = input1_tensor->desc().shape;
+ const auto qinfo0 = input0_tensor->desc().quant_info;
+ const auto qinfo1 = input1_tensor->desc().quant_info;
+
+ const TensorShape out_shape = TensorShape::broadcast_shape(shape0, shape1);
+ // Inputs are not broadcast compatible
+ if (out_shape.total_size() == 0)
+ {
+ return;
+ }
+
+ // Get current output tensor
+ auto current_output_tensor = node->output(0);
+ ARM_COMPUTE_ERROR_ON(current_output_tensor == nullptr);
+ const auto qinfo_out = current_output_tensor->desc().quant_info;
+
+    // The computation can be done in place if the input has the same shape, quantization info and data type as the output, and no accessor is bound to the input.
+ bool input0_can_in_place = !arm_compute::detail::have_different_dimensions(out_shape, shape0, 0) &&
+ (qinfo0 == qinfo_out) &&
+ (input0_tensor->desc().data_type == current_output_tensor->desc().data_type) &&
+ (input0_tensor->accessor() == nullptr);
+ bool input1_can_in_place = !arm_compute::detail::have_different_dimensions(out_shape, shape1, 0) &&
+ (qinfo1 == qinfo_out) &&
+ (input1_tensor->desc().data_type == current_output_tensor->desc().data_type) &&
+ (input1_tensor->accessor() == nullptr);
+
+ if (input0_can_in_place)
+ {
+ set_new_output_and_inherit_accessor(node, current_output_tensor, input0_tensor);
+ }
+ else if (input1_can_in_place)
+ {
+ set_new_output_and_inherit_accessor(node, current_output_tensor, input1_tensor);
+ }
+ else
+ {
+ ARM_COMPUTE_LOG_GRAPH_VERBOSE("Prevented in-place operation as there is an accessor bound to the input tensor "
+ "or the quantization info are different.\n");
+ }
}
} // namespace
@@ -83,45 +222,53 @@ IGraphMutator::MutationType InPlaceOperationMutator::type() const
void InPlaceOperationMutator::mutate(Graph &g)
{
- std::set<NodeType> in_place_nodes =
- {
- NodeType::ActivationLayer,
- NodeType::BatchNormalizationLayer,
- NodeType::EltwiseLayer,
- NodeType::UnaryEltwiseLayer,
- NodeType::PrintLayer
- };
+ std::set<NodeType> in_place_nodes = {NodeType::ActivationLayer,
+ NodeType::BatchNormalizationLayer,
+ NodeType::EltwiseLayer,
+ NodeType::UnaryEltwiseLayer,
+ NodeType::DepthwiseConvolutionLayer,
+ NodeType::FusedDepthwiseConvolutionBatchNormalizationLayer,
+ NodeType::PrintLayer};
// Not interested in the order of nodes
- for(auto &node : g.nodes())
+ for (auto &node : g.nodes())
{
- if(node && in_place_nodes.find(node->type()) != std::end(in_place_nodes))
+ if (node && in_place_nodes.find(node->type()) != std::end(in_place_nodes))
{
// Get input edge
Edge *input_edge = node->input_edge(0);
// Check if parent has a single output if yes then force in place calculation else not
- if((input_edge != nullptr) && output_edges_are_separate_tensors(g, input_edge))
+ if ((input_edge != nullptr) && output_edges_are_separate_tensors(g, input_edge))
{
- // Get current and new output tensors
- auto current_output_tensor = node->output(0);
- auto new_output_tensor = input_edge->tensor();
-
- ARM_COMPUTE_ERROR_ON(current_output_tensor == nullptr || new_output_tensor == nullptr);
-
- // Prevent in-place operation if there is an accessor bound to the in-place tensor or quantization info are different
- if(new_output_tensor->accessor() != nullptr || current_output_tensor->desc().quant_info != new_output_tensor->desc().quant_info)
+ if (node->type() == NodeType::EltwiseLayer)
+ {
+ try_in_place_elementwise(node);
+ }
+ else if (node->type() == NodeType::FusedDepthwiseConvolutionBatchNormalizationLayer ||
+ node->type() == NodeType::DepthwiseConvolutionLayer)
{
- ARM_COMPUTE_LOG_GRAPH_VERBOSE("Prevented in-place operation as there is an accessor bound to the input tensor or the quantization info are different.\n");
+ try_in_place_depthwiseconv(node);
}
else
{
- ARM_COMPUTE_LOG_GRAPH_VERBOSE("Switching to in-place computation for the node with ID : "
- << node->id() << " and name : " << node->name() << std::endl);
- // Update accessor
- new_output_tensor->set_accessor(current_output_tensor->extract_accessor());
- // Update output
- node->set_output_tensor(new_output_tensor->id(), 0);
+ // Get current and new output tensors
+ auto current_output_tensor = node->output(0);
+ auto new_output_tensor = input_edge->tensor();
+
+ ARM_COMPUTE_ERROR_ON(current_output_tensor == nullptr || new_output_tensor == nullptr);
+
+                    // Prevent the in-place operation if an accessor is bound to the in-place tensor or the quantization info differs
+ if (new_output_tensor->accessor() != nullptr ||
+ current_output_tensor->desc().quant_info != new_output_tensor->desc().quant_info)
+ {
+ ARM_COMPUTE_LOG_GRAPH_VERBOSE("Prevented in-place operation as there is an accessor bound to "
+ "the input tensor or the quantization info are different.\n");
+ }
+ else
+ {
+ set_new_output_and_inherit_accessor(node, current_output_tensor, new_output_tensor);
+ }
}
}
}
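
The new elementwise path reuses an input tensor as the output only when the shape, quantization info and data type all match the output and no accessor is bound to that input. A condensed, stand-in sketch of that eligibility test (plain structs instead of the graph's tensor descriptors):

    #include <iostream>

    struct Desc
    {
        unsigned shape[3];
        int      qinfo; // stand-in for QuantizationInfo
        int      dtype; // stand-in for DataType
        bool     has_accessor;
    };

    bool can_reuse_in_place(const Desc &in, const Desc &out)
    {
        const bool same_shape = in.shape[0] == out.shape[0] && in.shape[1] == out.shape[1] &&
                                in.shape[2] == out.shape[2]; // i.e. this input is not broadcast
        return same_shape && in.qinfo == out.qinfo && in.dtype == out.dtype && !in.has_accessor;
    }

    int main()
    {
        Desc input{{56, 56, 64}, 0, 0, false};
        Desc output{{56, 56, 64}, 0, 0, false};
        std::cout << (can_reuse_in_place(input, output) ? "in-place\n" : "separate output\n");
    }
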
diff --git a/src/graph/mutators/MutatorUtils.cpp b/src/graph/mutators/MutatorUtils.cpp
new file mode 100644
index 0000000000..f47240eadd
--- /dev/null
+++ b/src/graph/mutators/MutatorUtils.cpp
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "src/graph/mutators/MutatorUtils.h"
+
+namespace arm_compute
+{
+namespace graph
+{
+bool is_padding_in_height_or_width(const DataLayout &layout, const PaddingList &padding_list)
+{
+ if (layout == DataLayout::NCHW || layout == DataLayout::NHWC)
+ {
+ const unsigned int height_index = get_dimension_idx(layout, DataLayoutDimension::HEIGHT);
+ const unsigned int width_index = get_dimension_idx(layout, DataLayoutDimension::WIDTH);
+
+ for (unsigned int i = 0; i < padding_list.size(); ++i)
+ {
+ if (i != height_index && i != width_index && padding_list[i] != PaddingInfo(0, 0))
+ {
+                // If the index is neither height nor width, don't fuse
+ return false;
+ }
+ }
+
+ return true;
+ }
+
+ return false;
+}
+} // namespace graph
+} // namespace arm_compute
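
A usage sketch with stand-in types: in NHWC the shape indices run (channel, width, height, batch), so width and height sit at indices 1 and 2, and non-zero padding anywhere else makes the helper return false:

    #include <cassert>
    #include <utility>
    #include <vector>

    using PaddingInfo = std::pair<unsigned, unsigned>;
    using PaddingList = std::vector<PaddingInfo>;

    // Stand-in for the helper above, specialised to NHWC (width = 1, height = 2)
    bool padding_only_in_h_or_w_nhwc(const PaddingList &padding)
    {
        for (unsigned i = 0; i < padding.size(); ++i)
        {
            if (i != 1 && i != 2 && padding[i] != PaddingInfo(0, 0))
            {
                return false;
            }
        }
        return true;
    }

    int main()
    {
        assert(padding_only_in_h_or_w_nhwc({{0, 0}, {1, 1}, {1, 1}}));  // spatial pad only: fusable
        assert(!padding_only_in_h_or_w_nhwc({{2, 2}, {0, 0}, {0, 0}})); // channel pad: not fusable
    }
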
diff --git a/src/graph/mutators/MutatorUtils.h b/src/graph/mutators/MutatorUtils.h
new file mode 100644
index 0000000000..170d892c93
--- /dev/null
+++ b/src/graph/mutators/MutatorUtils.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_GRAPH_MUTATOR_UTILS_H
+#define ARM_COMPUTE_GRAPH_MUTATOR_UTILS_H
+
+#include "arm_compute/graph/Utils.h"
+
+namespace arm_compute
+{
+namespace graph
+{
+/** Check if padding is in height and/or width dimensions
+ *
+ * @param[in] layout       Data layout of the tensor
+ * @param[in] padding_list List of padding pairs
+ *
+ * @return True if padding is applied only in the height and/or width dimensions, false otherwise
+ */
+bool is_padding_in_height_or_width(const DataLayout &layout, const PaddingList &padding_list);
+} // namespace graph
+} // namespace arm_compute
+
+#endif /* ARM_COMPUTE_GRAPH_MUTATOR_UTILS_H */ \ No newline at end of file
diff --git a/src/graph/mutators/NodeExecutionMethodMutator.cpp b/src/graph/mutators/NodeExecutionMethodMutator.cpp
index 09a3cf50c0..588befecae 100644
--- a/src/graph/mutators/NodeExecutionMethodMutator.cpp
+++ b/src/graph/mutators/NodeExecutionMethodMutator.cpp
@@ -23,11 +23,11 @@
*/
#include "arm_compute/graph/mutators/NodeExecutionMethodMutator.h"
+#include "arm_compute/graph/backends/BackendRegistry.h"
#include "arm_compute/graph/Graph.h"
#include "arm_compute/graph/Logger.h"
-#include "arm_compute/graph/Utils.h"
-#include "arm_compute/graph/backends/BackendRegistry.h"
#include "arm_compute/graph/nodes/Nodes.h"
+#include "arm_compute/graph/Utils.h"
#include "support/Cast.h"
@@ -49,17 +49,17 @@ template <typename Setter>
void set_default_on_invalid_method(Graph &g, NodeType node_type, Setter &&setter)
{
const std::vector<NodeID> &node_ids = g.nodes(node_type);
- for(auto &node_id : node_ids)
+ for (auto &node_id : node_ids)
{
INode *node = g.node(node_id);
- if(node != nullptr)
+ if (node != nullptr)
{
// Validate node
backends::IDeviceBackend &backend = backends::BackendRegistry::get().get_backend(node->assigned_target());
Status status = backend.validate_node(*node);
// Set default execution method in case of failure
- if(!bool(status))
+ if (!bool(status))
{
setter(node);
}
@@ -81,22 +81,26 @@ IGraphMutator::MutationType NodeExecutionMethodMutator::type() const
void NodeExecutionMethodMutator::mutate(Graph &g)
{
// Convolution Layer
- set_default_on_invalid_method(g, NodeType::ConvolutionLayer, [](INode * n)
- {
- ARM_COMPUTE_LOG_GRAPH_INFO("Switched ConvolutionLayer method of node with ID : "
- << n->id() << " and Name: " << n->name() << std::endl);
- auto *casted_node = arm_compute::utils::cast::polymorphic_downcast<ConvolutionLayerNode *>(n);
- casted_node->set_convolution_method(ConvolutionMethod::Default);
- });
+ set_default_on_invalid_method(g, NodeType::ConvolutionLayer,
+ [](INode *n)
+ {
+ ARM_COMPUTE_LOG_GRAPH_INFO("Switched ConvolutionLayer method of node with ID : "
+ << n->id() << " and Name: " << n->name() << std::endl);
+ auto *casted_node =
+ arm_compute::utils::cast::polymorphic_downcast<ConvolutionLayerNode *>(n);
+ casted_node->set_convolution_method(ConvolutionMethod::Default);
+ });
// Depthwise Convolution Layer
- set_default_on_invalid_method(g, NodeType::DepthwiseConvolutionLayer, [](INode * n)
- {
- ARM_COMPUTE_LOG_GRAPH_INFO("Switched Depthwise ConvolutionLayer method of node with ID : "
- << n->id() << " and Name: " << n->name() << std::endl);
- auto *casted_node = arm_compute::utils::cast::polymorphic_downcast<DepthwiseConvolutionLayerNode *>(n);
- casted_node->set_depthwise_convolution_method(DepthwiseConvolutionMethod::Default);
- });
+ set_default_on_invalid_method(
+ g, NodeType::DepthwiseConvolutionLayer,
+ [](INode *n)
+ {
+ ARM_COMPUTE_LOG_GRAPH_INFO("Switched Depthwise ConvolutionLayer method of node with ID : "
+ << n->id() << " and Name: " << n->name() << std::endl);
+ auto *casted_node = arm_compute::utils::cast::polymorphic_downcast<DepthwiseConvolutionLayerNode *>(n);
+ casted_node->set_depthwise_convolution_method(DepthwiseConvolutionMethod::Default);
+ });
}
} // namespace graph
} // namespace arm_compute
diff --git a/src/graph/mutators/NodeFusionMutator.cpp b/src/graph/mutators/NodeFusionMutator.cpp
index 5a696f8386..998a4a05c7 100644
--- a/src/graph/mutators/NodeFusionMutator.cpp
+++ b/src/graph/mutators/NodeFusionMutator.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2021 Arm Limited.
+ * Copyright (c) 2018-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -23,15 +23,18 @@
*/
#include "arm_compute/graph/mutators/NodeFusionMutator.h"
+#include "arm_compute/core/utils/DataTypeUtils.h"
+#include "arm_compute/graph/backends/BackendRegistry.h"
#include "arm_compute/graph/GraphBuilder.h"
#include "arm_compute/graph/Logger.h"
-#include "arm_compute/graph/Utils.h"
-#include "arm_compute/graph/backends/BackendRegistry.h"
#include "arm_compute/graph/nodes/FusedConvolutionBatchNormalizationNode.h"
#include "arm_compute/graph/nodes/Nodes.h"
+#include "arm_compute/graph/Utils.h"
+#include "src/graph/mutators/MutatorUtils.h"
#include "support/Cast.h"
+#include <list>
#include <set>
namespace arm_compute
@@ -40,24 +43,60 @@ namespace graph
{
namespace detail
{
+void transfer_driving_nodes_and_remove_old_node(Graph &g, INode *new_node, INode *old_node, bool add_output_tensor)
+{
+ if (new_node == nullptr || old_node == nullptr)
+ {
+ return;
+ }
+
+ // Get driving nodes of last fusable node
+ std::vector<NodeIdxPair> last_driving_nodes = get_driving_nodes(*old_node);
+
+ // Extract last fusable node accessor if any
+ if (old_node->output(0) == nullptr)
+ {
+ return;
+ }
+ auto old_node_accessor = old_node->output(0)->extract_accessor();
+
+ // Remove node
+ g.remove_node(old_node->id());
+
+ // Update fused node outputs
+ for (auto &driving_node : last_driving_nodes)
+ {
+ g.add_connection(new_node->id(), 0, driving_node.node_id, driving_node.index);
+ if (add_output_tensor)
+ {
+ configure_tensor(new_node->output(0));
+ }
+ }
+
+ // Update accessor to fused node
+ new_node->output(0)->set_accessor(std::move(old_node_accessor));
+}
+
void fuse_convolution_with_batch_normalization(Graph &g, const Edge *output_edge)
{
ARM_COMPUTE_ERROR_ON(output_edge == nullptr);
auto *conv_node = arm_compute::utils::cast::polymorphic_downcast<ConvolutionLayerNode *>(output_edge->producer());
- auto *bn_node = arm_compute::utils::cast::polymorphic_downcast<BatchNormalizationLayerNode *>(output_edge->consumer());
+ auto *bn_node =
+ arm_compute::utils::cast::polymorphic_downcast<BatchNormalizationLayerNode *>(output_edge->consumer());
// Not fusing if number of groups is greater than 1
- if(conv_node->num_groups() > 1)
+ if (conv_node->num_groups() > 1)
{
return;
}
- ARM_COMPUTE_LOG_GRAPH_VERBOSE("Fusing convolution node with ID : " << output_edge->producer_id()
- << " with BatchNormalization Layer node with ID : " << output_edge->consumer_id() << std::endl);
+ ARM_COMPUTE_LOG_GRAPH_VERBOSE("Fusing convolution node with ID : "
+ << output_edge->producer_id() << " with BatchNormalization Layer node with ID : "
+ << output_edge->consumer_id() << std::endl);
// Prevent fusion if fused node has an output accessor
- if(conv_node->output(0)->accessor() == nullptr)
+ if (conv_node->output(0)->accessor() == nullptr)
{
const Target assigned_target = conv_node->assigned_target();
@@ -77,9 +116,10 @@ void fuse_convolution_with_batch_normalization(Graph &g, const Edge *output_edge
const auto epsilon = bn_node->epsilon();
// Create the fused node
- const NodeID fused_id = g.add_node<FusedConvolutionBatchNormalizationNode>(epsilon, conv_info, num_groups, conv_method, fast_math_hint, act_info);
+ const NodeID fused_id = g.add_node<FusedConvolutionBatchNormalizationNode>(
+ epsilon, conv_info, num_groups, conv_method, fast_math_hint, act_info);
- if(conv_node->input_edge(2) != nullptr)
+ if (conv_node->input_edge(2) != nullptr)
{
auto conv_bias_id = conv_node->input_edge(2)->producer_id();
g.add_connection(conv_bias_id, 0, fused_id, 2);
@@ -91,45 +131,33 @@ void fuse_convolution_with_batch_normalization(Graph &g, const Edge *output_edge
g.add_connection(bn_mean_id, 0, fused_id, 3);
g.add_connection(bn_var_id, 0, fused_id, 4);
- if(bn_node->input_edge(3) != nullptr)
+ if (bn_node->input_edge(3) != nullptr)
{
const auto bn_beta_id = bn_node->input_edge(3)->producer_id();
g.add_connection(bn_beta_id, 0, fused_id, 5);
}
- if(bn_node->input_edge(4) != nullptr)
+ if (bn_node->input_edge(4) != nullptr)
{
const auto bn_gamma_id = bn_node->input_edge(4)->producer_id();
g.add_connection(bn_gamma_id, 0, fused_id, 6);
}
- auto fused_node = g.node(fused_id);
- std::vector<NodeIdxPair> bn_driving_nodes = get_driving_nodes(*bn_node);
+ auto fused_node = g.node(fused_id);
+ auto bn_node_name = bn_node->name();
- // Extract batch normalization node accessor if any
- auto bn_node_accessor = bn_node->output(0)->extract_accessor();
- auto bn_node_name = bn_node->name();
+ transfer_driving_nodes_and_remove_old_node(g, fused_node, bn_node, true);
- // Remove batch normalization node
- g.remove_node(bn_node->id());
-
- // Get driving nodes of batch normalization node
- for(auto &driving_node : bn_driving_nodes)
- {
- g.add_connection(fused_id, 0, driving_node.node_id, driving_node.index);
- configure_tensor(fused_node->output(0));
- }
- // Update fused node outputs
- fused_node->output(0)->set_accessor(std::move(bn_node_accessor));
fused_node->set_assigned_target(assigned_target);
- fused_node->set_common_node_parameters(NodeParams{ conv_node->name() + "+" + bn_node_name, assigned_target });
+ fused_node->set_common_node_parameters(NodeParams{conv_node->name() + "+" + bn_node_name, assigned_target});
// Remove convolution node
g.remove_node(conv_node->id());
}
else
{
- ARM_COMPUTE_LOG_GRAPH_VERBOSE("Prevented fusion of convolution with batch normalization due to the presence of an output accessor\n");
+ ARM_COMPUTE_LOG_GRAPH_VERBOSE(
+ "Prevented fusion of convolution with batch normalization due to the presence of an output accessor\n");
}
}
@@ -137,14 +165,17 @@ void fuse_depthwise_convolution_with_batch_normalization(Graph &g, const Edge *o
{
ARM_COMPUTE_ERROR_ON(output_edge == nullptr);
- auto *depth_conv_node = arm_compute::utils::cast::polymorphic_downcast<DepthwiseConvolutionLayerNode *>(output_edge->producer());
- auto *bn_node = arm_compute::utils::cast::polymorphic_downcast<BatchNormalizationLayerNode *>(output_edge->consumer());
+ auto *depth_conv_node =
+ arm_compute::utils::cast::polymorphic_downcast<DepthwiseConvolutionLayerNode *>(output_edge->producer());
+ auto *bn_node =
+ arm_compute::utils::cast::polymorphic_downcast<BatchNormalizationLayerNode *>(output_edge->consumer());
- ARM_COMPUTE_LOG_GRAPH_VERBOSE("Fusing depthwise convolution node with ID : " << output_edge->producer_id()
- << " with BatchNormalization Layer node with ID : " << output_edge->consumer_id() << std::endl);
+ ARM_COMPUTE_LOG_GRAPH_VERBOSE("Fusing depthwise convolution node with ID : "
+ << output_edge->producer_id() << " with BatchNormalization Layer node with ID : "
+ << output_edge->consumer_id() << std::endl);
// Prevent fusion if fused node has an output accessor
- if(depth_conv_node->output(0)->accessor() == nullptr)
+ if (depth_conv_node->output(0)->accessor() == nullptr)
{
const Target assigned_target = depth_conv_node->assigned_target();
@@ -164,9 +195,10 @@ void fuse_depthwise_convolution_with_batch_normalization(Graph &g, const Edge *o
const auto epsilon = bn_node->epsilon();
// Create the fused node
- const NodeID fused_id = g.add_node<FusedDepthwiseConvolutionBatchNormalizationNode>(epsilon, conv_info, depth_multiplier, depth_conv_method, act_info);
+ const NodeID fused_id = g.add_node<FusedDepthwiseConvolutionBatchNormalizationNode>(
+ epsilon, conv_info, depth_multiplier, depth_conv_method, act_info);
- if(depth_conv_node->input_edge(2) != nullptr)
+ if (depth_conv_node->input_edge(2) != nullptr)
{
const auto conv_bias_id = depth_conv_node->input_edge(2)->producer_id();
g.add_connection(conv_bias_id, 0, fused_id, 2);
@@ -180,38 +212,29 @@ void fuse_depthwise_convolution_with_batch_normalization(Graph &g, const Edge *o
g.add_connection(bn_beta_id, 0, fused_id, 5);
g.add_connection(bn_gamma_id, 0, fused_id, 6);
- auto fused_node = g.node(fused_id);
- std::vector<NodeIdxPair> bn_driving_nodes = get_driving_nodes(*bn_node);
-
- // Extract batch normalization node accessor if any
- auto bn_node_accessor = bn_node->output(0)->extract_accessor();
- auto bn_node_name = bn_node->name();
+ auto fused_node = g.node(fused_id);
+ auto bn_node_name = bn_node->name();
- // Remove batch normalization node
- g.remove_node(bn_node->id());
+ transfer_driving_nodes_and_remove_old_node(g, fused_node, bn_node, true);
- // Get driving nodes of batch normalization node
- for(auto &driving_node : bn_driving_nodes)
- {
- g.add_connection(fused_id, 0, driving_node.node_id, driving_node.index);
- configure_tensor(fused_node->output(0));
- }
- // Update fused node outputs
- fused_node->output(0)->set_accessor(std::move(bn_node_accessor));
fused_node->set_assigned_target(assigned_target);
- fused_node->set_common_node_parameters(NodeParams{ depth_conv_node->name() + "+" + bn_node_name, assigned_target });
+ fused_node->set_common_node_parameters(
+ NodeParams{depth_conv_node->name() + "+" + bn_node_name, assigned_target});
// Remove depthwise convolution node
g.remove_node(depth_conv_node->id());
}
else
{
- ARM_COMPUTE_LOG_GRAPH_VERBOSE("Prevented fusion of depthwise convolution with batch normalization due to the presence of an output accessor\n");
+ ARM_COMPUTE_LOG_GRAPH_VERBOSE("Prevented fusion of depthwise convolution with batch normalization due to the "
+ "presence of an output accessor\n");
}
}
template <typename N>
-void fuse_node_with_activation(Graph &g, const Edge *output_edge, const std::set<Activation> &supported_fused_activations)
+void fuse_node_with_activation(Graph &g,
+ const Edge *output_edge,
+ const std::set<Activation> &supported_fused_activations)
{
ARM_COMPUTE_ERROR_ON(output_edge == nullptr);
@@ -221,73 +244,126 @@ void fuse_node_with_activation(Graph &g, const Edge *output_edge, const std::set
ARM_COMPUTE_ERROR_ON(act_node->output(0) == nullptr || n_node->output(0) == nullptr);
// Check if activation is supported for fusion
- if(supported_fused_activations.count(act_node->activation_info().activation()) == 0)
+ if (supported_fused_activations.count(act_node->activation_info().activation()) == 0)
{
return;
}
// EltwiseLayerNode can only be fused when the data type is float
- if(n_node->type() == NodeType::EltwiseLayer && !is_data_type_float(n_node->output(0)->desc().data_type))
+ if (n_node->type() == NodeType::EltwiseLayer && !is_data_type_float(n_node->output(0)->desc().data_type))
{
return;
}
ARM_COMPUTE_LOG_GRAPH_VERBOSE("Fusing node with ID : " << output_edge->producer_id()
- << " with Activation Layer node with ID : " << output_edge->consumer_id() << std::endl);
+ << " with Activation Layer node with ID : "
+ << output_edge->consumer_id() << std::endl);
// Prevent fusion if fused node has an output accessor
- if(n_node->output(0)->accessor() == nullptr)
+ if (n_node->output(0)->accessor() == nullptr)
{
- // Get driving nodes of activation node
- std::vector<NodeIdxPair> act_driving_nodes = get_driving_nodes(*act_node);
-
// Set activation info to fused node
n_node->set_fused_activation(act_node->activation_info());
- // Extract activation node accessor if any
- auto act_node_accessor = act_node->output(0)->extract_accessor();
+ transfer_driving_nodes_and_remove_old_node(g, n_node, act_node, false);
+ }
+ else
+ {
+ ARM_COMPUTE_LOG_GRAPH_VERBOSE(
+ "Prevented fusion of node with activation due to the presence of an output accessor\n");
+ }
+}
- // Remove activation node
- g.remove_node(act_node->id());
+template <typename N>
+void fuse_pad_with_convolution(Graph &g, const Edge *output_edge)
+{
+ auto *pad_node = arm_compute::utils::cast::polymorphic_downcast<PadLayerNode *>(output_edge->producer());
+ auto *conv_node = arm_compute::utils::cast::polymorphic_downcast<N *>(output_edge->consumer());
+
+ const Edge *input_edge = pad_node->input_edge(0);
+ if (input_edge != nullptr && input_edge->tensor() != nullptr && pad_node->output(0)->accessor() == nullptr &&
+ pad_node->pad_value().get<float>() == 0.0)
+ {
+ const DataLayout layout = input_edge->tensor()->desc().layout;
+ const PaddingList padding_list = pad_node->padding();
+
+ const unsigned int height_index = get_dimension_idx(layout, DataLayoutDimension::HEIGHT);
+ const unsigned int width_index = get_dimension_idx(layout, DataLayoutDimension::WIDTH);
- // Update fused node outputs
- for(auto &driving_node : act_driving_nodes)
+ const PaddingInfo pad_w = width_index < padding_list.size() ? padding_list[width_index] : PaddingInfo(0, 0);
+ const PaddingInfo pad_h = height_index < padding_list.size() ? padding_list[height_index] : PaddingInfo(0, 0);
+
+ if (is_padding_in_height_or_width(layout, padding_list))
{
- g.add_connection(n_node->id(), 0, driving_node.node_id, driving_node.index);
+ // Fold the pad layer's padding into the convolution node
+ const PadStrideInfo conv_info = conv_node->convolution_info();
+ const PadStrideInfo new_conv_info(conv_info.stride().first, conv_info.stride().second,
+ conv_info.pad_left() + pad_w.first, conv_info.pad_right() + pad_w.second,
+ conv_info.pad_top() + pad_h.first, conv_info.pad_bottom() + pad_h.second,
+ conv_info.round());
+ conv_node->set_convolution_info(new_conv_info);
+
+ // Update drivers of the convolution node
+ std::vector<NodeIdxPair> pad_driver_nodes = get_driver_nodes(*pad_node);
+ g.remove_node(pad_node->id());
+
+ // Update fused node inputs
+ for (auto &driver_node : pad_driver_nodes)
+ {
+ g.add_connection(driver_node.node_id, driver_node.index, conv_node->id(), 0);
+ }
}
-
- // Update accessor to fused node
- n_node->output(0)->set_accessor(std::move(act_node_accessor));
- }
- else
- {
- ARM_COMPUTE_LOG_GRAPH_VERBOSE("Prevented fusion of node with activation due to the presence of an output accessor\n");
}
}
template <typename N1, typename N2, typename F, typename... Args>
-void fuse_layer(Graph &g, std::function<bool(INode &)> const &prec, const F fuse_fcn, Args &&... optional_arguments)
+void fuse_layer(Graph &g, std::function<bool(INode &)> const &prec, const F fuse_fcn, Args &&...optional_arguments)
{
// Note that fused nodes may be added to the end of the node list.
// Instead of only looping over the original list of nodes, we loop over the current node list which could be growing.
// This is intentional as it probes the newly added fused nodes for further fusing opportunities.
- for(unsigned int i = 0; i < g.nodes().size(); ++i)
+ for (unsigned int i = 0; i < g.nodes().size(); ++i)
{
auto node = g.node(i);
- // Check if the node is of type N and not a branching node
- if(node && node->type() == N1::node_type && node->output_edges().size() == 1)
+ // Check if the node is of type N1 and not a branching node
+ if (node && node->type() == N1::node_type && node->output_edges().size() == 1)
{
const auto output_edge_id = *node->output_edges().begin();
const auto output_edge = g.edge(output_edge_id);
- // Check if following node is an activation layer node
- if((output_edge != nullptr) && (output_edge->consumer() != nullptr) && (output_edge->consumer()->type() == N2::node_type) && prec(*output_edge->producer()))
+ // Check if the following node is of type N2
+ if ((output_edge != nullptr) && (output_edge->consumer() != nullptr) &&
+ (output_edge->consumer()->type() == N2::node_type) && prec(*output_edge->producer()))
{
fuse_fcn(g, output_edge, optional_arguments...);
}
}
}
}
+
+template <typename N1, typename F, typename... Args>
+void fuse_layer(Graph &g, std::function<bool(INode &)> const &prec, const F fuse_fcn, Args &&...optional_arguments)
+{
+ // Note that fused nodes may be added to the end of the node list.
+ // Instead of only looping over the original list of nodes, we loop over the current node list which could be growing.
+ // This is intentional as it probes the newly added fused nodes for further fusing opportunities.
+ for (unsigned int i = 0; i < g.nodes().size(); ++i)
+ {
+ auto node = g.node(i);
+ // Check if the node is of type N1 and not a branching node
+ if (node && node->type() == N1::node_type && node->output_edges().size() == 1)
+ {
+ const auto output_edge_id = *node->output_edges().begin();
+ const auto output_edge = g.edge(output_edge_id);
+
+ // Check that the edge and consumer are valid and the producer satisfies the precondition
+ if ((output_edge != nullptr) && (output_edge->consumer() != nullptr) && prec(*output_edge->producer()))
+ {
+ fuse_fcn(g, output_edge, i, optional_arguments...);
+ }
+ }
+ }
+}
} // namespace detail
const char *NodeFusionMutator::name()
@@ -303,43 +379,50 @@ IGraphMutator::MutationType NodeFusionMutator::type() const
void NodeFusionMutator::mutate(Graph &g)
{
// Supported activations when fusing
- const std::set<Activation> supported_fused_activations = { Activation::ABS, Activation::BOUNDED_RELU, Activation::ELU,
- Activation::HARD_SWISH, Activation::IDENTITY, Activation::LEAKY_RELU,
- Activation::LINEAR, Activation::LOGISTIC, Activation::LU_BOUNDED_RELU,
- Activation::RELU, Activation::SOFT_RELU, Activation::SQRT,
- Activation::SQUARE, Activation::TANH
- };
+ const std::set<Activation> supported_fused_activations = {
+ Activation::ABS, Activation::BOUNDED_RELU, Activation::ELU,
+ Activation::HARD_SWISH, Activation::IDENTITY, Activation::LEAKY_RELU,
+ Activation::LINEAR, Activation::LOGISTIC, Activation::LU_BOUNDED_RELU,
+ Activation::RELU, Activation::SOFT_RELU, Activation::SQRT,
+ Activation::SQUARE, Activation::TANH};
// Preconditions
- auto empty_prec = [](INode &)
- {
- return true;
- };
- auto cl_target_prec = [](INode & n)
- {
- return n.assigned_target() == Target::CL;
- };
- auto qs8_prec = [&g](INode & n)
+ auto empty_prec = [](INode &) { return true; };
+ auto cl_target_prec = [](INode &n) { return n.assigned_target() == Target::CL; };
+ auto qs8_prec = [&g](INode &n)
{
ARM_COMPUTE_ERROR_ON(n.output(0) == nullptr);
const auto output_edge_id = *n.output_edges().begin();
const auto output_edge = g.edge(output_edge_id);
// To perform fusion the two nodes must have the same output quantization information
- const bool same_qinfo = n.output(0)->desc().quant_info == output_edge->producer()->output(0)->desc().quant_info;
+ const bool same_qinfo = n.output(0)->desc().quant_info == output_edge->producer()->output(0)->desc().quant_info;
const bool output_qasymm8 = n.output(0)->desc().data_type == DataType::QASYMM8;
return (output_qasymm8 && same_qinfo) || !output_qasymm8;
};
// Fusion mutations
- detail::fuse_layer<BatchNormalizationLayerNode, ActivationLayerNode>(g, empty_prec, detail::fuse_node_with_activation<BatchNormalizationLayerNode>, supported_fused_activations);
- detail::fuse_layer<ConvolutionLayerNode, ActivationLayerNode>(g, empty_prec, detail::fuse_node_with_activation<ConvolutionLayerNode>, supported_fused_activations);
- detail::fuse_layer<DepthwiseConvolutionLayerNode, ActivationLayerNode>(g, qs8_prec, detail::fuse_node_with_activation<DepthwiseConvolutionLayerNode>, supported_fused_activations);
- detail::fuse_layer<FullyConnectedLayerNode, ActivationLayerNode>(g, empty_prec, detail::fuse_node_with_activation<FullyConnectedLayerNode>, supported_fused_activations);
- detail::fuse_layer<EltwiseLayerNode, ActivationLayerNode>(g, cl_target_prec, detail::fuse_node_with_activation<EltwiseLayerNode>, supported_fused_activations);
- detail::fuse_layer<ConvolutionLayerNode, BatchNormalizationLayerNode>(g, empty_prec, detail::fuse_convolution_with_batch_normalization);
- detail::fuse_layer<DepthwiseConvolutionLayerNode, BatchNormalizationLayerNode>(g, empty_prec, detail::fuse_depthwise_convolution_with_batch_normalization);
+
+ detail::fuse_layer<PadLayerNode, ConvolutionLayerNode>(g, empty_prec,
+ detail::fuse_pad_with_convolution<ConvolutionLayerNode>);
+ detail::fuse_layer<PadLayerNode, DepthwiseConvolutionLayerNode>(
+ g, empty_prec, detail::fuse_pad_with_convolution<DepthwiseConvolutionLayerNode>);
+ detail::fuse_layer<BatchNormalizationLayerNode, ActivationLayerNode>(
+ g, empty_prec, detail::fuse_node_with_activation<BatchNormalizationLayerNode>, supported_fused_activations);
+ detail::fuse_layer<ConvolutionLayerNode, ActivationLayerNode>(
+ g, empty_prec, detail::fuse_node_with_activation<ConvolutionLayerNode>, supported_fused_activations);
+ detail::fuse_layer<DepthwiseConvolutionLayerNode, ActivationLayerNode>(
+ g, qs8_prec, detail::fuse_node_with_activation<DepthwiseConvolutionLayerNode>, supported_fused_activations);
+ detail::fuse_layer<FullyConnectedLayerNode, ActivationLayerNode>(
+ g, empty_prec, detail::fuse_node_with_activation<FullyConnectedLayerNode>, supported_fused_activations);
+ detail::fuse_layer<EltwiseLayerNode, ActivationLayerNode>(
+ g, cl_target_prec, detail::fuse_node_with_activation<EltwiseLayerNode>, supported_fused_activations);
+ // The fusion of BatchNormalizationLayer must occur after the fusion of ActivationLayer, because FusedConvolutionBatchNormalizationNode assumes that the batch normalization has already been fused with an activation, if any
+ detail::fuse_layer<ConvolutionLayerNode, BatchNormalizationLayerNode>(
+ g, empty_prec, detail::fuse_convolution_with_batch_normalization);
+ detail::fuse_layer<DepthwiseConvolutionLayerNode, BatchNormalizationLayerNode>(
+ g, empty_prec, detail::fuse_depthwise_convolution_with_batch_normalization);
}
} // namespace graph
} // namespace arm_compute
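
The padding arithmetic in fuse_pad_with_convolution above simply adds the Pad node's (before, after) pairs to the convolution's existing border; a standalone sketch with illustrative values (not taken from the patch):

    using namespace arm_compute;

    const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::FLOOR);
    const PaddingInfo   pad_w(1, 1); // (before, after) on the width dimension
    const PaddingInfo   pad_h(2, 2); // (before, after) on the height dimension

    const PadStrideInfo fused(conv_info.stride().first, conv_info.stride().second,
                              conv_info.pad_left() + pad_w.first, conv_info.pad_right() + pad_w.second,
                              conv_info.pad_top() + pad_h.first, conv_info.pad_bottom() + pad_h.second,
                              conv_info.round());
    // fused now carries stride 1x1 with pad left/right 1 and pad top/bottom 2,
    // so the convolution consumes the un-padded input directly and the Pad
    // node can be removed from the graph.
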
diff --git a/src/graph/mutators/SplitLayerSubTensorMutator.cpp b/src/graph/mutators/SplitLayerSubTensorMutator.cpp
index 2c28a1a2d1..533f8944cf 100644
--- a/src/graph/mutators/SplitLayerSubTensorMutator.cpp
+++ b/src/graph/mutators/SplitLayerSubTensorMutator.cpp
@@ -23,12 +23,12 @@
*/
#include "arm_compute/graph/mutators/SplitLayerSubTensorMutator.h"
-#include "arm_compute/graph/Graph.h"
-#include "arm_compute/graph/Logger.h"
-#include "arm_compute/graph/Utils.h"
#include "arm_compute/graph/algorithms/TopologicalSort.h"
#include "arm_compute/graph/backends/BackendRegistry.h"
+#include "arm_compute/graph/Graph.h"
+#include "arm_compute/graph/Logger.h"
#include "arm_compute/graph/nodes/SplitLayerNode.h"
+#include "arm_compute/graph/Utils.h"
#include "support/Cast.h"
#include "support/Iterable.h"
@@ -50,7 +50,7 @@ IGraphMutator::MutationType SplitLayerSubTensorMutator::type() const
void SplitLayerSubTensorMutator::mutate(Graph &g)
{
// Early exit if no Split layers exist in graph
- if(g.nodes(NodeType::SplitLayer).empty())
+ if (g.nodes(NodeType::SplitLayer).empty())
{
return;
}
@@ -59,23 +59,23 @@ void SplitLayerSubTensorMutator::mutate(Graph &g)
std::vector<NodeID> topological_sorted_node_ids = dfs(g);
// Should be in reverse order of execution
- for(auto &node_id : arm_compute::utils::iterable::reverse_iterate(topological_sorted_node_ids))
+ for (auto &node_id : arm_compute::utils::iterable::reverse_iterate(topological_sorted_node_ids))
{
INode *node = g.node(node_id);
- if(node != nullptr && node->type() == NodeType::SplitLayer && node->input(0) != nullptr)
+ if (node != nullptr && node->type() == NodeType::SplitLayer && node->input(0) != nullptr)
{
// Get input tensor
Tensor *input_tensor = node->input(0);
// Check that all tensors have the same target and are valid
bool is_valid = std::all_of(node->outputs().cbegin(), node->outputs().cend(),
- [&](const TensorID & tid)
- {
- return (g.tensor(tid) != nullptr) && (g.tensor(tid)->desc().target == input_tensor->desc().target);
- });
+ [&](const TensorID &tid) {
+ return (g.tensor(tid) != nullptr) &&
+ (g.tensor(tid)->desc().target == input_tensor->desc().target);
+ });
// Create subtensors
- if(is_valid && is_target_supported(input_tensor->desc().target))
+ if (is_valid && is_target_supported(input_tensor->desc().target))
{
ARM_COMPUTE_LOG_GRAPH_VERBOSE("Using sub-tensors for the node with ID : "
<< node->id() << " and name : " << node->name() << std::endl);
@@ -87,15 +87,18 @@ void SplitLayerSubTensorMutator::mutate(Graph &g)
const bool extend_parent = (axis < 2);
// Create sub-tensor handles
- for(unsigned int i = 0; i < node->outputs().size(); ++i)
+ for (unsigned int i = 0; i < node->outputs().size(); ++i)
{
Tensor *output_tensor = node->output(i);
const TensorShape output_shape = output_tensor->desc().shape;
Coordinates coords;
- std::tie(std::ignore, coords) = split_node->compute_output_descriptor(input_tensor->desc(), num_splits, axis, i);
+ std::tie(std::ignore, coords) =
+ split_node->compute_output_descriptor(input_tensor->desc(), num_splits, axis, i);
- backends::IDeviceBackend &backend = backends::BackendRegistry::get().get_backend(output_tensor->desc().target);
- std::unique_ptr<ITensorHandle> handle = backend.create_subtensor(input_tensor->handle(), output_shape, coords, extend_parent);
+ backends::IDeviceBackend &backend =
+ backends::BackendRegistry::get().get_backend(output_tensor->desc().target);
+ std::unique_ptr<ITensorHandle> handle =
+ backend.create_subtensor(input_tensor->handle(), output_shape, coords, extend_parent);
output_tensor->set_handle(std::move(handle));
}
}
diff --git a/src/graph/mutators/SyntheticDataTypeMutator.cpp b/src/graph/mutators/SyntheticDataTypeMutator.cpp
index 74d040b81d..3dc2480e85 100644
--- a/src/graph/mutators/SyntheticDataTypeMutator.cpp
+++ b/src/graph/mutators/SyntheticDataTypeMutator.cpp
@@ -26,8 +26,8 @@
#include "arm_compute/graph/GraphBuilder.h"
#include "arm_compute/graph/ITensorAccessor.h"
#include "arm_compute/graph/Logger.h"
-#include "arm_compute/graph/Utils.h"
#include "arm_compute/graph/nodes/Nodes.h"
+#include "arm_compute/graph/Utils.h"
#include "support/Cast.h"
@@ -62,14 +62,12 @@ public:
*/
bool is_mutation_supported(Graph &g)
{
- const std::set<NodeType> unsupported_node_types = { NodeType::DetectionOutputLayer,
- NodeType::NormalizationLayer,
- NodeType::PriorBoxLayer
- };
+ const std::set<NodeType> unsupported_node_types = {NodeType::DetectionOutputLayer, NodeType::NormalizationLayer,
+ NodeType::PriorBoxLayer};
- for(const auto &utype : unsupported_node_types)
+ for (const auto &utype : unsupported_node_types)
{
- if(!g.nodes(utype).empty())
+ if (!g.nodes(utype).empty())
{
return false;
}
@@ -83,12 +81,12 @@ bool is_mutation_supported(Graph &g)
*/
void remove_optimized_nodes(Graph &g)
{
- const std::set<NodeType> optimized_node_types = { NodeType::BatchNormalizationLayer };
+ const std::set<NodeType> optimized_node_types = {NodeType::BatchNormalizationLayer};
- for(const auto &opt_type : optimized_node_types)
+ for (const auto &opt_type : optimized_node_types)
{
const std::vector<NodeID> opt_nodes_ids = g.nodes(opt_type);
- for(const auto &node_id : opt_nodes_ids)
+ for (const auto &node_id : opt_nodes_ids)
{
INode *node = g.node(node_id);
@@ -108,7 +106,7 @@ void remove_optimized_nodes(Graph &g)
g.remove_node(node->id());
// Update connections
- for(auto &driving_node : driving_nodes)
+ for (auto &driving_node : driving_nodes)
{
g.add_connection(producer->id(), producer_edge_id, driving_node.node_id, driving_node.index);
}
@@ -123,11 +121,11 @@ void remove_optimized_nodes(Graph &g)
void convert_tensors(Graph &g, DataType data_type)
{
auto &tensors = g.tensors();
- for(auto &tensor : tensors)
+ for (auto &tensor : tensors)
{
- if(tensor != nullptr)
+ if (tensor != nullptr)
{
- switch(data_type)
+ switch (data_type)
{
case DataType::QASYMM8:
case DataType::QASYMM8_SIGNED:
@@ -156,7 +154,7 @@ template <typename NT>
void convert_special_node(Graph &g, std::function<bool(INode *, Tensor *)> const &f)
{
const std::vector<NodeID> nodes_ids = g.nodes(NT::node_type);
- for(const auto &nodes_id : nodes_ids)
+ for (const auto &nodes_id : nodes_ids)
{
INode *node = arm_compute::utils::cast::polymorphic_downcast<NT *>(g.node(nodes_id));
ARM_COMPUTE_ERROR_ON(node == nullptr);
@@ -174,41 +172,41 @@ void convert_special_node(Graph &g, std::function<bool(INode *, Tensor *)> const
*/
void convert_special_tensors(Graph &g)
{
- auto softmax_func = [](INode * node, Tensor * tensor)
+ auto softmax_func = [](INode *node, Tensor *tensor)
{
ARM_COMPUTE_UNUSED(node);
- if(tensor->desc().data_type == DataType::QASYMM8)
+ if (tensor->desc().data_type == DataType::QASYMM8)
{
tensor->desc().quant_info = QuantizationInfo(1.f / 256.f, 0);
}
- else if(tensor->desc().data_type == DataType::QASYMM8_SIGNED)
+ else if (tensor->desc().data_type == DataType::QASYMM8_SIGNED)
{
tensor->desc().quant_info = QuantizationInfo(1.f / 256.f, -128);
}
return true;
};
- auto act_func = [](INode * node, Tensor * tensor)
+ auto act_func = [](INode *node, Tensor *tensor)
{
auto *act_node = arm_compute::utils::cast::polymorphic_downcast<ActivationLayerNode *>(node);
- if(tensor->desc().data_type == DataType::QASYMM8)
+ if (tensor->desc().data_type == DataType::QASYMM8)
{
- if(act_node->activation_info().activation() == ActivationLayerInfo::ActivationFunction::TANH)
+ if (act_node->activation_info().activation() == ActivationLayerInfo::ActivationFunction::TANH)
{
tensor->desc().quant_info = QuantizationInfo(1.f / 128.f, 128);
}
- else if(act_node->activation_info().activation() == ActivationLayerInfo::ActivationFunction::LOGISTIC)
+ else if (act_node->activation_info().activation() == ActivationLayerInfo::ActivationFunction::LOGISTIC)
{
tensor->desc().quant_info = QuantizationInfo(1.f / 256.f, 0);
}
}
- else if(tensor->desc().data_type == DataType::QASYMM8_SIGNED)
+ else if (tensor->desc().data_type == DataType::QASYMM8_SIGNED)
{
- if(act_node->activation_info().activation() == ActivationLayerInfo::ActivationFunction::TANH)
+ if (act_node->activation_info().activation() == ActivationLayerInfo::ActivationFunction::TANH)
{
tensor->desc().quant_info = QuantizationInfo(1.f / 128.f, 0);
}
- else if(act_node->activation_info().activation() == ActivationLayerInfo::ActivationFunction::LOGISTIC)
+ else if (act_node->activation_info().activation() == ActivationLayerInfo::ActivationFunction::LOGISTIC)
{
tensor->desc().quant_info = QuantizationInfo(1.f / 256.f, -128);
}
@@ -228,22 +226,19 @@ void convert_special_tensors(Graph &g)
*/
void handle_nodes_with_bias(Graph &g)
{
- const std::set<NodeType> special_node_types = { NodeType::ConvolutionLayer,
- NodeType::DeconvolutionLayer,
- NodeType::DepthwiseConvolutionLayer,
- NodeType::FullyConnectedLayer
- };
+ const std::set<NodeType> special_node_types = {NodeType::ConvolutionLayer, NodeType::DeconvolutionLayer,
+ NodeType::DepthwiseConvolutionLayer, NodeType::FullyConnectedLayer};
- for(const auto &spc_type : special_node_types)
+ for (const auto &spc_type : special_node_types)
{
const std::vector<NodeID> scp_nodes_ids = g.nodes(spc_type);
- for(const auto &node_id : scp_nodes_ids)
+ for (const auto &node_id : scp_nodes_ids)
{
INode *node = g.node(node_id);
- if(node != nullptr)
+ if (node != nullptr)
{
Tensor *tensor = node->input(2);
- if(tensor != nullptr)
+ if (tensor != nullptr)
{
tensor->desc().data_type = DataType::S32;
}
@@ -253,8 +248,8 @@ void handle_nodes_with_bias(Graph &g)
params.name = params.name.empty() ? "" : params.name + "Bias";
TensorDescriptor b_desc = node->input(1)->desc();
- auto depth = b_desc.shape[get_dimension_idx(b_desc.layout, DataLayoutDimension::BATCHES)];
- b_desc.shape = TensorShape(depth);
+ auto depth = b_desc.shape[get_dimension_idx(b_desc.layout, DataLayoutDimension::BATCHES)];
+ b_desc.shape = TensorShape(depth);
auto accessor = std::make_unique<EmptyAccessor>();
auto b_nid = GraphBuilder::add_const_node(g, params, b_desc, std::move(accessor));
@@ -266,8 +261,7 @@ void handle_nodes_with_bias(Graph &g)
}
} // namespace
-SyntheticDataTypeMutator::SyntheticDataTypeMutator(DataType mutate_type)
- : _mutate_type{ mutate_type }
+SyntheticDataTypeMutator::SyntheticDataTypeMutator(DataType mutate_type) : _mutate_type{mutate_type}
{
}
@@ -283,7 +277,7 @@ IGraphMutator::MutationType SyntheticDataTypeMutator::type() const
void SyntheticDataTypeMutator::mutate(Graph &g)
{
- if(is_mutation_supported(g))
+ if (is_mutation_supported(g))
{
// Remove nodes that get optimized out (e.g. BatchNorm)
remove_optimized_nodes(g);
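
The QuantizationInfo values hard-coded by convert_special_tensors follow from the known output ranges of the layers, assuming the usual affine mapping q = round(x / scale) + offset; a quick sanity check of the arithmetic:

    #include <cmath>

    // Softmax outputs lie in [0, 1): scale 1/256, offset 0 spans QASYMM8 [0, 255].
    int q_softmax = static_cast<int>(std::round(0.5f * 256.f)) + 0;   // 128
    // Tanh outputs lie in [-1, 1): scale 1/128, offset 128 centres the range.
    int q_tanh    = static_cast<int>(std::round(-1.f * 128.f)) + 128; // 0

Similarly, handle_nodes_with_bias, shown above, forces existing bias tensors to DataType::S32 because quantized kernels accumulate in 32-bit integers.
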
diff --git a/src/graph/nodes/ActivationLayerNode.cpp b/src/graph/nodes/ActivationLayerNode.cpp
index cf65d83a5e..1773afcb16 100644
--- a/src/graph/nodes/ActivationLayerNode.cpp
+++ b/src/graph/nodes/ActivationLayerNode.cpp
@@ -44,7 +44,7 @@ ActivationLayerInfo ActivationLayerNode::activation_info() const
bool ActivationLayerNode::forward_descriptors()
{
- if((input_id(0) != NullTensorID) && (output_id(0) != NullTensorID))
+ if ((input_id(0) != NullTensorID) && (output_id(0) != NullTensorID))
{
Tensor *dst = output(0);
ARM_COMPUTE_ERROR_ON(dst == nullptr);
@@ -63,7 +63,7 @@ TensorDescriptor ActivationLayerNode::configure_output(size_t idx) const
ARM_COMPUTE_ERROR_ON(src == nullptr);
TensorDescriptor output_info = src->desc();
- if(!_out_quant_info.empty())
+ if (!_out_quant_info.empty())
{
output_info.quant_info = _out_quant_info;
}
diff --git a/src/graph/nodes/ArgMinMaxLayerNode.cpp b/src/graph/nodes/ArgMinMaxLayerNode.cpp
index 63163b9e2c..5adebc950a 100644
--- a/src/graph/nodes/ArgMinMaxLayerNode.cpp
+++ b/src/graph/nodes/ArgMinMaxLayerNode.cpp
@@ -23,16 +23,18 @@
*/
#include "arm_compute/graph/nodes/ArgMinMaxLayerNode.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/graph/Graph.h"
#include "arm_compute/graph/INodeVisitor.h"
-#include "arm_compute/core/utils/misc/ShapeCalculator.h"
-
namespace arm_compute
{
namespace graph
{
-ArgMinMaxLayerNode::ArgMinMaxLayerNode(ReductionOperation op, unsigned int axis, DataType out_data_type, QuantizationInfo out_quant_info)
+ArgMinMaxLayerNode::ArgMinMaxLayerNode(ReductionOperation op,
+ unsigned int axis,
+ DataType out_data_type,
+ QuantizationInfo out_quant_info)
: _op(op), _axis(axis), _out_data_type(out_data_type), _out_quant_info(std::move(out_quant_info))
{
_input_edges.resize(1, EmptyEdgeID);
@@ -56,7 +58,7 @@ DataType ArgMinMaxLayerNode::out_data_type() const
bool ArgMinMaxLayerNode::forward_descriptors()
{
- if((input_id(0) != NullTensorID) && (output_id(0) != NullTensorID))
+ if ((input_id(0) != NullTensorID) && (output_id(0) != NullTensorID))
{
Tensor *dst = output(0);
ARM_COMPUTE_ERROR_ON(dst == nullptr);
@@ -75,17 +77,18 @@ TensorDescriptor ArgMinMaxLayerNode::configure_output(size_t idx) const
ARM_COMPUTE_ERROR_ON(src == nullptr);
TensorDescriptor output_info = src->desc();
- if(!_out_quant_info.empty())
+ if (!_out_quant_info.empty())
{
output_info.quant_info = _out_quant_info;
}
- if(_out_data_type != DataType::UNKNOWN)
+ if (_out_data_type != DataType::UNKNOWN)
{
output_info.data_type = _out_data_type;
}
- TensorShape output_shape = arm_compute::misc::shape_calculator::compute_reduced_shape(output_info.shape, _axis, false);
+ TensorShape output_shape =
+ arm_compute::misc::shape_calculator::compute_reduced_shape(output_info.shape, _axis, false);
output_info.set_shape(output_shape);
return output_info;
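
The reduction above relies on compute_reduced_shape with keep_dims = false, which removes the reduced axis from the shape entirely; a sketch with an illustrative shape:

    // Reducing a [W=7, H=7, C=64] shape along axis 2 without keeping dims:
    TensorShape in(7U, 7U, 64U);
    TensorShape out = arm_compute::misc::shape_calculator::compute_reduced_shape(in, 2, false);
    // out == TensorShape(7U, 7U)
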
diff --git a/src/graph/nodes/BatchNormalizationLayerNode.cpp b/src/graph/nodes/BatchNormalizationLayerNode.cpp
index ceca0e2715..c317123e8d 100644
--- a/src/graph/nodes/BatchNormalizationLayerNode.cpp
+++ b/src/graph/nodes/BatchNormalizationLayerNode.cpp
@@ -55,7 +55,7 @@ void BatchNormalizationLayerNode::set_fused_activation(ActivationLayerInfo fused
bool BatchNormalizationLayerNode::forward_descriptors()
{
- if((input_id(0) != NullTensorID) && (output_id(0) != NullTensorID))
+ if ((input_id(0) != NullTensorID) && (output_id(0) != NullTensorID))
{
Tensor *dst = output(0);
ARM_COMPUTE_ERROR_ON(dst == nullptr);
@@ -86,4 +86,4 @@ void BatchNormalizationLayerNode::accept(INodeVisitor &v)
v.visit(*this);
}
} // namespace graph
-} // namespace arm_compute \ No newline at end of file
+} // namespace arm_compute
diff --git a/src/graph/nodes/BoundingBoxTransformLayerNode.cpp b/src/graph/nodes/BoundingBoxTransformLayerNode.cpp
index f3f4f91075..8e52174639 100644
--- a/src/graph/nodes/BoundingBoxTransformLayerNode.cpp
+++ b/src/graph/nodes/BoundingBoxTransformLayerNode.cpp
@@ -23,17 +23,15 @@
*/
#include "arm_compute/graph/nodes/BoundingBoxTransformLayerNode.h"
+#include "arm_compute/core/Helpers.h"
#include "arm_compute/graph/Graph.h"
#include "arm_compute/graph/INodeVisitor.h"
-#include "arm_compute/core/Helpers.h"
-
namespace arm_compute
{
namespace graph
{
-BoundingBoxTransformLayerNode::BoundingBoxTransformLayerNode(BoundingBoxTransformInfo &info)
- : _bbox_info(info)
+BoundingBoxTransformLayerNode::BoundingBoxTransformLayerNode(BoundingBoxTransformInfo &info) : _bbox_info(info)
{
_input_edges.resize(2, EmptyEdgeID);
_outputs.resize(1, NullTensorID);
@@ -46,7 +44,7 @@ const BoundingBoxTransformInfo &BoundingBoxTransformLayerNode::info() const
bool BoundingBoxTransformLayerNode::forward_descriptors()
{
- if((input_id(0) != NullTensorID) && (input_id(1) != NullTensorID) && (output_id(0) != NullTensorID))
+ if ((input_id(0) != NullTensorID) && (input_id(1) != NullTensorID) && (output_id(0) != NullTensorID))
{
Tensor *dst = output(0);
ARM_COMPUTE_ERROR_ON(dst == nullptr);
diff --git a/src/graph/nodes/ChannelShuffleLayerNode.cpp b/src/graph/nodes/ChannelShuffleLayerNode.cpp
index 5102e4b6da..3cb9e23eca 100644
--- a/src/graph/nodes/ChannelShuffleLayerNode.cpp
+++ b/src/graph/nodes/ChannelShuffleLayerNode.cpp
@@ -30,8 +30,7 @@ namespace arm_compute
{
namespace graph
{
-ChannelShuffleLayerNode::ChannelShuffleLayerNode(unsigned int num_groups)
- : _num_groups(num_groups)
+ChannelShuffleLayerNode::ChannelShuffleLayerNode(unsigned int num_groups) : _num_groups(num_groups)
{
_input_edges.resize(1, EmptyEdgeID);
_outputs.resize(1, NullTensorID);
@@ -44,7 +43,7 @@ unsigned int ChannelShuffleLayerNode::num_groups() const
bool ChannelShuffleLayerNode::forward_descriptors()
{
- if((input_id(0) != NullTensorID) && (output_id(0) != NullTensorID))
+ if ((input_id(0) != NullTensorID) && (output_id(0) != NullTensorID))
{
Tensor *dst = output(0);
ARM_COMPUTE_ERROR_ON(dst == nullptr);
@@ -75,4 +74,4 @@ void ChannelShuffleLayerNode::accept(INodeVisitor &v)
v.visit(*this);
}
} // namespace graph
-} // namespace arm_compute \ No newline at end of file
+} // namespace arm_compute
diff --git a/src/graph/nodes/ConcatenateLayerNode.cpp b/src/graph/nodes/ConcatenateLayerNode.cpp
index 3f3c70f3bb..8e5393a5e4 100644
--- a/src/graph/nodes/ConcatenateLayerNode.cpp
+++ b/src/graph/nodes/ConcatenateLayerNode.cpp
@@ -24,17 +24,17 @@
#include "arm_compute/graph/nodes/ConcatenateLayerNode.h"
#include "arm_compute/core/Utils.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/graph/Graph.h"
#include "arm_compute/graph/INodeVisitor.h"
#include "arm_compute/graph/Utils.h"
-#include "arm_compute/core/utils/misc/ShapeCalculator.h"
-
namespace arm_compute
{
namespace graph
{
-ConcatenateLayerNode::ConcatenateLayerNode(unsigned int total_nodes, descriptors::ConcatLayerDescriptor concat_descriptor)
+ConcatenateLayerNode::ConcatenateLayerNode(unsigned int total_nodes,
+ descriptors::ConcatLayerDescriptor concat_descriptor)
: _total_nodes(total_nodes), _concat_descriptor(std::move(concat_descriptor)), _is_enabled(true)
{
_input_edges.resize(_total_nodes, EmptyEdgeID);
@@ -73,7 +73,7 @@ TensorDescriptor ConcatenateLayerNode::compute_output_descriptor(const std::vect
// Extract shapes
std::vector<const TensorShape *> shapes;
shapes.reserve(input_descriptors.size());
- for(auto &input_descriptor : input_descriptors)
+ for (auto &input_descriptor : input_descriptors)
{
shapes.emplace_back(&input_descriptor.shape);
}
@@ -85,7 +85,7 @@ TensorDescriptor ConcatenateLayerNode::compute_output_descriptor(const std::vect
bool ConcatenateLayerNode::forward_descriptors()
{
- if(_outputs[0] != NullTensorID)
+ if (_outputs[0] != NullTensorID)
{
Tensor *dst = output(0);
ARM_COMPUTE_ERROR_ON(dst == nullptr);
@@ -101,24 +101,22 @@ TensorDescriptor ConcatenateLayerNode::configure_output(size_t idx) const
ARM_COMPUTE_ERROR_ON(idx >= _outputs.size());
// Check if all input tensors are set
- bool are_all_inputs_set = std::all_of(std::begin(_input_edges), std::end(_input_edges), [](const EdgeID & eid)
- {
- return eid != EmptyEdgeID;
- });
+ bool are_all_inputs_set = std::all_of(std::begin(_input_edges), std::end(_input_edges),
+ [](const EdgeID &eid) { return eid != EmptyEdgeID; });
TensorDescriptor output_info = {};
- if(are_all_inputs_set)
+ if (are_all_inputs_set)
{
std::vector<TensorDescriptor> inputs_descriptors;
- for(unsigned int i = 0; i < _input_edges.size(); ++i)
+ for (unsigned int i = 0; i < _input_edges.size(); ++i)
{
const Tensor *t = _graph->tensor(input_id(i));
ARM_COMPUTE_ERROR_ON(t == nullptr);
inputs_descriptors.push_back(t->desc());
}
output_info = compute_output_descriptor(inputs_descriptors, _concat_descriptor.axis);
- if(!_concat_descriptor.output_qinfo.empty())
+ if (!_concat_descriptor.output_qinfo.empty())
{
output_info.quant_info = _concat_descriptor.output_qinfo;
}
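
For context, the shape pointers collected above feed the library's calculate_concatenate_shape helper (not visible in this hunk); a sketch of the shape rule with illustrative values, noting that every dimension except the concatenation axis must match:

    TensorShape a(8U, 8U, 3U);
    TensorShape b(8U, 8U, 3U);
    std::vector<const TensorShape *> shapes{&a, &b};
    TensorShape out = arm_compute::misc::shape_calculator::calculate_concatenate_shape(shapes, 2);
    // out == TensorShape(8U, 8U, 6U)
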
diff --git a/src/graph/nodes/ConstNode.cpp b/src/graph/nodes/ConstNode.cpp
index eb96d63888..6e8fbff71a 100644
--- a/src/graph/nodes/ConstNode.cpp
+++ b/src/graph/nodes/ConstNode.cpp
@@ -30,15 +30,14 @@ namespace arm_compute
{
namespace graph
{
-ConstNode::ConstNode(TensorDescriptor desc)
- : _desc(std::move(desc))
+ConstNode::ConstNode(TensorDescriptor desc) : _desc(std::move(desc))
{
_outputs.resize(1, NullTensorID);
}
bool ConstNode::forward_descriptors()
{
- if(output_id(0) != NullTensorID)
+ if (output_id(0) != NullTensorID)
{
Tensor *t = output(0);
ARM_COMPUTE_ERROR_ON(t == nullptr);
diff --git a/src/graph/nodes/ConvolutionLayerNode.cpp b/src/graph/nodes/ConvolutionLayerNode.cpp
index a9825702ce..f0263fc84a 100644
--- a/src/graph/nodes/ConvolutionLayerNode.cpp
+++ b/src/graph/nodes/ConvolutionLayerNode.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2019, 2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -37,7 +37,12 @@ ConvolutionLayerNode::ConvolutionLayerNode(PadStrideInfo info,
ConvolutionMethod method,
FastMathHint fast_math_hint,
QuantizationInfo out_quant_info)
- : _info(std::move(info)), _num_groups(num_groups), _method(method), _fast_math_hint(fast_math_hint), _out_quant_info(std::move(out_quant_info)), _fused_activation()
+ : _info(std::move(info)),
+ _num_groups(num_groups),
+ _method(method),
+ _fast_math_hint(fast_math_hint),
+ _out_quant_info(std::move(out_quant_info)),
+ _fused_activation()
{
_input_edges.resize(3, EmptyEdgeID);
_outputs.resize(1, NullTensorID);
@@ -83,6 +88,11 @@ void ConvolutionLayerNode::set_fused_activation(ActivationLayerInfo fused_activa
_fused_activation = fused_activation;
}
+void ConvolutionLayerNode::set_convolution_info(PadStrideInfo info)
+{
+ _info = info;
+}
+
TensorDescriptor ConvolutionLayerNode::compute_output_descriptor(const TensorDescriptor &input_descriptor,
const TensorDescriptor &weights_descriptor,
const PadStrideInfo &info)
@@ -95,20 +105,22 @@ TensorDescriptor ConvolutionLayerNode::compute_output_descriptor(const TensorDes
const unsigned int kernel_width = get_dimension_size(weights_descriptor, DataLayoutDimension::WIDTH);
const unsigned int kernel_height = get_dimension_size(weights_descriptor, DataLayoutDimension::HEIGHT);
- std::tie(output_width, output_height) = scaled_dimensions(input_width, input_height, kernel_width, kernel_height, info);
+ std::tie(output_width, output_height) =
+ scaled_dimensions(input_width, input_height, kernel_width, kernel_height, info);
const DataLayout data_layout = input_descriptor.layout;
TensorDescriptor output_descriptor = input_descriptor;
output_descriptor.shape.set(get_dimension_idx(data_layout, DataLayoutDimension::WIDTH), output_width);
output_descriptor.shape.set(get_dimension_idx(data_layout, DataLayoutDimension::HEIGHT), output_height);
- output_descriptor.shape.set(get_dimension_idx(data_layout, DataLayoutDimension::CHANNEL), weights_descriptor.shape[3]);
+ output_descriptor.shape.set(get_dimension_idx(data_layout, DataLayoutDimension::CHANNEL),
+ weights_descriptor.shape[3]);
return output_descriptor;
}
bool ConvolutionLayerNode::forward_descriptors()
{
- if((input_id(0) != NullTensorID) && (input_id(1) != NullTensorID) && (output_id(0) != NullTensorID))
+ if ((input_id(0) != NullTensorID) && (input_id(1) != NullTensorID) && (output_id(0) != NullTensorID))
{
Tensor *dst = output(0);
ARM_COMPUTE_ERROR_ON(dst == nullptr);
@@ -127,7 +139,7 @@ TensorDescriptor ConvolutionLayerNode::configure_output(size_t idx) const
ARM_COMPUTE_ERROR_ON(src == nullptr || weights == nullptr);
TensorDescriptor output_info = compute_output_descriptor(src->desc(), weights->desc(), _info);
- if(!_out_quant_info.empty())
+ if (!_out_quant_info.empty())
{
output_info.quant_info = _out_quant_info;
}
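
The spatial sizing above delegates to scaled_dimensions(); a worked example with illustrative numbers:

    // 224x224 input, 3x3 kernel, stride 1, symmetric padding 1 (FLOOR rounding):
    // out = (224 + 1 + 1 - 3) / 1 + 1 = 224, i.e. a same-size convolution.
    unsigned int out_w = 0, out_h = 0;
    const arm_compute::PadStrideInfo info(1, 1, 1, 1);
    std::tie(out_w, out_h) = arm_compute::scaled_dimensions(224, 224, 3, 3, info);
    // out_w == 224, out_h == 224
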
diff --git a/src/graph/nodes/DeconvolutionLayerNode.cpp b/src/graph/nodes/DeconvolutionLayerNode.cpp
index 3542d5ad10..2058ab21e5 100644
--- a/src/graph/nodes/DeconvolutionLayerNode.cpp
+++ b/src/graph/nodes/DeconvolutionLayerNode.cpp
@@ -56,20 +56,22 @@ TensorDescriptor DeconvolutionLayerNode::compute_output_descriptor(const TensorD
const unsigned int kernel_width = get_dimension_size(weights_descriptor, DataLayoutDimension::WIDTH);
const unsigned int kernel_height = get_dimension_size(weights_descriptor, DataLayoutDimension::HEIGHT);
- std::tie(output_width, output_height) = deconvolution_output_dimensions(input_width, input_height, kernel_width, kernel_height, info);
+ std::tie(output_width, output_height) =
+ deconvolution_output_dimensions(input_width, input_height, kernel_width, kernel_height, info);
const DataLayout data_layout = input_descriptor.layout;
TensorDescriptor output_descriptor = input_descriptor;
output_descriptor.shape.set(get_dimension_idx(data_layout, DataLayoutDimension::WIDTH), output_width);
output_descriptor.shape.set(get_dimension_idx(data_layout, DataLayoutDimension::HEIGHT), output_height);
- output_descriptor.shape.set(get_dimension_idx(data_layout, DataLayoutDimension::CHANNEL), weights_descriptor.shape[3]);
+ output_descriptor.shape.set(get_dimension_idx(data_layout, DataLayoutDimension::CHANNEL),
+ weights_descriptor.shape[3]);
return output_descriptor;
}
bool DeconvolutionLayerNode::forward_descriptors()
{
- if((input_id(0) != NullTensorID) && (input_id(1) != NullTensorID) && (output_id(0) != NullTensorID))
+ if ((input_id(0) != NullTensorID) && (input_id(1) != NullTensorID) && (output_id(0) != NullTensorID))
{
Tensor *dst = output(0);
ARM_COMPUTE_ERROR_ON(dst == nullptr);
@@ -89,7 +91,7 @@ TensorDescriptor DeconvolutionLayerNode::configure_output(size_t idx) const
TensorDescriptor output_info = compute_output_descriptor(src->desc(), weights->desc(), descriptor.info);
- if(!descriptor.out_quant_info.empty())
+ if (!descriptor.out_quant_info.empty())
{
output_info.set_quantization_info(descriptor.out_quant_info);
}
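
Deconvolution sizing works in the opposite direction via deconvolution_output_dimensions(); a worked example with illustrative numbers:

    // out = stride * (in - 1) + kernel - (pad_left + pad_right)
    // 4x4 input, 3x3 kernel, stride 2, no padding: 2 * (4 - 1) + 3 = 9.
    unsigned int out_w = 0, out_h = 0;
    const arm_compute::PadStrideInfo info(2, 2, 0, 0);
    std::tie(out_w, out_h) = arm_compute::deconvolution_output_dimensions(4, 4, 3, 3, info);
    // out_w == 9, out_h == 9
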
diff --git a/src/graph/nodes/DepthToSpaceLayerNode.cpp b/src/graph/nodes/DepthToSpaceLayerNode.cpp
index b70ac56a07..0b914a0e56 100644
--- a/src/graph/nodes/DepthToSpaceLayerNode.cpp
+++ b/src/graph/nodes/DepthToSpaceLayerNode.cpp
@@ -32,8 +32,7 @@ namespace arm_compute
{
namespace graph
{
-DepthToSpaceLayerNode::DepthToSpaceLayerNode(int block_shape)
- : _block_shape(block_shape)
+DepthToSpaceLayerNode::DepthToSpaceLayerNode(int block_shape) : _block_shape(block_shape)
{
_input_edges.resize(1, EmptyEdgeID);
_outputs.resize(1, NullTensorID);
@@ -44,7 +43,8 @@ int DepthToSpaceLayerNode::block_shape() const
return _block_shape;
}
-TensorDescriptor DepthToSpaceLayerNode::compute_output_descriptor(const TensorDescriptor &input_descriptor, int block_shape)
+TensorDescriptor DepthToSpaceLayerNode::compute_output_descriptor(const TensorDescriptor &input_descriptor,
+ int block_shape)
{
using namespace arm_compute::helpers::tensor_transform;
@@ -53,14 +53,15 @@ TensorDescriptor DepthToSpaceLayerNode::compute_output_descriptor(const TensorDe
// Set descriptor shape
TensorDescriptor output_descriptor = input_descriptor;
- output_descriptor.shape = misc::shape_calculator::compute_depth_to_space_shape(input_shape, data_layout, block_shape);
+ output_descriptor.shape =
+ misc::shape_calculator::compute_depth_to_space_shape(input_shape, data_layout, block_shape);
return output_descriptor;
}
bool DepthToSpaceLayerNode::forward_descriptors()
{
- if((input_id(0) != NullTensorID) && (output_id(0) != NullTensorID))
+ if ((input_id(0) != NullTensorID) && (output_id(0) != NullTensorID))
{
Tensor *dst = output(0);
ARM_COMPUTE_ERROR_ON(dst == nullptr);
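
The shape transform above multiplies the spatial dimensions by block_shape and divides the channels by block_shape squared; a sketch with an illustrative NHWC shape (ACL stores NHWC shapes as [C, W, H, N]):

    TensorShape in(16U, 8U, 8U, 1U); // C=16, W=8, H=8, N=1
    TensorShape out = arm_compute::misc::shape_calculator::compute_depth_to_space_shape(
        in, arm_compute::DataLayout::NHWC, 2);
    // out == TensorShape(4U, 16U, 16U, 1U)
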
diff --git a/src/graph/nodes/DepthwiseConvolutionLayerNode.cpp b/src/graph/nodes/DepthwiseConvolutionLayerNode.cpp
index 42fb0fd6da..92d7266088 100644
--- a/src/graph/nodes/DepthwiseConvolutionLayerNode.cpp
+++ b/src/graph/nodes/DepthwiseConvolutionLayerNode.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2019, 2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -32,9 +32,15 @@ namespace arm_compute
{
namespace graph
{
-DepthwiseConvolutionLayerNode::DepthwiseConvolutionLayerNode(PadStrideInfo info, int depth_multiplier, DepthwiseConvolutionMethod method,
- QuantizationInfo out_quant_info)
- : _info(std::move(info)), _depth_multiplier(depth_multiplier), _method(method), _out_quant_info(std::move(out_quant_info)), _fused_activation()
+DepthwiseConvolutionLayerNode::DepthwiseConvolutionLayerNode(PadStrideInfo info,
+ int depth_multiplier,
+ DepthwiseConvolutionMethod method,
+ QuantizationInfo out_quant_info)
+ : _info(std::move(info)),
+ _depth_multiplier(depth_multiplier),
+ _method(method),
+ _out_quant_info(std::move(out_quant_info)),
+ _fused_activation()
{
_input_edges.resize(3, EmptyEdgeID);
_outputs.resize(1, NullTensorID);
@@ -70,6 +76,11 @@ void DepthwiseConvolutionLayerNode::set_fused_activation(ActivationLayerInfo fus
_fused_activation = fused_activation;
}
+void DepthwiseConvolutionLayerNode::set_convolution_info(PadStrideInfo info)
+{
+ _info = info;
+}
+
TensorDescriptor DepthwiseConvolutionLayerNode::compute_output_descriptor(const TensorDescriptor &input_descriptor,
const TensorDescriptor &weights_descriptor,
const PadStrideInfo &info,
@@ -84,20 +95,22 @@ TensorDescriptor DepthwiseConvolutionLayerNode::compute_output_descriptor(const
const unsigned int kernel_width = get_dimension_size(weights_descriptor, DataLayoutDimension::WIDTH);
const unsigned int kernel_height = get_dimension_size(weights_descriptor, DataLayoutDimension::HEIGHT);
- std::tie(output_width, output_height) = scaled_dimensions(input_width, input_height, kernel_width, kernel_height, info);
+ std::tie(output_width, output_height) =
+ scaled_dimensions(input_width, input_height, kernel_width, kernel_height, info);
const DataLayout data_layout = input_descriptor.layout;
TensorDescriptor output_descriptor = input_descriptor;
output_descriptor.shape.set(get_dimension_idx(data_layout, DataLayoutDimension::WIDTH), output_width);
output_descriptor.shape.set(get_dimension_idx(data_layout, DataLayoutDimension::HEIGHT), output_height);
- output_descriptor.shape.set(get_dimension_idx(data_layout, DataLayoutDimension::CHANNEL), input_channels * depth_multiplier);
+ output_descriptor.shape.set(get_dimension_idx(data_layout, DataLayoutDimension::CHANNEL),
+ input_channels * depth_multiplier);
return output_descriptor;
}
bool DepthwiseConvolutionLayerNode::forward_descriptors()
{
- if((input_id(0) != NullTensorID) && (input_id(1) != NullTensorID) && (output_id(0) != NullTensorID))
+ if ((input_id(0) != NullTensorID) && (input_id(1) != NullTensorID) && (output_id(0) != NullTensorID))
{
Tensor *dst = output(0);
ARM_COMPUTE_ERROR_ON(dst == nullptr);
@@ -116,7 +129,7 @@ TensorDescriptor DepthwiseConvolutionLayerNode::configure_output(size_t idx) con
ARM_COMPUTE_ERROR_ON(src == nullptr || weights == nullptr);
TensorDescriptor output_info = compute_output_descriptor(src->desc(), weights->desc(), _info, _depth_multiplier);
- if(!_out_quant_info.empty())
+ if (!_out_quant_info.empty())
{
output_info.quant_info = _out_quant_info;
}
@@ -134,4 +147,4 @@ void DepthwiseConvolutionLayerNode::accept(INodeVisitor &v)
v.visit(*this);
}
} // namespace graph
-} // namespace arm_compute \ No newline at end of file
+} // namespace arm_compute
diff --git a/src/graph/nodes/DequantizationLayerNode.cpp b/src/graph/nodes/DequantizationLayerNode.cpp
index 14c4752f12..3ea000852a 100644
--- a/src/graph/nodes/DequantizationLayerNode.cpp
+++ b/src/graph/nodes/DequantizationLayerNode.cpp
@@ -40,7 +40,7 @@ DequantizationLayerNode::DequantizationLayerNode()
bool DequantizationLayerNode::forward_descriptors()
{
- if((input_id(0) != NullTensorID) && (output_id(0) != NullTensorID))
+ if ((input_id(0) != NullTensorID) && (output_id(0) != NullTensorID))
{
Tensor *dst = output(0);
ARM_COMPUTE_ERROR_ON(dst == nullptr);
@@ -74,4 +74,4 @@ void DequantizationLayerNode::accept(INodeVisitor &v)
v.visit(*this);
}
} // namespace graph
-} // namespace arm_compute \ No newline at end of file
+} // namespace arm_compute
diff --git a/src/graph/nodes/DetectionOutputLayerNode.cpp b/src/graph/nodes/DetectionOutputLayerNode.cpp
index fc6f531ee0..65ddd2f5bc 100644
--- a/src/graph/nodes/DetectionOutputLayerNode.cpp
+++ b/src/graph/nodes/DetectionOutputLayerNode.cpp
@@ -32,8 +32,7 @@ namespace arm_compute
{
namespace graph
{
-DetectionOutputLayerNode::DetectionOutputLayerNode(DetectionOutputLayerInfo detection_info)
- : _info(detection_info)
+DetectionOutputLayerNode::DetectionOutputLayerNode(DetectionOutputLayerInfo detection_info) : _info(detection_info)
{
_input_edges.resize(3, EmptyEdgeID);
_outputs.resize(1, NullTensorID);
@@ -47,7 +46,8 @@ DetectionOutputLayerInfo DetectionOutputLayerNode::detection_output_info() const
TensorDescriptor DetectionOutputLayerNode::compute_output_descriptor(const TensorDescriptor &input_descriptor,
const DetectionOutputLayerInfo &info)
{
- const unsigned int max_size = info.keep_top_k() * ((input_descriptor.shape.num_dimensions() > 1) ? input_descriptor.shape[1] : 1);
+ const unsigned int max_size =
+ info.keep_top_k() * ((input_descriptor.shape.num_dimensions() > 1) ? input_descriptor.shape[1] : 1);
TensorDescriptor output_descriptor = input_descriptor;
output_descriptor.shape.set(0, detection_size);
@@ -58,7 +58,8 @@ TensorDescriptor DetectionOutputLayerNode::compute_output_descriptor(const Tenso
bool DetectionOutputLayerNode::forward_descriptors()
{
- if((input_id(0) != NullTensorID) && (input_id(1) != NullTensorID) && (input_id(2) != NullTensorID) && (output_id(0) != NullTensorID))
+ if ((input_id(0) != NullTensorID) && (input_id(1) != NullTensorID) && (input_id(2) != NullTensorID) &&
+ (output_id(0) != NullTensorID))
{
Tensor *dst = output(0);
ARM_COMPUTE_ERROR_ON(dst == nullptr);
diff --git a/src/graph/nodes/DetectionPostProcessLayerNode.cpp b/src/graph/nodes/DetectionPostProcessLayerNode.cpp
index 2c5005af30..af3fc03d67 100644
--- a/src/graph/nodes/DetectionPostProcessLayerNode.cpp
+++ b/src/graph/nodes/DetectionPostProcessLayerNode.cpp
@@ -46,10 +46,11 @@ DetectionPostProcessLayerInfo DetectionPostProcessLayerNode::detection_post_proc
bool DetectionPostProcessLayerNode::forward_descriptors()
{
- if((input_id(0) != NullTensorID) && (input_id(1) != NullTensorID) && (input_id(2) != NullTensorID) && (output_id(0) != NullTensorID) && (output_id(1) != NullTensorID)
- && (output_id(2) != NullTensorID) && (output_id(3) != NullTensorID))
+ if ((input_id(0) != NullTensorID) && (input_id(1) != NullTensorID) && (input_id(2) != NullTensorID) &&
+ (output_id(0) != NullTensorID) && (output_id(1) != NullTensorID) && (output_id(2) != NullTensorID) &&
+ (output_id(3) != NullTensorID))
{
- for(unsigned int i = 0; i < 4; ++i)
+ for (unsigned int i = 0; i < 4; ++i)
{
Tensor *dst = output(i);
ARM_COMPUTE_ERROR_ON(dst == nullptr);
@@ -68,7 +69,7 @@ TensorDescriptor DetectionPostProcessLayerNode::configure_output(size_t idx) con
TensorDescriptor output_desc;
const unsigned int num_detected_box = _info.max_detections() * _info.max_classes_per_detection();
- switch(idx)
+ switch (idx)
{
case 0:
// Configure boxes output
@@ -101,4 +102,4 @@ void DetectionPostProcessLayerNode::accept(INodeVisitor &v)
v.visit(*this);
}
} // namespace graph
-} // namespace arm_compute \ No newline at end of file
+} // namespace arm_compute
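
configure_output() above dispatches on the output index, because the four outputs of detection post-processing carry different shapes. A self-contained sketch of that dispatch pattern; the shapes in the case bodies are assumptions for illustration, since the hunk does not show them:

#include <cstdio>

int main()
{
    const unsigned int max_detections            = 10;
    const unsigned int max_classes_per_detection = 1;
    const unsigned int num_detected_box          = max_detections * max_classes_per_detection;

    for (unsigned int idx = 0; idx < 4; ++idx)
    {
        switch (idx)
        {
            case 0: // boxes (assumed [4, num_detected_box])
                std::printf("output %u: [4, %u]\n", idx, num_detected_box);
                break;
            case 1: // classes
            case 2: // scores (assumed [num_detected_box])
                std::printf("output %u: [%u]\n", idx, num_detected_box);
                break;
            case 3: // number of valid detections (assumed [1])
                std::printf("output %u: [1]\n", idx);
                break;
        }
    }
    return 0;
}
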
diff --git a/src/graph/nodes/DummyNode.cpp b/src/graph/nodes/DummyNode.cpp
index 6fa9fbaf56..b5f37bd79b 100644
--- a/src/graph/nodes/DummyNode.cpp
+++ b/src/graph/nodes/DummyNode.cpp
@@ -32,8 +32,7 @@ namespace arm_compute
{
namespace graph
{
-DummyNode::DummyNode(TensorShape shape)
- : _shape(shape)
+DummyNode::DummyNode(TensorShape shape) : _shape(shape)
{
_input_edges.resize(1, EmptyEdgeID);
_outputs.resize(1, NullTensorID);
@@ -41,7 +40,7 @@ DummyNode::DummyNode(TensorShape shape)
bool DummyNode::forward_descriptors()
{
- if((input_id(0) != NullTensorID) && (output_id(0) != NullTensorID))
+ if ((input_id(0) != NullTensorID) && (output_id(0) != NullTensorID))
{
Tensor *dst = output(0);
ARM_COMPUTE_ERROR_ON(dst == nullptr);
@@ -75,4 +74,4 @@ void DummyNode::accept(INodeVisitor &v)
v.visit(*this);
}
} // namespace graph
-} // namespace arm_compute \ No newline at end of file
+} // namespace arm_compute
diff --git a/src/graph/nodes/EltwiseLayerNode.cpp b/src/graph/nodes/EltwiseLayerNode.cpp
index 4426e953ee..3f7a08e64d 100644
--- a/src/graph/nodes/EltwiseLayerNode.cpp
+++ b/src/graph/nodes/EltwiseLayerNode.cpp
@@ -31,8 +31,7 @@ namespace arm_compute
{
namespace graph
{
-EltwiseLayerNode::EltwiseLayerNode(const descriptors::EltwiseLayerDescriptor &descriptor)
- : descriptor(descriptor)
+EltwiseLayerNode::EltwiseLayerNode(const descriptors::EltwiseLayerDescriptor &descriptor) : descriptor(descriptor)
{
_input_edges.resize(2, EmptyEdgeID);
_outputs.resize(1, NullTensorID);
@@ -70,7 +69,7 @@ void EltwiseLayerNode::set_fused_activation(ActivationLayerInfo fused_activation
bool EltwiseLayerNode::forward_descriptors()
{
- if((input_id(0) != NullTensorID) && (input_id(1) != NullTensorID) && (output_id(0) != NullTensorID))
+ if ((input_id(0) != NullTensorID) && (input_id(1) != NullTensorID) && (output_id(0) != NullTensorID))
{
Tensor *dst = output(0);
ARM_COMPUTE_ERROR_ON(dst == nullptr);
@@ -97,7 +96,7 @@ TensorDescriptor EltwiseLayerNode::configure_output(size_t idx) const
output_info.set_shape(out_shape);
- if(!descriptor.out_quant_info.empty())
+ if (!descriptor.out_quant_info.empty())
{
output_info.set_quantization_info(descriptor.out_quant_info);
}
@@ -134,7 +133,7 @@ void UnaryEltwiseLayerNode::set_fused_activation(ActivationLayerInfo fused_activ
bool UnaryEltwiseLayerNode::forward_descriptors()
{
- if((input_id(0) != NullTensorID) && (output_id(0) != NullTensorID))
+ if ((input_id(0) != NullTensorID) && (output_id(0) != NullTensorID))
{
Tensor *dst = output(0);
ARM_COMPUTE_ERROR_ON(dst == nullptr);
@@ -153,7 +152,7 @@ TensorDescriptor UnaryEltwiseLayerNode::configure_output(size_t idx) const
auto output_info = src->desc();
- if(!descriptor.out_quant_info.empty())
+ if (!descriptor.out_quant_info.empty())
{
output_info.set_quantization_info(descriptor.out_quant_info);
}
diff --git a/src/graph/nodes/FlattenLayerNode.cpp b/src/graph/nodes/FlattenLayerNode.cpp
index 48519a1695..952df2f3ec 100644
--- a/src/graph/nodes/FlattenLayerNode.cpp
+++ b/src/graph/nodes/FlattenLayerNode.cpp
@@ -38,7 +38,7 @@ FlattenLayerNode::FlattenLayerNode()
bool FlattenLayerNode::forward_descriptors()
{
- if((input_id(0) != NullTensorID) && (output_id(0) != NullTensorID))
+ if ((input_id(0) != NullTensorID) && (output_id(0) != NullTensorID))
{
Tensor *dst = output(0);
ARM_COMPUTE_ERROR_ON(dst == nullptr);
@@ -72,4 +72,4 @@ void FlattenLayerNode::accept(INodeVisitor &v)
v.visit(*this);
}
} // namespace graph
-} // namespace arm_compute \ No newline at end of file
+} // namespace arm_compute
diff --git a/src/graph/nodes/FullyConnectedLayer.cpp b/src/graph/nodes/FullyConnectedLayer.cpp
index 442f636b61..1eed69ddaf 100644
--- a/src/graph/nodes/FullyConnectedLayer.cpp
+++ b/src/graph/nodes/FullyConnectedLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,22 +21,36 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/graph/nodes/FullyConnectedLayerNode.h"
-
#include "arm_compute/core/Utils.h"
#include "arm_compute/graph/Graph.h"
#include "arm_compute/graph/INodeVisitor.h"
+#include "arm_compute/graph/nodes/FullyConnectedLayerNode.h"
namespace arm_compute
{
namespace graph
{
-FullyConnectedLayerNode::FullyConnectedLayerNode(unsigned int num_outputs, QuantizationInfo out_quant_info, FullyConnectedLayerInfo fc_info)
- : _num_outputs(num_outputs), _out_quant_info(std::move(out_quant_info)), _info(fc_info)
+FullyConnectedLayerNode::FullyConnectedLayerNode(unsigned int num_outputs,
+ QuantizationInfo out_quant_info,
+ FullyConnectedLayerInfo fc_info,
+ FastMathHint fast_math_hint)
+ : _num_outputs(num_outputs),
+ _out_quant_info(std::move(out_quant_info)),
+ _info(fc_info),
+ _fast_math_hint(fast_math_hint)
{
_input_edges.resize(3, EmptyEdgeID);
_outputs.resize(1, NullTensorID);
}
+void FullyConnectedLayerNode::set_fast_math_hint(FastMathHint hint)
+{
+ _fast_math_hint = hint;
+}
+
+FastMathHint FullyConnectedLayerNode::fast_math_hint() const
+{
+ return _fast_math_hint;
+}
void FullyConnectedLayerNode::set_fused_activation(ActivationLayerInfo fused_activation)
{
@@ -51,11 +65,11 @@ TensorDescriptor FullyConnectedLayerNode::compute_weights_descriptor(const Tenso
unsigned int num_weights = 1;
unsigned int num_dimensions = input_descriptor.shape.num_dimensions();
// Ignore the batch dimension if there is one:
- if(num_dimensions == 2 || num_dimensions == 4)
+ if (num_dimensions == 2 || num_dimensions == 4)
{
num_dimensions--;
}
- for(unsigned int i = 0; i < num_dimensions; i++)
+ for (unsigned int i = 0; i < num_dimensions; i++)
{
num_weights *= input_descriptor.shape[i];
}
@@ -64,13 +78,13 @@ TensorDescriptor FullyConnectedLayerNode::compute_weights_descriptor(const Tenso
weights_descriptor.shape = TensorShape(num_weights, num_outputs);
    // If weights are transposed, use the transposed shape
- if(!fc_info.transpose_weights)
+ if (!fc_info.transpose_weights)
{
weights_descriptor.shape = TensorShape(num_outputs, num_weights);
}
// Set quantization info if present
- if(!weights_quant_info.empty())
+ if (!weights_quant_info.empty())
{
weights_descriptor.quant_info = weights_quant_info;
}
@@ -84,7 +98,7 @@ TensorDescriptor FullyConnectedLayerNode::compute_output_descriptor(const Tensor
{
// Note: Only 1D batch space is supported at the moment
unsigned int batches = input_descriptor.shape[1];
- if(input_descriptor.shape.num_dimensions() > 2)
+ if (input_descriptor.shape.num_dimensions() > 2)
{
batches = input_descriptor.shape[3];
}
@@ -94,7 +108,7 @@ TensorDescriptor FullyConnectedLayerNode::compute_output_descriptor(const Tensor
output_descriptor.shape = TensorShape(num_outputs, batches);
// Set quantization info if present
- if(!out_quant_info.empty())
+ if (!out_quant_info.empty())
{
output_descriptor.quant_info = out_quant_info;
}
@@ -109,7 +123,7 @@ FullyConnectedLayerInfo FullyConnectedLayerNode::info() const
bool FullyConnectedLayerNode::forward_descriptors()
{
- if((input_id(0) != NullTensorID) && (output_id(0) != NullTensorID))
+ if ((input_id(0) != NullTensorID) && (output_id(0) != NullTensorID))
{
Tensor *dst = output(0);
ARM_COMPUTE_ERROR_ON(dst == nullptr);
@@ -138,4 +152,4 @@ void FullyConnectedLayerNode::accept(INodeVisitor &v)
v.visit(*this);
}
} // namespace graph
-} // namespace arm_compute \ No newline at end of file
+} // namespace arm_compute
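
The two helpers reformatted above collapse all non-batch input dimensions into a single num_weights extent and pick the batch count from dimension 1 (2D input) or dimension 3 (4D input). A standalone sketch of that shape arithmetic, using plain vectors instead of TensorDescriptor and an invented 64x7x7x8 input:

#include <cstdio>
#include <vector>

int main()
{
    const std::vector<unsigned int> input_shape{64, 7, 7, 8}; // W, H, C, N (illustrative)
    const unsigned int num_outputs = 1000;

    // Drop the batch dimension for 2D/4D inputs, then flatten the rest.
    unsigned int num_dimensions = input_shape.size();
    if (num_dimensions == 2 || num_dimensions == 4)
    {
        num_dimensions--;
    }
    unsigned int num_weights = 1;
    for (unsigned int i = 0; i < num_dimensions; i++)
    {
        num_weights *= input_shape[i];
    }

    // Batches: dimension 1 for 2D inputs, dimension 3 otherwise (1D batch space only).
    const unsigned int batches = (input_shape.size() > 2) ? input_shape[3] : input_shape[1];

    // With fc_info.transpose_weights == false the weights shape would be flipped
    // to [num_outputs, num_weights], as in the hunk above.
    std::printf("weights: [%u, %u], output: [%u, %u]\n", num_weights, num_outputs, num_outputs, batches);
    return 0;
}
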
diff --git a/src/graph/nodes/FusedConvolutionBatchNormalizationNode.cpp b/src/graph/nodes/FusedConvolutionBatchNormalizationNode.cpp
index de995ebee9..9d37e84acf 100644
--- a/src/graph/nodes/FusedConvolutionBatchNormalizationNode.cpp
+++ b/src/graph/nodes/FusedConvolutionBatchNormalizationNode.cpp
@@ -32,12 +32,18 @@ namespace arm_compute
{
namespace graph
{
-FusedConvolutionBatchNormalizationNode::FusedConvolutionBatchNormalizationNode(float epsilon, PadStrideInfo info,
- unsigned int num_groups,
- ConvolutionMethod method,
- FastMathHint fast_math_hint,
+FusedConvolutionBatchNormalizationNode::FusedConvolutionBatchNormalizationNode(float epsilon,
+ PadStrideInfo info,
+ unsigned int num_groups,
+ ConvolutionMethod method,
+ FastMathHint fast_math_hint,
ActivationLayerInfo fused_activation)
- : _epsilon(epsilon), _info(std::move(info)), _num_groups(num_groups), _method(method), _fast_math_hint(fast_math_hint), _fused_activation(fused_activation)
+ : _epsilon(epsilon),
+ _info(std::move(info)),
+ _num_groups(num_groups),
+ _method(method),
+ _fast_math_hint(fast_math_hint),
+ _fused_activation(fused_activation)
{
_input_edges.resize(7, EmptyEdgeID);
_outputs.resize(1, NullTensorID);
@@ -88,9 +94,8 @@ void FusedConvolutionBatchNormalizationNode::set_fused_activation(ActivationLaye
_fused_activation = fused_activation;
}
-TensorDescriptor FusedConvolutionBatchNormalizationNode::compute_output_descriptor(const TensorDescriptor &input_descriptor,
- const TensorDescriptor &weights_descriptor,
- const PadStrideInfo &info)
+TensorDescriptor FusedConvolutionBatchNormalizationNode::compute_output_descriptor(
+ const TensorDescriptor &input_descriptor, const TensorDescriptor &weights_descriptor, const PadStrideInfo &info)
{
unsigned int output_width = 0;
unsigned int output_height = 0;
@@ -100,20 +105,22 @@ TensorDescriptor FusedConvolutionBatchNormalizationNode::compute_output_descript
const unsigned int kernel_width = get_dimension_size(weights_descriptor, DataLayoutDimension::WIDTH);
const unsigned int kernel_height = get_dimension_size(weights_descriptor, DataLayoutDimension::HEIGHT);
- std::tie(output_width, output_height) = scaled_dimensions(input_width, input_height, kernel_width, kernel_height, info);
+ std::tie(output_width, output_height) =
+ scaled_dimensions(input_width, input_height, kernel_width, kernel_height, info);
const DataLayout data_layout = input_descriptor.layout;
TensorDescriptor output_descriptor = input_descriptor;
output_descriptor.shape.set(get_dimension_idx(data_layout, DataLayoutDimension::WIDTH), output_width);
output_descriptor.shape.set(get_dimension_idx(data_layout, DataLayoutDimension::HEIGHT), output_height);
- output_descriptor.shape.set(get_dimension_idx(data_layout, DataLayoutDimension::CHANNEL), weights_descriptor.shape[3]);
+ output_descriptor.shape.set(get_dimension_idx(data_layout, DataLayoutDimension::CHANNEL),
+ weights_descriptor.shape[3]);
return output_descriptor;
}
bool FusedConvolutionBatchNormalizationNode::forward_descriptors()
{
- if((input_id(0) != NullTensorID) && (input_id(1) != NullTensorID) && (output_id(0) != NullTensorID))
+ if ((input_id(0) != NullTensorID) && (input_id(1) != NullTensorID) && (output_id(0) != NullTensorID))
{
Tensor *dst = output(0);
ARM_COMPUTE_ERROR_ON(dst == nullptr);
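
scaled_dimensions(), whose call is rewrapped above, computes the convolved spatial extent. A minimal sketch assuming FLOOR rounding and symmetric padding; the library helper also handles CEIL rounding and dilation:

#include <cstdio>

int main()
{
    const unsigned int in_w = 224, in_h = 224; // input spatial size
    const unsigned int k_w = 3, k_h = 3;       // kernel size
    const unsigned int stride = 2, pad = 1;    // symmetric padding (assumed)

    const unsigned int out_w = (in_w + 2 * pad - k_w) / stride + 1;
    const unsigned int out_h = (in_h + 2 * pad - k_h) / stride + 1;

    // The channel dimension then comes from weights_descriptor.shape[3],
    // i.e. the number of convolution kernels.
    std::printf("output: %ux%u\n", out_w, out_h); // 112x112
    return 0;
}
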
diff --git a/src/graph/nodes/FusedDepthwiseConvolutionBatchNormalizationNode.cpp b/src/graph/nodes/FusedDepthwiseConvolutionBatchNormalizationNode.cpp
index c022450b9d..c51641d64c 100644
--- a/src/graph/nodes/FusedDepthwiseConvolutionBatchNormalizationNode.cpp
+++ b/src/graph/nodes/FusedDepthwiseConvolutionBatchNormalizationNode.cpp
@@ -32,18 +32,24 @@ namespace arm_compute
{
namespace graph
{
-FusedDepthwiseConvolutionBatchNormalizationNode::FusedDepthwiseConvolutionBatchNormalizationNode(float epsilon,
- PadStrideInfo info,
- unsigned int depth_multiplier,
- DepthwiseConvolutionMethod method,
- ActivationLayerInfo fused_activation)
- : _epsilon(epsilon), _info(std::move(info)), _depth_multiplier(depth_multiplier), _method(method), _fused_activation(fused_activation)
+FusedDepthwiseConvolutionBatchNormalizationNode::FusedDepthwiseConvolutionBatchNormalizationNode(
+ float epsilon,
+ PadStrideInfo info,
+ unsigned int depth_multiplier,
+ DepthwiseConvolutionMethod method,
+ ActivationLayerInfo fused_activation)
+ : _epsilon(epsilon),
+ _info(std::move(info)),
+ _depth_multiplier(depth_multiplier),
+ _method(method),
+ _fused_activation(fused_activation)
{
_input_edges.resize(7, EmptyEdgeID);
_outputs.resize(1, NullTensorID);
}
-void FusedDepthwiseConvolutionBatchNormalizationNode::set_depthwise_convolution_method(DepthwiseConvolutionMethod method)
+void FusedDepthwiseConvolutionBatchNormalizationNode::set_depthwise_convolution_method(
+ DepthwiseConvolutionMethod method)
{
_method = method;
}
@@ -78,10 +84,11 @@ void FusedDepthwiseConvolutionBatchNormalizationNode::set_fused_activation(Activ
_fused_activation = fused_activation;
}
-TensorDescriptor FusedDepthwiseConvolutionBatchNormalizationNode::compute_output_descriptor(const TensorDescriptor &input_descriptor,
- const TensorDescriptor &weights_descriptor,
- const PadStrideInfo &info,
- int depth_multiplier)
+TensorDescriptor
+FusedDepthwiseConvolutionBatchNormalizationNode::compute_output_descriptor(const TensorDescriptor &input_descriptor,
+ const TensorDescriptor &weights_descriptor,
+ const PadStrideInfo &info,
+ int depth_multiplier)
{
unsigned int output_width = 0;
unsigned int output_height = 0;
@@ -92,19 +99,22 @@ TensorDescriptor FusedDepthwiseConvolutionBatchNormalizationNode::compute_output
const unsigned int kernel_width = get_dimension_size(weights_descriptor, DataLayoutDimension::WIDTH);
const unsigned int kernel_height = get_dimension_size(weights_descriptor, DataLayoutDimension::HEIGHT);
- std::tie(output_width, output_height) = scaled_dimensions(input_width, input_height, kernel_width, kernel_height, info);
+ std::tie(output_width, output_height) =
+ scaled_dimensions(input_width, input_height, kernel_width, kernel_height, info);
TensorDescriptor output_descriptor = input_descriptor;
output_descriptor.shape.set(get_dimension_idx(output_descriptor.layout, DataLayoutDimension::WIDTH), output_width);
- output_descriptor.shape.set(get_dimension_idx(output_descriptor.layout, DataLayoutDimension::HEIGHT), output_height);
- output_descriptor.shape.set(get_dimension_idx(output_descriptor.layout, DataLayoutDimension::CHANNEL), input_channels * depth_multiplier);
+ output_descriptor.shape.set(get_dimension_idx(output_descriptor.layout, DataLayoutDimension::HEIGHT),
+ output_height);
+ output_descriptor.shape.set(get_dimension_idx(output_descriptor.layout, DataLayoutDimension::CHANNEL),
+ input_channels * depth_multiplier);
return output_descriptor;
}
bool FusedDepthwiseConvolutionBatchNormalizationNode::forward_descriptors()
{
- if((input_id(0) != NullTensorID) && (input_id(1) != NullTensorID) && (output_id(0) != NullTensorID))
+ if ((input_id(0) != NullTensorID) && (input_id(1) != NullTensorID) && (output_id(0) != NullTensorID))
{
Tensor *dst = output(0);
ARM_COMPUTE_ERROR_ON(dst == nullptr);
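
The depthwise variant differs from the regular fused node in its channel arithmetic: each input channel produces depth_multiplier output channels, as the rewrapped CHANNEL line above shows. A one-line sketch with illustrative values:

#include <cstdio>

int main()
{
    const unsigned int input_channels   = 32;
    const unsigned int depth_multiplier = 2;

    // Mirrors the rewrapped CHANNEL line: one depthwise kernel per input
    // channel, depth_multiplier outputs per kernel.
    std::printf("output channels: %u\n", input_channels * depth_multiplier); // 64
    return 0;
}
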
diff --git a/src/graph/nodes/GenerateProposalsLayerNode.cpp b/src/graph/nodes/GenerateProposalsLayerNode.cpp
index 9f36862818..1671a47a95 100644
--- a/src/graph/nodes/GenerateProposalsLayerNode.cpp
+++ b/src/graph/nodes/GenerateProposalsLayerNode.cpp
@@ -23,17 +23,15 @@
*/
#include "arm_compute/graph/nodes/GenerateProposalsLayerNode.h"
+#include "arm_compute/core/Helpers.h"
#include "arm_compute/graph/Graph.h"
#include "arm_compute/graph/INodeVisitor.h"
-#include "arm_compute/core/Helpers.h"
-
namespace arm_compute
{
namespace graph
{
-GenerateProposalsLayerNode::GenerateProposalsLayerNode(GenerateProposalsInfo &info)
- : _info(info)
+GenerateProposalsLayerNode::GenerateProposalsLayerNode(GenerateProposalsInfo &info) : _info(info)
{
_input_edges.resize(3, EmptyEdgeID);
_outputs.resize(3, NullTensorID);
@@ -46,10 +44,10 @@ const GenerateProposalsInfo &GenerateProposalsLayerNode::info() const
bool GenerateProposalsLayerNode::forward_descriptors()
{
- if((input_id(0) != NullTensorID) && (input_id(1) != NullTensorID) && (input_id(2) != NullTensorID) && (output_id(0) != NullTensorID) && (output_id(1) != NullTensorID)
- && (output_id(2) != NullTensorID))
+ if ((input_id(0) != NullTensorID) && (input_id(1) != NullTensorID) && (input_id(2) != NullTensorID) &&
+ (output_id(0) != NullTensorID) && (output_id(1) != NullTensorID) && (output_id(2) != NullTensorID))
{
- for(unsigned int i = 0; i < 3; ++i)
+ for (unsigned int i = 0; i < 3; ++i)
{
Tensor *dst = output(i);
ARM_COMPUTE_ERROR_ON(dst == nullptr);
@@ -68,7 +66,7 @@ TensorDescriptor GenerateProposalsLayerNode::configure_output(size_t idx) const
ARM_COMPUTE_ERROR_ON(src == nullptr);
TensorDescriptor output_desc = src->desc();
- switch(idx)
+ switch (idx)
{
case 0:
// Configure proposals output
diff --git a/src/graph/nodes/InputNode.cpp b/src/graph/nodes/InputNode.cpp
index 072281f259..7408bc265d 100644
--- a/src/graph/nodes/InputNode.cpp
+++ b/src/graph/nodes/InputNode.cpp
@@ -30,15 +30,14 @@ namespace arm_compute
{
namespace graph
{
-InputNode::InputNode(TensorDescriptor desc)
- : _desc(std::move(desc))
+InputNode::InputNode(TensorDescriptor desc) : _desc(std::move(desc))
{
_outputs.resize(1, NullTensorID);
}
bool InputNode::forward_descriptors()
{
- if(output_id(0) != NullTensorID)
+ if (output_id(0) != NullTensorID)
{
Tensor *t = output(0);
ARM_COMPUTE_ERROR_ON(t == nullptr);
diff --git a/src/graph/nodes/L2NormalizeLayerNode.cpp b/src/graph/nodes/L2NormalizeLayerNode.cpp
index 0c35a335fa..1a57cf0199 100644
--- a/src/graph/nodes/L2NormalizeLayerNode.cpp
+++ b/src/graph/nodes/L2NormalizeLayerNode.cpp
@@ -30,18 +30,15 @@ namespace arm_compute
{
namespace graph
{
-L2NormalizeLayerNode::L2NormalizeLayerNode()
- : L2NormalizeLayerNode(0, 1e-12f)
+L2NormalizeLayerNode::L2NormalizeLayerNode() : L2NormalizeLayerNode(0, 1e-12f)
{
}
-L2NormalizeLayerNode::L2NormalizeLayerNode(int axis)
- : L2NormalizeLayerNode(axis, 1e-12f)
+L2NormalizeLayerNode::L2NormalizeLayerNode(int axis) : L2NormalizeLayerNode(axis, 1e-12f)
{
}
-L2NormalizeLayerNode::L2NormalizeLayerNode(int axis, float epsilon)
- : _axis(axis), _epsilon(epsilon)
+L2NormalizeLayerNode::L2NormalizeLayerNode(int axis, float epsilon) : _axis(axis), _epsilon(epsilon)
{
_input_edges.resize(1, EmptyEdgeID);
_outputs.resize(1, NullTensorID);
@@ -49,7 +46,7 @@ L2NormalizeLayerNode::L2NormalizeLayerNode(int axis, float epsilon)
bool L2NormalizeLayerNode::forward_descriptors()
{
- if((input_id(0) != NullTensorID) && (output_id(0) != NullTensorID))
+ if ((input_id(0) != NullTensorID) && (output_id(0) != NullTensorID))
{
Tensor *dst = output(0);
ARM_COMPUTE_ERROR_ON(dst == nullptr);
@@ -92,4 +89,4 @@ void L2NormalizeLayerNode::accept(INodeVisitor &v)
v.visit(*this);
}
} // namespace graph
-} // namespace arm_compute \ No newline at end of file
+} // namespace arm_compute
diff --git a/src/graph/nodes/NormalizationLayerNode.cpp b/src/graph/nodes/NormalizationLayerNode.cpp
index eaa1bcf924..b18bb7dd93 100644
--- a/src/graph/nodes/NormalizationLayerNode.cpp
+++ b/src/graph/nodes/NormalizationLayerNode.cpp
@@ -31,8 +31,7 @@ namespace arm_compute
{
namespace graph
{
-NormalizationLayerNode::NormalizationLayerNode(NormalizationLayerInfo norm_info)
- : _info(norm_info)
+NormalizationLayerNode::NormalizationLayerNode(NormalizationLayerInfo norm_info) : _info(norm_info)
{
_input_edges.resize(1, EmptyEdgeID);
_outputs.resize(1, NullTensorID);
@@ -45,7 +44,7 @@ NormalizationLayerInfo NormalizationLayerNode::normalization_info() const
bool NormalizationLayerNode::forward_descriptors()
{
- if(input_id(0) != NullTensorID && (output_id(0) != NullTensorID))
+ if (input_id(0) != NullTensorID && (output_id(0) != NullTensorID))
{
Tensor *dst = output(0);
ARM_COMPUTE_ERROR_ON(dst == nullptr);
@@ -76,4 +75,4 @@ void NormalizationLayerNode::accept(INodeVisitor &v)
v.visit(*this);
}
} // namespace graph
-} // namespace arm_compute \ No newline at end of file
+} // namespace arm_compute
diff --git a/src/graph/nodes/NormalizePlanarYUVLayerNode.cpp b/src/graph/nodes/NormalizePlanarYUVLayerNode.cpp
index 113d0a541f..cac96606ea 100644
--- a/src/graph/nodes/NormalizePlanarYUVLayerNode.cpp
+++ b/src/graph/nodes/NormalizePlanarYUVLayerNode.cpp
@@ -39,7 +39,7 @@ NormalizePlanarYUVLayerNode::NormalizePlanarYUVLayerNode()
bool NormalizePlanarYUVLayerNode::forward_descriptors()
{
- if((input_id(0) != NullTensorID) && (output_id(0) != NullTensorID))
+ if ((input_id(0) != NullTensorID) && (output_id(0) != NullTensorID))
{
Tensor *dst = output(0);
ARM_COMPUTE_ERROR_ON(dst == nullptr);
diff --git a/src/graph/nodes/PReluLayerNode.cpp b/src/graph/nodes/PReluLayerNode.cpp
index 378c18e3bb..2b50fe9234 100644
--- a/src/graph/nodes/PReluLayerNode.cpp
+++ b/src/graph/nodes/PReluLayerNode.cpp
@@ -38,7 +38,7 @@ PReluLayerNode::PReluLayerNode()
bool PReluLayerNode::forward_descriptors()
{
- if((input_id(0) != NullTensorID) && (output_id(0) != NullTensorID))
+ if ((input_id(0) != NullTensorID) && (output_id(0) != NullTensorID))
{
Tensor *dst = output(0);
ARM_COMPUTE_ERROR_ON(dst == nullptr);
diff --git a/src/graph/nodes/PadLayerNode.cpp b/src/graph/nodes/PadLayerNode.cpp
index 6424370d41..336e7de05a 100644
--- a/src/graph/nodes/PadLayerNode.cpp
+++ b/src/graph/nodes/PadLayerNode.cpp
@@ -23,17 +23,15 @@
*/
#include "arm_compute/graph/nodes/PadLayerNode.h"
+#include "arm_compute/core/Helpers.h"
#include "arm_compute/graph/Graph.h"
#include "arm_compute/graph/INodeVisitor.h"
-#include "arm_compute/core/Helpers.h"
-
namespace arm_compute
{
namespace graph
{
-PadLayerNode::PadLayerNode(const PaddingList &padding, PixelValue pad_value)
- : _padding(padding), _pad_value(pad_value)
+PadLayerNode::PadLayerNode(const PaddingList &padding, PixelValue pad_value) : _padding(padding), _pad_value(pad_value)
{
_input_edges.resize(1, EmptyEdgeID);
_outputs.resize(1, NullTensorID);
@@ -51,7 +49,7 @@ PixelValue PadLayerNode::pad_value() const
bool PadLayerNode::forward_descriptors()
{
- if((input_id(0) != NullTensorID) && (output_id(0) != NullTensorID))
+ if ((input_id(0) != NullTensorID) && (output_id(0) != NullTensorID))
{
Tensor *dst = output(0);
ARM_COMPUTE_ERROR_ON(dst == nullptr);
@@ -71,7 +69,7 @@ TensorDescriptor PadLayerNode::configure_output(size_t idx) const
TensorDescriptor output_desc = src->desc();
const TensorShape input_shape = src->desc().shape;
- for(size_t dim = 0; dim < _padding.size(); ++dim)
+ for (size_t dim = 0; dim < _padding.size(); ++dim)
{
output_desc.shape.set(dim, _padding[dim].first + input_shape[dim] + _padding[dim].second);
}
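
Each padded output dimension in the loop above is pad_before + input + pad_after. The same loop over plain std::pair values; PaddingList is assumed here to behave like a vector of such pairs:

#include <cstdio>
#include <utility>
#include <vector>

int main()
{
    std::vector<unsigned int> shape{224, 224, 3};
    const std::vector<std::pair<unsigned int, unsigned int>> padding{{1, 1}, {2, 2}}; // dims 0 and 1 only

    for (size_t dim = 0; dim < padding.size(); ++dim)
    {
        shape[dim] = padding[dim].first + shape[dim] + padding[dim].second;
    }
    std::printf("padded: [%u, %u, %u]\n", shape[0], shape[1], shape[2]); // [226, 228, 3]
    return 0;
}
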
diff --git a/src/graph/nodes/PermuteLayerNode.cpp b/src/graph/nodes/PermuteLayerNode.cpp
index b311ee1301..db53722363 100644
--- a/src/graph/nodes/PermuteLayerNode.cpp
+++ b/src/graph/nodes/PermuteLayerNode.cpp
@@ -23,17 +23,15 @@
*/
#include "arm_compute/graph/nodes/PermuteLayerNode.h"
+#include "arm_compute/core/Helpers.h"
#include "arm_compute/graph/Graph.h"
#include "arm_compute/graph/INodeVisitor.h"
-#include "arm_compute/core/Helpers.h"
-
namespace arm_compute
{
namespace graph
{
-PermuteLayerNode::PermuteLayerNode(PermutationVector perm, DataLayout layout)
- : _perm(perm), _layout(layout)
+PermuteLayerNode::PermuteLayerNode(PermutationVector perm, DataLayout layout) : _perm(perm), _layout(layout)
{
_input_edges.resize(1, EmptyEdgeID);
_outputs.resize(1, NullTensorID);
@@ -46,7 +44,7 @@ const PermutationVector &PermuteLayerNode::permutation_vector() const
bool PermuteLayerNode::forward_descriptors()
{
- if((input_id(0) != NullTensorID) && (output_id(0) != NullTensorID))
+ if ((input_id(0) != NullTensorID) && (output_id(0) != NullTensorID))
{
Tensor *dst = output(0);
ARM_COMPUTE_ERROR_ON(dst == nullptr);
@@ -66,7 +64,7 @@ TensorDescriptor PermuteLayerNode::configure_output(size_t idx) const
TensorDescriptor output_desc = src->desc();
permute(output_desc.shape, _perm);
- if(_layout != DataLayout::UNKNOWN)
+ if (_layout != DataLayout::UNKNOWN)
{
output_desc.layout = _layout;
}
@@ -84,4 +82,4 @@ void PermuteLayerNode::accept(INodeVisitor &v)
v.visit(*this);
}
} // namespace graph
-} // namespace arm_compute \ No newline at end of file
+} // namespace arm_compute
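
A standalone sketch of the permute step above, assuming the convention that output dimension i takes its extent from input dimension perm[i]; the library's permute() helper operates on Dimensions objects, so verify the convention against it before relying on this:

#include <cstdio>
#include <vector>

int main()
{
    const std::vector<unsigned int> in{224, 224, 3}; // W, H, C (illustrative)
    const std::vector<unsigned int> perm{2, 0, 1};   // e.g. a NHWC -> NCHW style reorder

    std::vector<unsigned int> out(in.size());
    for (size_t i = 0; i < perm.size(); ++i)
    {
        out[i] = in[perm[i]];
    }
    std::printf("permuted: [%u, %u, %u]\n", out[0], out[1], out[2]); // [3, 224, 224]
    return 0;
}
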
diff --git a/src/graph/nodes/PoolingLayerNode.cpp b/src/graph/nodes/PoolingLayerNode.cpp
index 4ecf924a5e..ac954acbe3 100644
--- a/src/graph/nodes/PoolingLayerNode.cpp
+++ b/src/graph/nodes/PoolingLayerNode.cpp
@@ -32,8 +32,7 @@ namespace arm_compute
{
namespace graph
{
-PoolingLayerNode::PoolingLayerNode(PoolingLayerInfo pool_info)
- : _info(std::move(pool_info))
+PoolingLayerNode::PoolingLayerNode(PoolingLayerInfo pool_info) : _info(std::move(pool_info))
{
_input_edges.resize(1, EmptyEdgeID);
_outputs.resize(1, NullTensorID);
@@ -55,7 +54,8 @@ TensorDescriptor PoolingLayerNode::compute_output_descriptor(const TensorDescrip
const unsigned int pool_size_x = info.is_global_pooling ? input_width : info.pool_size.width;
const unsigned int pool_size_y = info.is_global_pooling ? input_height : info.pool_size.height;
- std::tie(pooled_width, pooled_height) = scaled_dimensions(input_width, input_height, pool_size_x, pool_size_y, info.pad_stride_info);
+ std::tie(pooled_width, pooled_height) =
+ scaled_dimensions(input_width, input_height, pool_size_x, pool_size_y, info.pad_stride_info);
const DataLayout data_layout = input_descriptor.layout;
TensorDescriptor output_descriptor = input_descriptor;
@@ -67,7 +67,7 @@ TensorDescriptor PoolingLayerNode::compute_output_descriptor(const TensorDescrip
bool PoolingLayerNode::forward_descriptors()
{
- if((input_id(0) != NullTensorID) && (output_id(0) != NullTensorID))
+ if ((input_id(0) != NullTensorID) && (output_id(0) != NullTensorID))
{
Tensor *dst = output(0);
ARM_COMPUTE_ERROR_ON(dst == nullptr);
@@ -98,4 +98,4 @@ void PoolingLayerNode::accept(INodeVisitor &v)
v.visit(*this);
}
} // namespace graph
-} // namespace arm_compute \ No newline at end of file
+} // namespace arm_compute
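
When is_global_pooling is set, the branch above uses the whole input extent as the pooling window, so the scaled dimensions collapse to 1x1. A sketch with FLOOR rounding and zero padding assumed:

#include <cstdio>

int main()
{
    const unsigned int in_w = 7, in_h = 7;
    const unsigned int pool_x = in_w, pool_y = in_h; // is_global_pooling == true
    const unsigned int stride = 1, pad = 0;

    const unsigned int out_w = (in_w + 2 * pad - pool_x) / stride + 1;
    const unsigned int out_h = (in_h + 2 * pad - pool_y) / stride + 1;
    std::printf("pooled: %ux%u\n", out_w, out_h); // 1x1
    return 0;
}
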
diff --git a/src/graph/nodes/PrintLayerNode.cpp b/src/graph/nodes/PrintLayerNode.cpp
index da408d8c4d..82a340005b 100644
--- a/src/graph/nodes/PrintLayerNode.cpp
+++ b/src/graph/nodes/PrintLayerNode.cpp
@@ -32,7 +32,9 @@ namespace arm_compute
{
namespace graph
{
-PrintLayerNode::PrintLayerNode(std::ostream &stream, const IOFormatInfo &format_info, const std::function<ITensor *(ITensor *)> transform)
+PrintLayerNode::PrintLayerNode(std::ostream &stream,
+ const IOFormatInfo &format_info,
+ const std::function<ITensor *(ITensor *)> transform)
: _stream(stream), _format_info(format_info), _transform(transform)
{
_input_edges.resize(1, EmptyEdgeID);
@@ -56,7 +58,7 @@ const std::function<ITensor *(ITensor *)> PrintLayerNode::transform() const
bool PrintLayerNode::forward_descriptors()
{
- if((input_id(0) != NullTensorID) && (output_id(0) != NullTensorID))
+ if ((input_id(0) != NullTensorID) && (output_id(0) != NullTensorID))
{
Tensor *dst = output(0);
ARM_COMPUTE_ERROR_ON(dst == nullptr);
@@ -88,4 +90,4 @@ void PrintLayerNode::accept(INodeVisitor &v)
v.visit(*this);
}
} // namespace graph
-} // namespace arm_compute \ No newline at end of file
+} // namespace arm_compute
diff --git a/src/graph/nodes/PriorBoxLayerNode.cpp b/src/graph/nodes/PriorBoxLayerNode.cpp
index f017ead880..5ffb173333 100644
--- a/src/graph/nodes/PriorBoxLayerNode.cpp
+++ b/src/graph/nodes/PriorBoxLayerNode.cpp
@@ -32,8 +32,7 @@ namespace arm_compute
{
namespace graph
{
-PriorBoxLayerNode::PriorBoxLayerNode(PriorBoxLayerInfo prior_info)
- : _info(std::move(prior_info))
+PriorBoxLayerNode::PriorBoxLayerNode(PriorBoxLayerInfo prior_info) : _info(std::move(prior_info))
{
_input_edges.resize(2, EmptyEdgeID);
_outputs.resize(1, NullTensorID);
@@ -44,7 +43,7 @@ PriorBoxLayerInfo PriorBoxLayerNode::priorbox_info() const
return _info;
}
-TensorDescriptor PriorBoxLayerNode::compute_output_descriptor(const TensorDescriptor &input_descriptor,
+TensorDescriptor PriorBoxLayerNode::compute_output_descriptor(const TensorDescriptor &input_descriptor,
const PriorBoxLayerInfo &info)
{
const unsigned int layer_width = get_dimension_size(input_descriptor, DataLayoutDimension::WIDTH);
@@ -61,7 +60,7 @@ TensorDescriptor PriorBoxLayerNode::compute_output_descriptor(const TensorDescri
bool PriorBoxLayerNode::forward_descriptors()
{
- if((input_id(0) != NullTensorID) && (output_id(0) != NullTensorID))
+ if ((input_id(0) != NullTensorID) && (output_id(0) != NullTensorID))
{
Tensor *dst = output(0);
ARM_COMPUTE_ERROR_ON(dst == nullptr);
diff --git a/src/graph/nodes/QuantizationLayerNode.cpp b/src/graph/nodes/QuantizationLayerNode.cpp
index 08e2a4d961..0dd2da919d 100644
--- a/src/graph/nodes/QuantizationLayerNode.cpp
+++ b/src/graph/nodes/QuantizationLayerNode.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2020,2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -23,6 +23,7 @@
*/
#include "arm_compute/graph/nodes/QuantizationLayerNode.h"
+#include "arm_compute/core/utils/DataTypeUtils.h"
#include "arm_compute/graph/Graph.h"
#include "arm_compute/graph/INodeVisitor.h"
@@ -46,7 +47,7 @@ QuantizationLayerNode::QuantizationLayerNode(QuantizationInfo out_quant_info, Da
bool QuantizationLayerNode::forward_descriptors()
{
- if((input_id(0) != NullTensorID) && (output_id(0) != NullTensorID))
+ if ((input_id(0) != NullTensorID) && (output_id(0) != NullTensorID))
{
Tensor *dst = output(0);
ARM_COMPUTE_ERROR_ON(dst == nullptr);
diff --git a/src/graph/nodes/ROIAlignLayerNode.cpp b/src/graph/nodes/ROIAlignLayerNode.cpp
index 62891811f3..5909335826 100644
--- a/src/graph/nodes/ROIAlignLayerNode.cpp
+++ b/src/graph/nodes/ROIAlignLayerNode.cpp
@@ -24,17 +24,15 @@
#include "arm_compute/graph/nodes/ROIAlignLayerNode.h"
+#include "arm_compute/core/Helpers.h"
#include "arm_compute/graph/Graph.h"
#include "arm_compute/graph/INodeVisitor.h"
-#include "arm_compute/core/Helpers.h"
-
namespace arm_compute
{
namespace graph
{
-ROIAlignLayerNode::ROIAlignLayerNode(ROIPoolingLayerInfo &pool_info)
- : _pool_info(pool_info)
+ROIAlignLayerNode::ROIAlignLayerNode(ROIPoolingLayerInfo &pool_info) : _pool_info(pool_info)
{
_input_edges.resize(2, EmptyEdgeID);
_outputs.resize(1, NullTensorID);
@@ -47,7 +45,7 @@ const ROIPoolingLayerInfo &ROIAlignLayerNode::pooling_info() const
bool ROIAlignLayerNode::forward_descriptors()
{
- if((input_id(0) != NullTensorID) && (input_id(1) != NullTensorID) && (output_id(0) != NullTensorID))
+ if ((input_id(0) != NullTensorID) && (input_id(1) != NullTensorID) && (output_id(0) != NullTensorID))
{
Tensor *dst = output(0);
ARM_COMPUTE_ERROR_ON(dst == nullptr);
@@ -92,4 +90,4 @@ void ROIAlignLayerNode::accept(INodeVisitor &v)
v.visit(*this);
}
} // namespace graph
-} // namespace arm_compute \ No newline at end of file
+} // namespace arm_compute
diff --git a/src/graph/nodes/ReductionLayerNode.cpp b/src/graph/nodes/ReductionLayerNode.cpp
index 0e93039894..965c1ba0a5 100644
--- a/src/graph/nodes/ReductionLayerNode.cpp
+++ b/src/graph/nodes/ReductionLayerNode.cpp
@@ -56,7 +56,7 @@ bool ReductionLayerNode::keep_dims() const
bool ReductionLayerNode::forward_descriptors()
{
- if((input_id(0) != NullTensorID) && (output_id(0) != NullTensorID))
+ if ((input_id(0) != NullTensorID) && (output_id(0) != NullTensorID))
{
Tensor *dst = output(0);
ARM_COMPUTE_ERROR_ON(dst == nullptr);
@@ -74,8 +74,9 @@ TensorDescriptor ReductionLayerNode::configure_output(size_t idx) const
const Tensor *src = input(0);
ARM_COMPUTE_ERROR_ON(src == nullptr);
- TensorDescriptor output_info = src->desc();
- TensorShape output_shape = arm_compute::misc::shape_calculator::compute_reduced_shape(output_info.shape, _axis, _keep_dims);
+ TensorDescriptor output_info = src->desc();
+ TensorShape output_shape =
+ arm_compute::misc::shape_calculator::compute_reduced_shape(output_info.shape, _axis, _keep_dims);
output_info.set_shape(output_shape);
return output_info;
@@ -91,4 +92,4 @@ void ReductionLayerNode::accept(INodeVisitor &v)
v.visit(*this);
}
} // namespace graph
-} // namespace arm_compute \ No newline at end of file
+} // namespace arm_compute
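
compute_reduced_shape(), called in the rewrapped line above, either keeps the reduced axis as extent 1 or drops it, depending on keep_dims. A plain-vector sketch of that behaviour; this is an assumption based on the helper's name and the node's keep_dims flag, not on code shown here:

#include <cstdio>
#include <vector>

std::vector<unsigned int> reduce(std::vector<unsigned int> shape, unsigned int axis, bool keep_dims)
{
    if (keep_dims)
    {
        shape[axis] = 1; // reduced axis kept with extent 1
    }
    else
    {
        shape.erase(shape.begin() + axis); // reduced axis removed entirely
    }
    return shape;
}

int main()
{
    const auto kept    = reduce({4, 8, 16}, 1, true);  // [4, 1, 16]
    const auto dropped = reduce({4, 8, 16}, 1, false); // [4, 16]
    std::printf("%zu dims vs %zu dims\n", kept.size(), dropped.size());
    return 0;
}
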
diff --git a/src/graph/nodes/ReorgLayerNode.cpp b/src/graph/nodes/ReorgLayerNode.cpp
index e693e4b931..251a4ea1b2 100644
--- a/src/graph/nodes/ReorgLayerNode.cpp
+++ b/src/graph/nodes/ReorgLayerNode.cpp
@@ -31,8 +31,7 @@ namespace arm_compute
{
namespace graph
{
-ReorgLayerNode::ReorgLayerNode(int stride)
- : _stride(stride)
+ReorgLayerNode::ReorgLayerNode(int stride) : _stride(stride)
{
_input_edges.resize(1, EmptyEdgeID);
_outputs.resize(1, NullTensorID);
@@ -51,20 +50,22 @@ TensorDescriptor ReorgLayerNode::compute_output_descriptor(const TensorDescripto
ARM_COMPUTE_ERROR_ON(stride <= 0);
ARM_COMPUTE_ERROR_ON_MSG((input_width % stride != 0), "The width of the input tensor must be a multiple of stride");
- ARM_COMPUTE_ERROR_ON_MSG((input_height % stride != 0), "The height of the input tensor must be a multiple of stride");
+ ARM_COMPUTE_ERROR_ON_MSG((input_height % stride != 0),
+ "The height of the input tensor must be a multiple of stride");
const DataLayout data_layout = input_descriptor.layout;
TensorDescriptor output_descriptor = input_descriptor;
output_descriptor.shape.set(get_dimension_idx(data_layout, DataLayoutDimension::WIDTH), input_width / stride);
output_descriptor.shape.set(get_dimension_idx(data_layout, DataLayoutDimension::HEIGHT), input_height / stride);
- output_descriptor.shape.set(get_dimension_idx(data_layout, DataLayoutDimension::CHANNEL), input_channel * stride * stride);
+ output_descriptor.shape.set(get_dimension_idx(data_layout, DataLayoutDimension::CHANNEL),
+ input_channel * stride * stride);
return output_descriptor;
}
bool ReorgLayerNode::forward_descriptors()
{
- if((input_id(0) != NullTensorID) && (output_id(0) != NullTensorID))
+ if ((input_id(0) != NullTensorID) && (output_id(0) != NullTensorID))
{
Tensor *dst = output(0);
ARM_COMPUTE_ERROR_ON(dst == nullptr);
@@ -95,4 +96,4 @@ void ReorgLayerNode::accept(INodeVisitor &v)
v.visit(*this);
}
} // namespace graph
-} // namespace arm_compute \ No newline at end of file
+} // namespace arm_compute
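
The reorg shape rules above move stride x stride spatial blocks into channels, which is why both spatial extents must be divisible by the stride. A standalone restatement that also checks the element count is preserved:

#include <cassert>
#include <cstdio>

int main()
{
    const unsigned int w = 26, h = 26, c = 64;
    const int stride = 2;

    assert(stride > 0 && w % stride == 0 && h % stride == 0);
    const unsigned int out_w = w / stride;
    const unsigned int out_h = h / stride;
    const unsigned int out_c = c * stride * stride;

    std::printf("[%u,%u,%u] -> [%u,%u,%u]\n", w, h, c, out_w, out_h, out_c); // [13,13,256]
    assert(w * h * c == out_w * out_h * out_c); // reorg is a pure rearrangement
    return 0;
}
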
diff --git a/src/graph/nodes/ReshapeLayer.cpp b/src/graph/nodes/ReshapeLayer.cpp
index a6354d03ed..ce6bf9b803 100644
--- a/src/graph/nodes/ReshapeLayer.cpp
+++ b/src/graph/nodes/ReshapeLayer.cpp
@@ -21,17 +21,15 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/graph/nodes/ReshapeLayerNode.h"
-
#include "arm_compute/graph/Graph.h"
#include "arm_compute/graph/INodeVisitor.h"
+#include "arm_compute/graph/nodes/ReshapeLayerNode.h"
namespace arm_compute
{
namespace graph
{
-ReshapeLayerNode::ReshapeLayerNode(TensorShape shape)
- : _shape(shape)
+ReshapeLayerNode::ReshapeLayerNode(TensorShape shape) : _shape(shape)
{
_input_edges.resize(1, EmptyEdgeID);
_outputs.resize(1, NullTensorID);
@@ -39,7 +37,7 @@ ReshapeLayerNode::ReshapeLayerNode(TensorShape shape)
bool ReshapeLayerNode::forward_descriptors()
{
- if((input_id(0) != NullTensorID) && (output_id(0) != NullTensorID))
+ if ((input_id(0) != NullTensorID) && (output_id(0) != NullTensorID))
{
Tensor *dst = output(0);
ARM_COMPUTE_ERROR_ON(dst == nullptr);
@@ -73,4 +71,4 @@ void ReshapeLayerNode::accept(INodeVisitor &v)
v.visit(*this);
}
} // namespace graph
-} // namespace arm_compute \ No newline at end of file
+} // namespace arm_compute
diff --git a/src/graph/nodes/ResizeLayerNode.cpp b/src/graph/nodes/ResizeLayerNode.cpp
index 2a94bf6063..292b2c643e 100644
--- a/src/graph/nodes/ResizeLayerNode.cpp
+++ b/src/graph/nodes/ResizeLayerNode.cpp
@@ -50,7 +50,7 @@ std::pair<float, float> ResizeLayerNode::scaling_factor() const
bool ResizeLayerNode::forward_descriptors()
{
- if((input_id(0) != NullTensorID) && (output_id(0) != NullTensorID))
+ if ((input_id(0) != NullTensorID) && (output_id(0) != NullTensorID))
{
Tensor *dst = output(0);
ARM_COMPUTE_ERROR_ON(dst == nullptr);
@@ -88,4 +88,4 @@ void ResizeLayerNode::accept(INodeVisitor &v)
v.visit(*this);
}
} // namespace graph
-} // namespace arm_compute \ No newline at end of file
+} // namespace arm_compute
diff --git a/src/graph/nodes/SliceLayerNode.cpp b/src/graph/nodes/SliceLayerNode.cpp
index b7655b9eae..eb877d9a24 100644
--- a/src/graph/nodes/SliceLayerNode.cpp
+++ b/src/graph/nodes/SliceLayerNode.cpp
@@ -32,8 +32,7 @@ namespace arm_compute
{
namespace graph
{
-SliceLayerNode::SliceLayerNode(const Coordinates &starts, const Coordinates &ends)
- : _starts(starts), _ends(ends)
+SliceLayerNode::SliceLayerNode(const Coordinates &starts, const Coordinates &ends) : _starts(starts), _ends(ends)
{
_input_edges.resize(1, EmptyEdgeID);
_outputs.resize(1, NullTensorID);
@@ -50,19 +49,20 @@ Coordinates SliceLayerNode::ends() const
}
TensorDescriptor SliceLayerNode::compute_output_descriptor(const TensorDescriptor &input_descriptor,
- const Coordinates &starts, const Coordinates &ends)
+ const Coordinates &starts,
+ const Coordinates &ends)
{
using namespace arm_compute::helpers::tensor_transform;
TensorDescriptor output_desc = input_descriptor;
- output_desc.shape = arm_compute::misc::shape_calculator::compute_slice_shape(input_descriptor.shape, starts, ends);
+ output_desc.shape = arm_compute::misc::shape_calculator::compute_slice_shape(input_descriptor.shape, starts, ends);
return output_desc;
}
bool SliceLayerNode::forward_descriptors()
{
- if((input_id(0) != NullTensorID) && (output_id(0) != NullTensorID))
+ if ((input_id(0) != NullTensorID) && (output_id(0) != NullTensorID))
{
Tensor *dst = output(0);
ARM_COMPUTE_ERROR_ON(dst == nullptr);
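
compute_slice_shape(), invoked in the realigned line above, spans [starts[i], ends[i]) per dimension. A simplified sketch that omits the clamping and negative-coordinate handling the real helper performs:

#include <cstdio>
#include <vector>

int main()
{
    const std::vector<int> shape{224, 224, 3};
    const std::vector<int> starts{10, 20, 0};
    const std::vector<int> ends{110, 220, 3};

    std::vector<int> out(shape.size());
    for (size_t i = 0; i < shape.size(); ++i)
    {
        out[i] = ends[i] - starts[i]; // extent of the half-open range
    }
    std::printf("slice: [%d, %d, %d]\n", out[0], out[1], out[2]); // [100, 200, 3]
    return 0;
}
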
diff --git a/src/graph/nodes/SoftmaxLayerNode.cpp b/src/graph/nodes/SoftmaxLayerNode.cpp
index 031166993a..4beac81b1f 100644
--- a/src/graph/nodes/SoftmaxLayerNode.cpp
+++ b/src/graph/nodes/SoftmaxLayerNode.cpp
@@ -31,8 +31,7 @@ namespace arm_compute
{
namespace graph
{
-SoftmaxLayerNode::SoftmaxLayerNode(float beta)
- : _beta(beta)
+SoftmaxLayerNode::SoftmaxLayerNode(float beta) : _beta(beta)
{
_input_edges.resize(1, EmptyEdgeID);
_outputs.resize(1, NullTensorID);
@@ -45,7 +44,7 @@ float SoftmaxLayerNode::beta() const
bool SoftmaxLayerNode::forward_descriptors()
{
- if((input_id(0) != NullTensorID) && (output_id(0) != NullTensorID))
+ if ((input_id(0) != NullTensorID) && (output_id(0) != NullTensorID))
{
Tensor *dst = output(0);
ARM_COMPUTE_ERROR_ON(dst == nullptr);
@@ -79,4 +78,4 @@ void SoftmaxLayerNode::accept(INodeVisitor &v)
v.visit(*this);
}
} // namespace graph
-} // namespace arm_compute \ No newline at end of file
+} // namespace arm_compute
diff --git a/src/graph/nodes/SplitLayerNode.cpp b/src/graph/nodes/SplitLayerNode.cpp
index 31931c3a79..dfb6624f80 100644
--- a/src/graph/nodes/SplitLayerNode.cpp
+++ b/src/graph/nodes/SplitLayerNode.cpp
@@ -49,8 +49,8 @@ unsigned int SplitLayerNode::axis() const
return _axis;
}
-std::pair<TensorDescriptor, Coordinates> SplitLayerNode::compute_output_descriptor(const TensorDescriptor &input_descriptor,
- unsigned int num_splits, int axis, unsigned int idx)
+std::pair<TensorDescriptor, Coordinates> SplitLayerNode::compute_output_descriptor(
+ const TensorDescriptor &input_descriptor, unsigned int num_splits, int axis, unsigned int idx)
{
    // Handle negative axis; a negative index specifies the axis from the end (e.g. -1 for the last axis).
int num_dimension = static_cast<int32_t>(input_descriptor.shape.num_dimensions());
@@ -58,7 +58,7 @@ std::pair<TensorDescriptor, Coordinates> SplitLayerNode::compute_output_descript
Coordinates coords;
TensorDescriptor output_descriptor = input_descriptor;
int split_size = input_descriptor.shape[tmp_axis] / num_splits;
- if(_size_splits.empty())
+ if (_size_splits.empty())
{
output_descriptor.shape.set(tmp_axis, split_size);
coords.set(tmp_axis, idx * split_size);
@@ -66,15 +66,15 @@ std::pair<TensorDescriptor, Coordinates> SplitLayerNode::compute_output_descript
else
{
int split_size = _size_splits[idx];
- if(split_size == -1)
+ if (split_size == -1)
{
split_size = input_descriptor.shape[tmp_axis];
- for(unsigned int i = 0; i < _size_splits.size() - 1; ++i)
+ for (unsigned int i = 0; i < _size_splits.size() - 1; ++i)
split_size -= _size_splits[i];
}
output_descriptor.shape.set(tmp_axis, split_size);
int coord_value = 0;
- for(unsigned int i = 0; i < idx; ++i)
+ for (unsigned int i = 0; i < idx; ++i)
coord_value += _size_splits[i];
coords.set(tmp_axis, coord_value);
}
@@ -84,12 +84,12 @@ std::pair<TensorDescriptor, Coordinates> SplitLayerNode::compute_output_descript
bool SplitLayerNode::forward_descriptors()
{
- if(input_id(0) != NullTensorID)
+ if (input_id(0) != NullTensorID)
{
validate();
- for(unsigned int i = 0; i < _outputs.size(); ++i)
+ for (unsigned int i = 0; i < _outputs.size(); ++i)
{
- if(output_id(i) != NullTensorID)
+ if (output_id(i) != NullTensorID)
{
Tensor *dst_i = output(i);
ARM_COMPUTE_ERROR_ON(dst_i == nullptr);
@@ -117,10 +117,10 @@ TensorDescriptor SplitLayerNode::configure_output(size_t idx) const
int tmp_axis = wrap_around(_axis, num_dimension);
int split_size = (_size_splits.empty()) ? (input_descriptor.shape[tmp_axis] / _num_splits) : _size_splits[idx];
- if(split_size == -1)
+ if (split_size == -1)
{
split_size = input_descriptor.shape[tmp_axis];
- for(unsigned int i = 0; i < _size_splits.size() - 1; ++i)
+ for (unsigned int i = 0; i < _size_splits.size() - 1; ++i)
split_size -= _size_splits[i];
}
output_descriptor.shape.set(tmp_axis, split_size);
@@ -138,7 +138,7 @@ Status SplitLayerNode::validate() const
    // Handle negative axis; a negative index specifies the axis from the end (e.g. -1 for the last axis).
int tmp_axis = wrap_around(_axis, num_dimension);
- if(_size_splits.empty())
+ if (_size_splits.empty())
{
ARM_COMPUTE_RETURN_ERROR_ON_MSG(src->desc().shape[tmp_axis] % _num_splits, "Split should be exact");
}
@@ -156,4 +156,4 @@ void SplitLayerNode::accept(INodeVisitor &v)
v.visit(*this);
}
} // namespace graph
-} // namespace arm_compute \ No newline at end of file
+} // namespace arm_compute
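
The size_splits logic reformatted above lets a -1 entry absorb whatever remains of the split axis once the explicit splits are taken (the axis itself is first normalised with wrap_around). A small sketch of the -1 handling in isolation:

#include <cstdio>
#include <vector>

int main()
{
    const int axis_extent = 10;
    const std::vector<int> size_splits{2, 3, -1};

    for (size_t idx = 0; idx < size_splits.size(); ++idx)
    {
        int split_size = size_splits[idx];
        if (split_size == -1)
        {
            // Remainder split: subtract every explicit split, as in the node.
            split_size = axis_extent;
            for (size_t i = 0; i < size_splits.size() - 1; ++i)
                split_size -= size_splits[i];
        }
        std::printf("split %zu: %d\n", idx, split_size); // 2, 3, 5
    }
    return 0;
}
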
diff --git a/src/graph/nodes/StackLayerNode.cpp b/src/graph/nodes/StackLayerNode.cpp
index f292b33ad0..031d8fc739 100644
--- a/src/graph/nodes/StackLayerNode.cpp
+++ b/src/graph/nodes/StackLayerNode.cpp
@@ -25,18 +25,16 @@
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/graph/Graph.h"
#include "arm_compute/graph/INodeVisitor.h"
#include "arm_compute/graph/Utils.h"
-#include "arm_compute/core/utils/misc/ShapeCalculator.h"
-
namespace arm_compute
{
namespace graph
{
-StackLayerNode::StackLayerNode(unsigned int total_nodes, int axis)
- : _total_nodes(total_nodes), _axis(axis)
+StackLayerNode::StackLayerNode(unsigned int total_nodes, int axis) : _total_nodes(total_nodes), _axis(axis)
{
_input_edges.resize(_total_nodes, EmptyEdgeID);
_outputs.resize(1, NullTensorID);
@@ -64,7 +62,7 @@ TensorDescriptor StackLayerNode::compute_output_descriptor(const std::vector<Ten
bool StackLayerNode::forward_descriptors()
{
- if(_outputs[0] != NullTensorID)
+ if (_outputs[0] != NullTensorID)
{
Tensor *dst = output(0);
ARM_COMPUTE_ERROR_ON(dst == nullptr);
@@ -80,17 +78,15 @@ TensorDescriptor StackLayerNode::configure_output(size_t idx) const
ARM_COMPUTE_ERROR_ON(idx >= _outputs.size());
// Check if all input tensors are set
- bool are_all_inputs_set = std::all_of(std::begin(_input_edges), std::end(_input_edges), [](const EdgeID & eid)
- {
- return eid != EmptyEdgeID;
- });
+ bool are_all_inputs_set = std::all_of(std::begin(_input_edges), std::end(_input_edges),
+ [](const EdgeID &eid) { return eid != EmptyEdgeID; });
TensorDescriptor output_info = {};
- if(are_all_inputs_set)
+ if (are_all_inputs_set)
{
std::vector<TensorDescriptor> inputs_descriptors;
- for(unsigned int i = 0; i < _input_edges.size(); ++i)
+ for (unsigned int i = 0; i < _input_edges.size(); ++i)
{
const Tensor *t = _graph->tensor(input_id(i));
ARM_COMPUTE_ERROR_ON(t == nullptr);
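
The reflowed std::all_of above defers output configuration until every input edge is bound. The same guard in isolation, with EmptyEdgeID assumed here to be a -1 sentinel:

#include <algorithm>
#include <cstdio>
#include <vector>

int main()
{
    using EdgeID = int;
    constexpr EdgeID EmptyEdgeID = -1; // assumed sentinel, mirroring the graph API

    const std::vector<EdgeID> input_edges{0, 1, EmptyEdgeID, 3};
    const bool ready = std::all_of(std::begin(input_edges), std::end(input_edges),
                                   [](const EdgeID &eid) { return eid != EmptyEdgeID; });
    std::printf("all inputs set: %s\n", ready ? "yes" : "no"); // no
    return 0;
}
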
diff --git a/src/graph/nodes/StridedSliceLayerNode.cpp b/src/graph/nodes/StridedSliceLayerNode.cpp
index 6a1a724bb3..fc9f72204c 100644
--- a/src/graph/nodes/StridedSliceLayerNode.cpp
+++ b/src/graph/nodes/StridedSliceLayerNode.cpp
@@ -79,7 +79,7 @@ TensorDescriptor StridedSliceLayerNode::compute_output_descriptor(const TensorDe
bool StridedSliceLayerNode::forward_descriptors()
{
- if((input_id(0) != NullTensorID) && (output_id(0) != NullTensorID))
+ if ((input_id(0) != NullTensorID) && (output_id(0) != NullTensorID))
{
Tensor *dst = output(0);
ARM_COMPUTE_ERROR_ON(dst == nullptr);
diff --git a/src/graph/printers/DotGraphPrinter.cpp b/src/graph/printers/DotGraphPrinter.cpp
index 2e1e9d0951..5587ed23f0 100644
--- a/src/graph/printers/DotGraphPrinter.cpp
+++ b/src/graph/printers/DotGraphPrinter.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -25,9 +25,9 @@
#include "arm_compute/core/Error.h"
#include "arm_compute/graph/Graph.h"
+#include "arm_compute/graph/nodes/Nodes.h"
#include "arm_compute/graph/Tensor.h"
#include "arm_compute/graph/TypePrinter.h"
-#include "arm_compute/graph/nodes/Nodes.h"
namespace arm_compute
{
@@ -111,8 +111,9 @@ void DotGraphVisitor::visit(PoolingLayerNode &n)
_info = ss.str();
}
-void DotGraphVisitor::default_visit()
+void DotGraphVisitor::default_visit(INode &n)
{
+ ARM_COMPUTE_UNUSED(n);
_info.clear();
}
@@ -151,9 +152,9 @@ void DotGraphPrinter::print_footer(const Graph &g, std::ostream &os)
void DotGraphPrinter::print_nodes(const Graph &g, std::ostream &os)
{
- for(const auto &n : g.nodes())
+ for (const auto &n : g.nodes())
{
- if(n)
+ if (n)
{
// Output node id
std::string node_id = std::string("n") + support::cpp11::to_string(n->id());
@@ -165,7 +166,8 @@ void DotGraphPrinter::print_nodes(const Graph &g, std::ostream &os)
std::string name = n->name().empty() ? node_id : n->name();
auto node_description = _dot_node_visitor.info();
- os << R"([label = ")" << name << R"( \n )" << n->assigned_target() << R"( \n )" << node_description << R"("])";
+ os << R"([label = ")" << name << R"( \n )" << n->assigned_target() << R"( \n )" << node_description
+ << R"("])";
os << ";\n";
}
}
@@ -173,16 +175,17 @@ void DotGraphPrinter::print_nodes(const Graph &g, std::ostream &os)
void DotGraphPrinter::print_edges(const Graph &g, std::ostream &os)
{
- for(const auto &e : g.edges())
+ for (const auto &e : g.edges())
{
- if(e)
+ if (e)
{
std::string source_node_id = std::string("n") + support::cpp11::to_string(e->producer_id());
std::string sink_node_id = std::string("n") + support::cpp11::to_string(e->consumer_id());
os << source_node_id << " -> " << sink_node_id << " ";
const Tensor *t = e->tensor();
ARM_COMPUTE_ERROR_ON(t == nullptr);
- os << R"([label = ")" << t->desc().shape << R"( \n )" << t->desc().data_type << R"( \n )" << t->desc().layout << R"("])";
+ os << R"([label = ")" << t->desc().shape << R"( \n )" << t->desc().data_type << R"( \n )"
+ << t->desc().layout << R"("])";
os << ";\n";
}
}
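
The rewrapped streaming statements above rely on raw string literals so the DOT escape sequences (the literal \n inside labels) stay readable. A minimal sketch that emits one node statement the same way; the identifier and label values are invented:

#include <iostream>
#include <string>

int main()
{
    const std::string node_id = "n42";
    const std::string name    = "conv1";
    const std::string target  = "CL";
    const std::string info    = "3x3 conv";

    // R"( \n )" keeps backslash-n as two literal characters, which is what
    // DOT expects inside a label.
    std::cout << node_id << " ";
    std::cout << R"([label = ")" << name << R"( \n )" << target << R"( \n )" << info << R"("])";
    std::cout << ";\n";
    return 0;
}
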