15 files changed, 351 insertions, 974 deletions
diff --git a/src/graph/backends/BackendRegistry.cpp b/src/graph/backends/BackendRegistry.cpp
index 46b4f99e23..bb6af79f8b 100644
--- a/src/graph/backends/BackendRegistry.cpp
+++ b/src/graph/backends/BackendRegistry.cpp
@@ -31,8 +31,7 @@ namespace graph
 {
 namespace backends
 {
-BackendRegistry::BackendRegistry()
-    : _registered_backends()
+BackendRegistry::BackendRegistry() : _registered_backends() // value-initializes the member; nothing is registered here
 {
 }
 
diff --git a/src/graph/backends/CL/CLDeviceBackend.cpp b/src/graph/backends/CL/CLDeviceBackend.cpp
index b2d58e35be..e27a4109d1 100644
--- a/src/graph/backends/CL/CLDeviceBackend.cpp
+++ b/src/graph/backends/CL/CLDeviceBackend.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -23,19 +23,17 @@
  */
 #include "arm_compute/graph/backends/CL/CLDeviceBackend.h"
 
-#include "arm_compute/graph/Graph.h"
-#include "arm_compute/graph/GraphContext.h"
-#include "arm_compute/graph/INode.h"
-#include "arm_compute/graph/Logger.h"
-#include "arm_compute/graph/Tensor.h"
+#include "arm_compute/core/TensorInfo.h"
 #include "arm_compute/graph/backends/BackendRegistrar.h"
 #include "arm_compute/graph/backends/CL/CLFunctionFactory.h"
 #include "arm_compute/graph/backends/CL/CLNodeValidator.h"
 #include "arm_compute/graph/backends/CL/CLSubTensorHandle.h"
 #include "arm_compute/graph/backends/CL/CLTensorHandle.h"
-
-#include "arm_compute/core/CL/CLCoreRuntimeContext.h"
-#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/graph/Graph.h"
+#include "arm_compute/graph/GraphContext.h"
+#include "arm_compute/graph/INode.h"
+#include "arm_compute/graph/Logger.h"
+#include "arm_compute/graph/Tensor.h"
 #include "arm_compute/runtime/BlobLifetimeManager.h"
 #include "arm_compute/runtime/CL/CLBufferAllocator.h"
 #include "arm_compute/runtime/CL/CLScheduler.h"
@@ -65,17 +63,18 @@ bool file_exists(const std::string &filename)
 static detail::BackendRegistrar<CLDeviceBackend> CLDeviceBackend_registrar(Target::CL);
 
 CLDeviceBackend::CLDeviceBackend()
-    : _context_count(0), _tuner(), _allocator(nullptr), _tuner_file()
+    : _context_count(0), // incremented/decremented in setup_/release_backend_context()
+      _tuner(),
+      _gemm_heuristics(), // its address is handed to the scheduler in initialize_backend()
+      _allocator(nullptr), // created in initialize_backend()
+      _tuner_file(), // assigned from ctx.config().tuner_file in setup_backend_context()
+      _backend_type(CLBackendType::Native) // default; overwritten from ctx.config() on first context setup
 {
 }
 
 CLDeviceBackend::~CLDeviceBackend()
 {
-    // TODO (geopin01) : Shouldn't call non exception safe stuff here
-    if(_tuner.tune_new_kernels() && !_tuner.lws_table().empty() && !_tuner_file.empty())
-    {
-        _tuner.save_to_file(_tuner_file);
-    }
+    _tuner.save_to_file(_tuner_file); // NOTE(review): unguarded; presumably save_to_file() rejects an empty name/table itself and does not throw - confirm
 }
 
 void CLDeviceBackend::set_kernel_tuning(bool enable_tuning)
@@ -91,16 +90,16 @@ void CLDeviceBackend::set_kernel_tuning_mode(CLTunerMode tuning_mode)
 void CLDeviceBackend::initialize_backend()
 {
     // Setup Scheduler
-    CLScheduler::get().default_init(&_tuner);
+    CLScheduler::get().default_init(&_tuner, &_gemm_heuristics, _backend_type); // hands the scheduler pointers to this backend's tuner and GEMM heuristics
     // Create allocator with new context
-    _allocator = support::cpp14::make_unique<CLBufferAllocator>(nullptr /* legacy path for CLCoreRuntimeContext */);
+    _allocator = std::make_unique<CLBufferAllocator>(); // default-constructed allocator
 }
 
 void CLDeviceBackend::release_backend_context(GraphContext &ctx)
 {
     ARM_COMPUTE_UNUSED(ctx);
     _context_count--;
-    if(_context_count == 0) // No more context using the backend: free resources
+    if (_context_count == 0) // No more context using the backend: free resources
     {
         _allocator = nullptr;
     }
@@ -110,15 +109,17 @@ void CLDeviceBackend::setup_backend_context(GraphContext &ctx)
 {
     // Force backend initialization
     _context_count++;
-    if(_context_count == 1)
+    if (_context_count == 1)
     {
+        _backend_type = ctx.config().backend_type;
         initialize_backend();
     }
 
     // Setup tuner
     _tuner_file = ctx.config().tuner_file;
+
     // Load tuner data if available
-    if(file_exists(_tuner_file))
+    if (file_exists(_tuner_file))
     {
         _tuner.load_from_file(_tuner_file);
     }
@@ -126,8 +127,12 @@ void CLDeviceBackend::setup_backend_context(GraphContext &ctx)
     set_kernel_tuning(ctx.config().use_tuner);
     set_kernel_tuning_mode(ctx.config().tuner_mode);
 
+    // Attempt to load mlgo heuristics
+    ARM_COMPUTE_ERROR_ON(CLScheduler::get().gemm_heuristics() == nullptr);
+    CLScheduler::get().gemm_heuristics()->reload_from_file(ctx.config().mlgo_file);
+
     // Setup a management backend
-    if(ctx.memory_management_ctx(Target::CL) == nullptr)
+    if (ctx.memory_management_ctx(Target::CL) == nullptr)
     {
         MemoryManagerContext mm_ctx;
         mm_ctx.target      = Target::CL;
@@ -140,7 +145,7 @@ void CLDeviceBackend::setup_backend_context(GraphContext &ctx)
     }
 
     // Create function level weights manager
-    if(ctx.weights_management_ctx(Target::CL) == nullptr)
+    if (ctx.weights_management_ctx(Target::CL) == nullptr)
     {
         WeightsManagerContext wm_ctx;
         wm_ctx.target = Target::CL;
@@ -170,17 +175,18 @@ std::unique_ptr<ITensorHandle> CLDeviceBackend::create_tensor(const Tensor &tens
     TensorInfo info(tensor_desc.shape, 1, tensor_desc.data_type, tensor_desc.quant_info);
     info.set_data_layout(tensor_desc.layout);
 
-    return support::cpp14::make_unique<CLTensorHandle>(info);
+    return std::make_unique<CLTensorHandle>(info);
 }
 
-std::unique_ptr<ITensorHandle> CLDeviceBackend::create_subtensor(ITensorHandle *parent, TensorShape shape, Coordinates coords, bool extend_parent)
+std::unique_ptr<ITensorHandle>
+CLDeviceBackend::create_subtensor(ITensorHandle *parent, TensorShape shape, Coordinates coords, bool extend_parent)
 {
-    if(parent == nullptr)
+    if (parent == nullptr) // no parent handle: return an empty pointer instead of a sub-tensor handle
     {
         return nullptr;
     }
 
-    return support::cpp14::make_unique<CLSubTensorHandle>(parent, shape, coords, extend_parent);
+    return std::make_unique<CLSubTensorHandle>(parent, shape, coords, extend_parent); // returned through the ITensorHandle pointer type
 }
 
 std::unique_ptr<arm_compute::IFunction> CLDeviceBackend::configure_node(INode &node, GraphContext &ctx)
@@ -202,7 +208,7 @@ arm_compute::Status CLDeviceBackend::validate_node(INode &node)
 
 std::shared_ptr<arm_compute::IMemoryManager> CLDeviceBackend::create_memory_manager(MemoryManagerAffinity affinity)
 {
-    if(affinity == MemoryManagerAffinity::Offset)
+    if (affinity == MemoryManagerAffinity::Offset)
     {
         ARM_COMPUTE_LOG_GRAPH_WARNING("CL Backend does not support offset affinity memory management!");
         return nullptr;
@@ -220,6 +226,11 @@ std::shared_ptr<arm_compute::IWeightsManager> CLDeviceBackend::create_weights_ma
     auto weights_mgr = std::make_shared<IWeightsManager>();
     return weights_mgr;
 }
+
+void CLDeviceBackend::sync()
+{
+    CLScheduler::get().sync(); // delegates to the scheduler returned by CLScheduler::get()
+}
 } // namespace backends
 } // namespace graph
 } // namespace arm_compute
diff --git a/src/graph/backends/CL/CLFunctionsFactory.cpp b/src/graph/backends/CL/CLFunctionsFactory.cpp
index 98013b9e49..d4e1aa880f 100644
--- a/src/graph/backends/CL/CLFunctionsFactory.cpp
+++ b/src/graph/backends/CL/CLFunctionsFactory.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021, 2023 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -22,12 +22,12 @@
  * SOFTWARE.
  */
 #include "arm_compute/graph/backends/CL/CLFunctionFactory.h"
-
+#include "arm_compute/graph/backends/FunctionHelpers.h"
 #include "arm_compute/graph/Graph.h"
 #include "arm_compute/graph/GraphContext.h"
-#include "arm_compute/graph/backends/FunctionHelpers.h"
 #include "arm_compute/runtime/CL/CLFunctions.h"
 #include "arm_compute/runtime/CPP/CPPFunctions.h"
+
 #include "src/core/CL/CLKernels.h"
 #include "support/Cast.h"
 
@@ -66,6 +66,7 @@ struct CLEltwiseFunctions
     using Subtraction    = CLArithmeticSubtraction;
     using Multiplication = CLPixelWiseMultiplication;
     using Maximum        = CLElementwiseMax;
+    using Division       = CLArithmeticDivision;
 };
 
 /** Collection of CL unary element-wise functions */
@@ -80,28 +81,27 @@ struct CLFusedLayerTypes
     using ConvolutionLayer          = CLConvolutionLayer;
     using DepthwiseConvolutionLayer = CLDepthwiseConvolutionLayer;
     using FuseBatchNormalization    = CLFuseBatchNormalization;
+    using GEMMConvolutionLayer      = CLGEMMConvolutionLayer;
 };
 
-// TODO (isagot01): Remove once we support heterogeneous scheduling at function level
 /** Wrapper for the CPP Function in the OpenCL backend **/
 class CPPWrapperFunction : public IFunction
 {
 public:
     /* Default constructor */
-    CPPWrapperFunction()
-        : _tensors(), _func(nullptr)
+    CPPWrapperFunction() : _tensors(), _func(nullptr)
     {
     }
 
     void run() override
     {
-        for(auto &tensor : _tensors)
+        for (auto &tensor : _tensors)
         {
             tensor->map(CLScheduler::get().queue());
         }
         _func->run();
 
-        for(auto &tensor : _tensors)
+        for (auto &tensor : _tensors)
         {
             tensor->unmap(CLScheduler::get().queue());
         }
@@ -126,7 +126,8 @@ namespace detail
 {
 // Specialized functions
 template <>
-std::unique_ptr<IFunction> create_detection_output_layer<CPPDetectionOutputLayer, CLTargetInfo>(DetectionOutputLayerNode &node)
+std::unique_ptr<IFunction>
+create_detection_output_layer<CPPDetectionOutputLayer, CLTargetInfo>(DetectionOutputLayerNode &node)
 {
     validate_node<CLTargetInfo>(node, 3 /* expected inputs */, 1 /* expected outputs */);
 
@@ -143,23 +144,19 @@ std::unique_ptr<IFunction> create_detection_output_layer<CPPDetectionOutputLayer
     ARM_COMPUTE_ERROR_ON(output == nullptr);
 
     // Create and configure function
-    auto func = support::cpp14::make_unique<CPPDetectionOutputLayer>();
+    auto func = std::make_unique<CPPDetectionOutputLayer>();
     func->configure(input0, input1, input2, output, detect_info);
 
     // Log info
     ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated "
-                               << node.name()
-                               << " Type: " << node.type()
-                               << " Target: " << CLTargetInfo::TargetType
-                               << " Data Type: " << input0->info()->data_type()
-                               << " Input0 shape: " << input0->info()->tensor_shape()
-                               << " Input1 shape: " << input1->info()->tensor_shape()
+                               << node.name() << " Type: " << node.type() << " Target: " << CLTargetInfo::TargetType
+                               << " Data Type: " << input0->info()->data_type() << " Input0 shape: "
+                               << input0->info()->tensor_shape() << " Input1 shape: " << input1->info()->tensor_shape()
                                << " Input2 shape: " << input2->info()->tensor_shape()
                                << " Output shape: " << output->info()->tensor_shape()
-                               << " DetectionOutputLayer info: " << detect_info
-                               << std::endl);
+                               << " DetectionOutputLayer info: " << detect_info << std::endl);
 
-    auto wrap_function = support::cpp14::make_unique<CPPWrapperFunction>();
+    auto wrap_function = std::make_unique<CPPWrapperFunction>();
 
     wrap_function->register_function(std::move(func));
     wrap_function->register_tensor(input0);
@@ -167,10 +164,11 @@ std::unique_ptr<IFunction> create_detection_output_layer<CPPDetectionOutputLayer
     wrap_function->register_tensor(input2);
     wrap_function->register_tensor(output);
 
-    return RETURN_UNIQUE_PTR(wrap_function);
+    return std::move(wrap_function);
 }
 template <>
-std::unique_ptr<IFunction> create_detection_post_process_layer<CPPDetectionPostProcessLayer, CLTargetInfo>(DetectionPostProcessLayerNode &node)
+std::unique_ptr<IFunction>
+create_detection_post_process_layer<CPPDetectionPostProcessLayer, CLTargetInfo>(DetectionPostProcessLayerNode &node)
 {
     validate_node<CLTargetInfo>(node, 3 /* expected inputs */, 4 /* expected outputs */);
 
@@ -193,26 +191,22 @@ std::unique_ptr<IFunction> create_detection_post_process_layer<CPPDetectionPostP
     ARM_COMPUTE_ERROR_ON(output3 == nullptr);
 
     // Create and configure function
-    auto func = support::cpp14::make_unique<CPPDetectionPostProcessLayer>();
+    auto func = std::make_unique<CPPDetectionPostProcessLayer>();
     func->configure(input0, input1, input2, output0, output1, output2, output3, detect_info);
 
     // Log info
     ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated "
-                               << node.name()
-                               << " Type: " << node.type()
-                               << " Target: " << CLTargetInfo::TargetType
-                               << " Data Type: " << input0->info()->data_type()
-                               << " Input0 shape: " << input0->info()->tensor_shape()
-                               << " Input1 shape: " << input1->info()->tensor_shape()
+                               << node.name() << " Type: " << node.type() << " Target: " << CLTargetInfo::TargetType
+                               << " Data Type: " << input0->info()->data_type() << " Input0 shape: "
+                               << input0->info()->tensor_shape() << " Input1 shape: " << input1->info()->tensor_shape()
                                << " Input2 shape: " << input2->info()->tensor_shape()
                                << " Output0 shape: " << output0->info()->tensor_shape()
                                << " Output1 shape: " << output1->info()->tensor_shape()
                                << " Output2 shape: " << output2->info()->tensor_shape()
                                << " Output3 shape: " << output3->info()->tensor_shape()
-                               << " DetectionPostProcessLayer info: " << detect_info
-                               << std::endl);
+                               << " DetectionPostProcessLayer info: " << detect_info << std::endl);
 
-    auto wrap_function = support::cpp14::make_unique<CPPWrapperFunction>();
+    auto wrap_function = std::make_unique<CPPWrapperFunction>();
 
     wrap_function->register_function(std::move(func));
     wrap_function->register_tensor(input0);
@@ -223,102 +217,134 @@ std::unique_ptr<IFunction> create_detection_post_process_layer<CPPDetectionPostP
     wrap_function->register_tensor(output2);
     wrap_function->register_tensor(output3);
 
-    return RETURN_UNIQUE_PTR(wrap_function);
+    return std::move(wrap_function);
 }
 } // namespace detail
 
 std::unique_ptr<IFunction> CLFunctionFactory::create(INode *node, GraphContext &ctx)
 {
-    if(node == nullptr)
+    if (node == nullptr)
     {
         return nullptr;
     }
 
     NodeType type = node->type();
-    switch(type)
+    switch (type)
     {
         case NodeType::ActivationLayer:
-            return detail::create_activation_layer<CLActivationLayer, CLTargetInfo>(*polymorphic_downcast<ActivationLayerNode *>(node));
+            return detail::create_activation_layer<CLActivationLayer, CLTargetInfo>(
+                *polymorphic_downcast<ActivationLayerNode *>(node));
         case NodeType::ArgMinMaxLayer:
-            return detail::create_arg_min_max_layer<CLArgMinMaxLayer, CLTargetInfo>(*polymorphic_downcast<ArgMinMaxLayerNode *>(node));
+            return detail::create_arg_min_max_layer<CLArgMinMaxLayer, CLTargetInfo>(
+                *polymorphic_downcast<ArgMinMaxLayerNode *>(node));
         case NodeType::BatchNormalizationLayer:
-            return detail::create_batch_normalization_layer<CLBatchNormalizationLayer, CLTargetInfo>(*polymorphic_downcast<BatchNormalizationLayerNode *>(node));
+            return detail::create_batch_normalization_layer<CLBatchNormalizationLayer, CLTargetInfo>(
+                *polymorphic_downcast<BatchNormalizationLayerNode *>(node));
         case NodeType::BoundingBoxTransformLayer:
-            return detail::create_bounding_box_transform_layer<CLBoundingBoxTransform, CLTargetInfo>(*polymorphic_downcast<BoundingBoxTransformLayerNode *>(node));
+            return detail::create_bounding_box_transform_layer<CLBoundingBoxTransform, CLTargetInfo>(
+                *polymorphic_downcast<BoundingBoxTransformLayerNode *>(node));
         case NodeType::ChannelShuffleLayer:
-            return detail::create_channel_shuffle_layer<CLChannelShuffleLayer, CLTargetInfo>(*polymorphic_downcast<ChannelShuffleLayerNode *>(node));
+            return detail::create_channel_shuffle_layer<CLChannelShuffleLayer, CLTargetInfo>(
+                *polymorphic_downcast<ChannelShuffleLayerNode *>(node));
         case NodeType::ConvolutionLayer:
-            return detail::create_convolution_layer<CLConvolutionLayerFunctions, CLTargetInfo>(*polymorphic_downcast<ConvolutionLayerNode *>(node), ctx);
+            return detail::create_convolution_layer<CLConvolutionLayerFunctions, CLTargetInfo>(
+                *polymorphic_downcast<ConvolutionLayerNode *>(node), ctx);
         case NodeType::DeconvolutionLayer:
-            return detail::create_deconvolution_layer<CLDeconvolutionLayer, CLTargetInfo>(*polymorphic_downcast<DeconvolutionLayerNode *>(node), ctx);
+            return detail::create_deconvolution_layer<CLDeconvolutionLayer, CLTargetInfo>(
+                *polymorphic_downcast<DeconvolutionLayerNode *>(node), ctx);
         case NodeType::ConcatenateLayer:
-            return detail::create_concatenate_layer<CLConcatenateLayer, CLTargetInfo>(*polymorphic_downcast<ConcatenateLayerNode *>(node));
+            return detail::create_concatenate_layer<CLConcatenateLayer, CLTargetInfo>(
+                *polymorphic_downcast<ConcatenateLayerNode *>(node));
         case NodeType::DepthToSpaceLayer:
-            return detail::create_depth_to_space_layer<CLDepthToSpaceLayer, CLTargetInfo>(*polymorphic_downcast<DepthToSpaceLayerNode *>(node));
+            return detail::create_depth_to_space_layer<CLDepthToSpaceLayer, CLTargetInfo>(
+                *polymorphic_downcast<DepthToSpaceLayerNode *>(node));
         case NodeType::DepthwiseConvolutionLayer:
-            return detail::create_depthwise_convolution_layer<CLDepthwiseConvolutionLayer, CLTargetInfo>(*polymorphic_downcast<DepthwiseConvolutionLayerNode *>(node));
+            return detail::create_depthwise_convolution_layer<CLDepthwiseConvolutionLayer, CLTargetInfo>(
+                *polymorphic_downcast<DepthwiseConvolutionLayerNode *>(node));
         case NodeType::DequantizationLayer:
-            return detail::create_dequantization_layer<CLDequantizationLayer, CLTargetInfo>(*polymorphic_downcast<DequantizationLayerNode *>(node));
+            return detail::create_dequantization_layer<CLDequantizationLayer, CLTargetInfo>(
+                *polymorphic_downcast<DequantizationLayerNode *>(node));
         case NodeType::DetectionOutputLayer:
-            return detail::create_detection_output_layer<CPPDetectionOutputLayer, CLTargetInfo>(*polymorphic_downcast<DetectionOutputLayerNode *>(node));
+            return detail::create_detection_output_layer<CPPDetectionOutputLayer, CLTargetInfo>(
+                *polymorphic_downcast<DetectionOutputLayerNode *>(node));
         case NodeType::DetectionPostProcessLayer:
-            return detail::create_detection_post_process_layer<CPPDetectionPostProcessLayer, CLTargetInfo>(*polymorphic_downcast<DetectionPostProcessLayerNode *>(node));
+            return detail::create_detection_post_process_layer<CPPDetectionPostProcessLayer, CLTargetInfo>(
+                *polymorphic_downcast<DetectionPostProcessLayerNode *>(node));
         case NodeType::EltwiseLayer:
-            return detail::create_eltwise_layer<CLEltwiseFunctions, CLTargetInfo>(*polymorphic_downcast<EltwiseLayerNode *>(node));
+            return detail::create_eltwise_layer<CLEltwiseFunctions, CLTargetInfo>(
+                *polymorphic_downcast<EltwiseLayerNode *>(node));
         case NodeType::UnaryEltwiseLayer:
-            return detail::create_unary_eltwise_layer<CLUnaryEltwiseFunctions, CLTargetInfo>(*polymorphic_downcast<UnaryEltwiseLayerNode *>(node));
+            return detail::create_unary_eltwise_layer<CLUnaryEltwiseFunctions, CLTargetInfo>(
+                *polymorphic_downcast<UnaryEltwiseLayerNode *>(node));
         case NodeType::FlattenLayer:
-            return detail::create_flatten_layer<CLFlattenLayer, CLTargetInfo>(*polymorphic_downcast<FlattenLayerNode *>(node));
+            return detail::create_flatten_layer<CLFlattenLayer, CLTargetInfo>(
+                *polymorphic_downcast<FlattenLayerNode *>(node));
         case NodeType::FullyConnectedLayer:
-            return detail::create_fully_connected_layer<CLFullyConnectedLayer, CLTargetInfo>(*polymorphic_downcast<FullyConnectedLayerNode *>(node), ctx);
+            return detail::create_fully_connected_layer<CLFullyConnectedLayer, CLTargetInfo>(
+                *polymorphic_downcast<FullyConnectedLayerNode *>(node), ctx);
         case NodeType::FusedConvolutionBatchNormalizationLayer:
-            return detail::create_fused_convolution_batch_normalization_layer<CLFusedLayerTypes, CLTargetInfo>(*polymorphic_downcast<FusedConvolutionBatchNormalizationNode *>(node), ctx);
+            return detail::create_fused_convolution_batch_normalization_layer<CLFusedLayerTypes, CLTargetInfo>(
+                *polymorphic_downcast<FusedConvolutionBatchNormalizationNode *>(node), ctx);
         case NodeType::FusedDepthwiseConvolutionBatchNormalizationLayer:
-            return detail::create_fused_depthwise_convolution_batch_normalization_layer<CLFusedLayerTypes, CLTargetInfo>(*polymorphic_downcast<FusedDepthwiseConvolutionBatchNormalizationNode *>(node), ctx);
+            return detail::create_fused_depthwise_convolution_batch_normalization_layer<CLFusedLayerTypes,
+                                                                                        CLTargetInfo>(
+                *polymorphic_downcast<FusedDepthwiseConvolutionBatchNormalizationNode *>(node), ctx);
         case NodeType::GenerateProposalsLayer:
-            return detail::create_generate_proposals_layer<CLGenerateProposalsLayer, CLTargetInfo>(*polymorphic_downcast<GenerateProposalsLayerNode *>(node), ctx);
+            return detail::create_generate_proposals_layer<CLGenerateProposalsLayer, CLTargetInfo>(
+                *polymorphic_downcast<GenerateProposalsLayerNode *>(node), ctx);
         case NodeType::L2NormalizeLayer:
-            return detail::create_l2_normalize_layer<CLL2NormalizeLayer, CLTargetInfo>(*polymorphic_downcast<L2NormalizeLayerNode *>(node), ctx);
+            return detail::create_l2_normalize_layer<CLL2NormalizeLayer, CLTargetInfo>(
+                *polymorphic_downcast<L2NormalizeLayerNode *>(node), ctx);
         case NodeType::NormalizationLayer:
-            return detail::create_normalization_layer<CLNormalizationLayer, CLTargetInfo>(*polymorphic_downcast<NormalizationLayerNode *>(node), ctx);
+            return detail::create_normalization_layer<CLNormalizationLayer, CLTargetInfo>(
+                *polymorphic_downcast<NormalizationLayerNode *>(node), ctx);
         case NodeType::NormalizePlanarYUVLayer:
-            return detail::create_normalize_planar_yuv_layer<CLNormalizePlanarYUVLayer, CLTargetInfo>(*polymorphic_downcast<NormalizePlanarYUVLayerNode *>(node));
+            return detail::create_normalize_planar_yuv_layer<CLNormalizePlanarYUVLayer, CLTargetInfo>(
+                *polymorphic_downcast<NormalizePlanarYUVLayerNode *>(node));
         case NodeType::PadLayer:
             return detail::create_pad_layer<CLPadLayer, CLTargetInfo>(*polymorphic_downcast<PadLayerNode *>(node));
         case NodeType::PermuteLayer:
-            return detail::create_permute_layer<CLPermute, CLTargetInfo>(*polymorphic_downcast<PermuteLayerNode *>(node));
+            return detail::create_permute_layer<CLPermute, CLTargetInfo>(
+                *polymorphic_downcast<PermuteLayerNode *>(node));
         case NodeType::PoolingLayer:
-            return detail::create_pooling_layer<CLPoolingLayer, CLTargetInfo>(*polymorphic_downcast<PoolingLayerNode *>(node));
+            return detail::create_pooling_layer<CLPoolingLayer, CLTargetInfo>(
+                *polymorphic_downcast<PoolingLayerNode *>(node));
         case NodeType::PReluLayer:
-            return detail::create_prelu_layer<CLPReluLayer, CLTargetInfo>(*polymorphic_downcast<PReluLayerNode *>(node));
+            return detail::create_prelu_layer<CLPReluLayer, CLTargetInfo>(
+                *polymorphic_downcast<PReluLayerNode *>(node));
         case NodeType::PrintLayer:
             return detail::create_print_layer<CLTargetInfo>(*polymorphic_downcast<PrintLayerNode *>(node));
         case NodeType::PriorBoxLayer:
-            return detail::create_priorbox_layer<CLPriorBoxLayer, CLTargetInfo>(*polymorphic_downcast<PriorBoxLayerNode *>(node));
+            return detail::create_priorbox_layer<CLPriorBoxLayer, CLTargetInfo>(
+                *polymorphic_downcast<PriorBoxLayerNode *>(node));
         case NodeType::QuantizationLayer:
-            return detail::create_quantization_layer<CLQuantizationLayer, CLTargetInfo>(*polymorphic_downcast<QuantizationLayerNode *>(node));
+            return detail::create_quantization_layer<CLQuantizationLayer, CLTargetInfo>(
+                *polymorphic_downcast<QuantizationLayerNode *>(node));
         case NodeType::ReductionOperationLayer:
-            return detail::create_reduction_operation_layer<CLReductionOperation, CLTargetInfo>(*polymorphic_downcast<ReductionLayerNode *>(node), ctx);
+            return detail::create_reduction_operation_layer<CLReductionOperation, CLTargetInfo>(
+                *polymorphic_downcast<ReductionLayerNode *>(node), ctx);
         case NodeType::ReorgLayer:
-            return detail::create_reorg_layer<CLReorgLayer, CLTargetInfo>(*polymorphic_downcast<ReorgLayerNode *>(node));
+            return detail::create_reorg_layer<CLReorgLayer, CLTargetInfo>(
+                *polymorphic_downcast<ReorgLayerNode *>(node));
         case NodeType::ReshapeLayer:
-            return detail::create_reshape_layer<CLReshapeLayer, CLTargetInfo>(*polymorphic_downcast<ReshapeLayerNode *>(node));
+            return detail::create_reshape_layer<CLReshapeLayer, CLTargetInfo>(
+                *polymorphic_downcast<ReshapeLayerNode *>(node));
         case NodeType::ResizeLayer:
             return detail::create_resize_layer<CLScale, CLTargetInfo>(*polymorphic_downcast<ResizeLayerNode *>(node));
         case NodeType::ROIAlignLayer:
-            return detail::create_roi_align_layer<CLROIAlignLayer, CLTargetInfo>(*polymorphic_downcast<ROIAlignLayerNode *>(node));
+            return detail::create_roi_align_layer<CLROIAlignLayer, CLTargetInfo>(
+                *polymorphic_downcast<ROIAlignLayerNode *>(node));
         case NodeType::SliceLayer:
             return detail::create_slice_layer<CLSlice, CLTargetInfo>(*polymorphic_downcast<SliceLayerNode *>(node));
         case NodeType::SoftmaxLayer:
-            return detail::create_softmax_layer<CLSoftmaxLayer, CLTargetInfo>(*polymorphic_downcast<SoftmaxLayerNode *>(node), ctx);
+            return detail::create_softmax_layer<CLSoftmaxLayer, CLTargetInfo>(
+                *polymorphic_downcast<SoftmaxLayerNode *>(node), ctx);
         case NodeType::StackLayer:
-            return detail::create_stack_layer<CLStackLayer, CLTargetInfo>(*polymorphic_downcast<StackLayerNode *>(node));
+            return detail::create_stack_layer<CLStackLayer, CLTargetInfo>(
+                *polymorphic_downcast<StackLayerNode *>(node));
         case NodeType::StridedSliceLayer:
-            return detail::create_strided_slice_layer<CLStridedSlice, CLTargetInfo>(*polymorphic_downcast<StridedSliceLayerNode *>(node));
-        case NodeType::UpsampleLayer:
-            return detail::create_upsample_layer<CLUpsampleLayer, CLTargetInfo>(*polymorphic_downcast<UpsampleLayerNode *>(node), ctx);
-        case NodeType::YOLOLayer:
-            return detail::create_yolo_layer<CLYOLOLayer, CLTargetInfo>(*polymorphic_downcast<YOLOLayerNode *>(node), ctx);
+            return detail::create_strided_slice_layer<CLStridedSlice, CLTargetInfo>(
+                *polymorphic_downcast<StridedSliceLayerNode *>(node));
         default:
             return nullptr;
     }
diff --git a/src/graph/backends/CL/CLNodeValidator.cpp b/src/graph/backends/CL/CLNodeValidator.cpp
index 830f54ce3f..510eda7935 100644
--- a/src/graph/backends/CL/CLNodeValidator.cpp
+++ b/src/graph/backends/CL/CLNodeValidator.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021, 2023 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -25,20 +25,9 @@
 
 #include "arm_compute/graph/backends/ValidateHelpers.h"
 #include "arm_compute/graph/nodes/Nodes.h"
-
 #include "arm_compute/runtime/CL/CLFunctions.h"
 #include "arm_compute/runtime/CPP/CPPFunctions.h"
-#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h"
-#include "src/core/CL/kernels/CLFillBorderKernel.h"
-#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h"
-#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h"
-#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h"
-#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h"
-#include "src/core/CL/kernels/CLGEMMLowpReductionKernel.h"
-#include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
-#include "src/core/CL/kernels/CLIm2ColKernel.h"
-#include "src/core/CL/kernels/CLQLSTMLayerNormalizationKernel.h"
-#include "src/core/CL/kernels/CLWeightsReshapeKernel.h"
+
 #include "support/Cast.h"
 
 using namespace arm_compute::utils::cast;
@@ -56,6 +45,7 @@ struct CLEltwiseLayerFunctions
     using ArithmeticSubtraction   = CLArithmeticSubtraction;
     using PixelWiseMultiplication = CLPixelWiseMultiplication;
     using ElementwiseMax          = CLElementwiseMax;
+    using ArithmeticDivision      = CLArithmeticDivision;
 };
 
 /** Collection of CL unary element-wise functions */
@@ -66,41 +56,51 @@ struct CLUnaryEltwiseLayerFunctions
 
 Status CLNodeValidator::validate(INode *node)
 {
-    if(node == nullptr)
+    if (node == nullptr)
     {
         return Status{};
     }
 
     NodeType type = node->type();
-    switch(type)
+    switch (type)
     {
         case NodeType::ArgMinMaxLayer:
-            return detail::validate_arg_min_max_layer<CLArgMinMaxLayer>(*polymorphic_downcast<ArgMinMaxLayerNode *>(node));
+            return detail::validate_arg_min_max_layer<CLArgMinMaxLayer>(
+                *polymorphic_downcast<ArgMinMaxLayerNode *>(node));
         case NodeType::BoundingBoxTransformLayer:
-            return detail::validate_bounding_box_transform_layer<CLBoundingBoxTransform>(*polymorphic_downcast<BoundingBoxTransformLayerNode *>(node));
+            return detail::validate_bounding_box_transform_layer<CLBoundingBoxTransform>(
+                *polymorphic_downcast<BoundingBoxTransformLayerNode *>(node));
         case NodeType::ChannelShuffleLayer:
-            return detail::validate_channel_shuffle_layer<CLChannelShuffleLayer>(*polymorphic_downcast<ChannelShuffleLayerNode *>(node));
+            return detail::validate_channel_shuffle_layer<CLChannelShuffleLayer>(
+                *polymorphic_downcast<ChannelShuffleLayerNode *>(node));
         case NodeType::ConvolutionLayer:
-            return detail::validate_convolution_layer<CLConvolutionLayer,
-                   CLDirectConvolutionLayer,
-                   CLGEMMConvolutionLayer,
-                   CLWinogradConvolutionLayer>(*polymorphic_downcast<ConvolutionLayerNode *>(node));
+            return detail::validate_convolution_layer<CLConvolutionLayer, CLDirectConvolutionLayer,
+                                                      CLGEMMConvolutionLayer, CLWinogradConvolutionLayer>(
+                *polymorphic_downcast<ConvolutionLayerNode *>(node));
         case NodeType::DepthToSpaceLayer:
-            return detail::validate_depth_to_space_layer<CLDepthToSpaceLayer>(*polymorphic_downcast<DepthToSpaceLayerNode *>(node));
+            return detail::validate_depth_to_space_layer<CLDepthToSpaceLayer>(
+                *polymorphic_downcast<DepthToSpaceLayerNode *>(node));
         case NodeType::DepthwiseConvolutionLayer:
-            return detail::validate_depthwise_convolution_layer<CLDepthwiseConvolutionLayer>(*polymorphic_downcast<DepthwiseConvolutionLayerNode *>(node));
+            return detail::validate_depthwise_convolution_layer<CLDepthwiseConvolutionLayer>(
+                *polymorphic_downcast<DepthwiseConvolutionLayerNode *>(node));
         case NodeType::DequantizationLayer:
-            return detail::validate_dequantization_layer<CLDequantizationLayer>(*polymorphic_downcast<DequantizationLayerNode *>(node));
+            return detail::validate_dequantization_layer<CLDequantizationLayer>(
+                *polymorphic_downcast<DequantizationLayerNode *>(node));
         case NodeType::DetectionOutputLayer:
-            return detail::validate_detection_output_layer<CPPDetectionOutputLayer>(*polymorphic_downcast<DetectionOutputLayerNode *>(node));
+            return detail::validate_detection_output_layer<CPPDetectionOutputLayer>(
+                *polymorphic_downcast<DetectionOutputLayerNode *>(node));
         case NodeType::DetectionPostProcessLayer:
-            return detail::validate_detection_post_process_layer<CPPDetectionPostProcessLayer>(*polymorphic_downcast<DetectionPostProcessLayerNode *>(node));
+            return detail::validate_detection_post_process_layer<CPPDetectionPostProcessLayer>(
+                *polymorphic_downcast<DetectionPostProcessLayerNode *>(node));
         case NodeType::GenerateProposalsLayer:
-            return detail::validate_generate_proposals_layer<CLGenerateProposalsLayer>(*polymorphic_downcast<GenerateProposalsLayerNode *>(node));
+            return detail::validate_generate_proposals_layer<CLGenerateProposalsLayer>(
+                *polymorphic_downcast<GenerateProposalsLayerNode *>(node));
         case NodeType::L2NormalizeLayer:
-            return detail::validate_l2_normalize_layer<CLL2NormalizeLayer>(*polymorphic_downcast<L2NormalizeLayerNode *>(node));
+            return detail::validate_l2_normalize_layer<CLL2NormalizeLayer>(
+                *polymorphic_downcast<L2NormalizeLayerNode *>(node));
         case NodeType::NormalizePlanarYUVLayer:
-            return detail::validate_normalize_planar_yuv_layer<CLNormalizePlanarYUVLayer>(*polymorphic_downcast<NormalizePlanarYUVLayerNode *>(node));
+            return detail::validate_normalize_planar_yuv_layer<CLNormalizePlanarYUVLayer>(
+                *polymorphic_downcast<NormalizePlanarYUVLayerNode *>(node));
         case NodeType::PadLayer:
             return detail::validate_pad_layer<CLPadLayer>(*polymorphic_downcast<PadLayerNode *>(node));
         case NodeType::PermuteLayer:
@@ -110,9 +110,11 @@ Status CLNodeValidator::validate(INode *node)
         case NodeType::PriorBoxLayer:
             return detail::validate_priorbox_layer<CLPriorBoxLayer>(*polymorphic_downcast<PriorBoxLayerNode *>(node));
         case NodeType::QuantizationLayer:
-            return detail::validate_quantization_layer<CLQuantizationLayer>(*polymorphic_downcast<QuantizationLayerNode *>(node));
+            return detail::validate_quantization_layer<CLQuantizationLayer>(
+                *polymorphic_downcast<QuantizationLayerNode *>(node));
         case NodeType::ReductionOperationLayer:
-            return detail::validate_reduction_operation_layer<CLReductionOperation>(*polymorphic_downcast<ReductionLayerNode *>(node));
+            return detail::validate_reduction_operation_layer<CLReductionOperation>(
+                *polymorphic_downcast<ReductionLayerNode *>(node));
         case NodeType::ReorgLayer:
             return detail::validate_reorg_layer<CLReorgLayer>(*polymorphic_downcast<ReorgLayerNode *>(node));
         case NodeType::ReshapeLayer:
@@ -122,15 +124,14 @@ Status CLNodeValidator::validate(INode *node)
         case NodeType::SliceLayer:
             return detail::validate_slice_layer<CLSlice>(*polymorphic_downcast<SliceLayerNode *>(node));
         case NodeType::StridedSliceLayer:
-            return detail::validate_strided_slice_layer<CLStridedSlice>(*polymorphic_downcast<StridedSliceLayerNode *>(node));
-        case NodeType::UpsampleLayer:
-            return detail::validate_upsample_layer<CLUpsampleLayer>(*polymorphic_downcast<UpsampleLayerNode *>(node));
-        case NodeType::YOLOLayer:
-            return detail::validate_yolo_layer<CLYOLOLayer>(*polymorphic_downcast<YOLOLayerNode *>(node));
+            return detail::validate_strided_slice_layer<CLStridedSlice>(
+                *polymorphic_downcast<StridedSliceLayerNode *>(node));
         case NodeType::EltwiseLayer:
-            return detail::validate_eltwise_Layer<CLEltwiseLayerFunctions>(*polymorphic_downcast<EltwiseLayerNode *>(node));
+            return detail::validate_eltwise_Layer<CLEltwiseLayerFunctions>(
+                *polymorphic_downcast<EltwiseLayerNode *>(node));
         case NodeType::UnaryEltwiseLayer:
-            return detail::validate_unary_eltwise_layer<CLUnaryEltwiseLayerFunctions>(*polymorphic_downcast<UnaryEltwiseLayerNode *>(node));
+            return detail::validate_unary_eltwise_layer<CLUnaryEltwiseLayerFunctions>(
+                *polymorphic_downcast<UnaryEltwiseLayerNode *>(node));
         default:
             return Status{};
     }
diff --git a/src/graph/backends/CL/CLSubTensorHandle.cpp b/src/graph/backends/CL/CLSubTensorHandle.cpp
index b97d25890a..ccdc877a18 100644
--- a/src/graph/backends/CL/CLSubTensorHandle.cpp
+++ b/src/graph/backends/CL/CLSubTensorHandle.cpp
@@ -31,7 +31,10 @@ namespace graph
 {
 namespace backends
 {
-CLSubTensorHandle::CLSubTensorHandle(ITensorHandle *parent_handle, const TensorShape &shape, const Coordinates &coords, bool extend_parent)
+CLSubTensorHandle::CLSubTensorHandle(ITensorHandle     *parent_handle,
+                                     const TensorShape &shape,
+                                     const Coordinates &coords,
+                                     bool               extend_parent)
     : _sub_tensor(), _parent_handle(nullptr)
 {
     ARM_COMPUTE_ERROR_ON(!parent_handle);
@@ -98,4 +101,4 @@ Target CLSubTensorHandle::target() const
 }
 } // namespace backends
 } // namespace graph
-} // namespace arm_compute
-\ No newline at end of file
+} // namespace arm_compute
diff --git a/src/graph/backends/CL/CLTensorHandle.cpp b/src/graph/backends/CL/CLTensorHandle.cpp
index a496c2ce47..1b69f9dede 100644
--- a/src/graph/backends/CL/CLTensorHandle.cpp
+++ b/src/graph/backends/CL/CLTensorHandle.cpp
@@ -31,8 +31,7 @@ namespace graph
 {
 namespace backends
 {
-CLTensorHandle::CLTensorHandle(const ITensorInfo &info)
-    : _tensor()
+CLTensorHandle::CLTensorHandle(const ITensorInfo &info) : _tensor()
 {
     _tensor.allocator()->init(info);
 }
@@ -49,7 +48,7 @@ void CLTensorHandle::free()
 
 void CLTensorHandle::manage(IMemoryGroup *mg)
 {
-    if(mg != nullptr)
+    if (mg != nullptr)
     {
         mg->manage(&_tensor);
     }
@@ -68,7 +67,7 @@ void CLTensorHandle::unmap()
 void CLTensorHandle::release_if_unused()
 {
     // TODO (geopin01): Release tensor only if all sub-tensors are marked as not used
-    if(!_tensor.is_used())
+    if (!_tensor.is_used())
     {
         _tensor.allocator()->free();
     }
@@ -100,4 +99,4 @@ Target CLTensorHandle::target() const
 }
 } // namespace backends
 } // namespace graph
-} // namespace arm_compute
-\ No newline at end of file
+} // namespace arm_compute
diff --git a/src/graph/backends/GLES/GCDeviceBackend.cpp b/src/graph/backends/GLES/GCDeviceBackend.cpp
deleted file mode 100644
index 252093cf2e..0000000000
--- a/src/graph/backends/GLES/GCDeviceBackend.cpp
+++ /dev/null
@@ -1,163 +0,0 @@
-/*
- * Copyright (c) 2018-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/graph/backends/GLES/GCDeviceBackend.h"
-
-#include "arm_compute/graph/Graph.h"
-#include "arm_compute/graph/GraphContext.h"
-#include "arm_compute/graph/INode.h"
-#include "arm_compute/graph/Logger.h"
-#include "arm_compute/graph/Tensor.h"
-#include "arm_compute/graph/backends/BackendRegistrar.h"
-#include "arm_compute/graph/backends/GLES/GCFunctionFactory.h"
-#include "arm_compute/graph/backends/GLES/GCNodeValidator.h"
-#include "arm_compute/graph/backends/GLES/GCTensorHandle.h"
-
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/runtime/BlobLifetimeManager.h"
-#include "arm_compute/runtime/GLES_COMPUTE/GCBufferAllocator.h"
-#include "arm_compute/runtime/GLES_COMPUTE/GCScheduler.h"
-#include "arm_compute/runtime/MemoryGroup.h"
-#include "arm_compute/runtime/MemoryManagerOnDemand.h"
-#include "arm_compute/runtime/PoolManager.h"
-
-#include "support/ToolchainSupport.h"
-
-namespace arm_compute
-{
-namespace graph
-{
-namespace backends
-{
-/** Register GLES backend */
-static detail::BackendRegistrar<GCDeviceBackend> GCDeviceBackend_registrar(Target::GC);
-
-GCDeviceBackend::GCDeviceBackend()
-    : _initialized(false), _allocator()
-{
-}
-
-void GCDeviceBackend::initialize_backend()
-{
-    // Setup Scheduler
-    GCScheduler::get().default_init();
-}
-
-void GCDeviceBackend::release_backend_context(GraphContext &ctx)
-{
-    //Nothing to do
-    ARM_COMPUTE_UNUSED(ctx);
-}
-
-void GCDeviceBackend::setup_backend_context(GraphContext &ctx)
-{
-    // Force backend initialization
-    if(!_initialized)
-    {
-        initialize_backend();
-        _initialized = true;
-    }
-
-    // Setup a management backend
-    if(ctx.memory_management_ctx(Target::GC) == nullptr)
-    {
-        MemoryManagerContext mm_ctx;
-        mm_ctx.target      = Target::GC;
-        mm_ctx.intra_mm    = create_memory_manager(MemoryManagerAffinity::Buffer);
-        mm_ctx.cross_mm    = create_memory_manager(MemoryManagerAffinity::Buffer);
-        mm_ctx.cross_group = std::make_shared<MemoryGroup>(mm_ctx.cross_mm);
-        mm_ctx.allocator   = &_allocator;
-
-        ctx.insert_memory_management_ctx(std::move(mm_ctx));
-    }
-}
-
-bool GCDeviceBackend::is_backend_supported()
-{
-    return arm_compute::opengles31_is_available();
-}
-
-IAllocator *GCDeviceBackend::backend_allocator()
-{
-    return &_allocator;
-}
-
-std::unique_ptr<ITensorHandle> GCDeviceBackend::create_tensor(const Tensor &tensor)
-{
-    // Get tensor descriptor
-    const TensorDescriptor &tensor_desc = tensor.desc();
-    ARM_COMPUTE_ERROR_ON(tensor_desc.target != Target::GC);
-
-    // Create backend tensor handle
-    TensorInfo info(tensor_desc.shape, 1, tensor_desc.data_type, tensor_desc.quant_info);
-    info.set_data_layout(tensor_desc.layout);
-
-    return support::cpp14::make_unique<GCTensorHandle>(info);
-}
-
-std::unique_ptr<ITensorHandle> GCDeviceBackend::create_subtensor(ITensorHandle *parent, TensorShape shape, Coordinates coords, bool extend_parent)
-{
-    ARM_COMPUTE_UNUSED(parent, shape, coords, extend_parent);
-    ARM_COMPUTE_ERROR("GLES backend has no sub-tensor support!");
-    return nullptr;
-}
-
-std::unique_ptr<arm_compute::IFunction> GCDeviceBackend::configure_node(INode &node, GraphContext &ctx)
-{
-    ARM_COMPUTE_LOG_GRAPH_VERBOSE("Configuring GC node with ID : " << node.id() << std::endl);
-    ARM_COMPUTE_ERROR_ON(node.assigned_target() != Target::GC);
-
-    // Configure node
-    return GCFunctionFactory::create(&node, ctx);
-}
-
-arm_compute::Status GCDeviceBackend::validate_node(INode &node)
-{
-    ARM_COMPUTE_LOG_GRAPH_VERBOSE("Validating GC node with ID : " << node.id() << std::endl);
-    ARM_COMPUTE_ERROR_ON(node.assigned_target() != Target::GC);
-
-    return GCNodeValidator::validate(&node);
-}
-
-std::shared_ptr<arm_compute::IMemoryManager> GCDeviceBackend::create_memory_manager(MemoryManagerAffinity affinity)
-{
-    if(affinity == MemoryManagerAffinity::Offset)
-    {
-        ARM_COMPUTE_LOG_GRAPH_WARNING("GC Backend does not support offset affinity memory management!");
-        return nullptr;
-    }
-
-    auto lifetime_mgr = std::make_shared<BlobLifetimeManager>();
-    auto pool_mgr     = std::make_shared<PoolManager>();
-    auto mm           = std::make_shared<MemoryManagerOnDemand>(lifetime_mgr, pool_mgr);
-
-    return mm;
-}
-
-std::shared_ptr<arm_compute::IWeightsManager> GCDeviceBackend::create_weights_manager()
-{
-    return nullptr;
-}
-} // namespace backends
-} // namespace graph
-} // namespace arm_compute
diff --git a/src/graph/backends/GLES/GCFunctionsFactory.cpp b/src/graph/backends/GLES/GCFunctionsFactory.cpp
deleted file mode 100644
index 7d9d388ebe..0000000000
--- a/src/graph/backends/GLES/GCFunctionsFactory.cpp
+++ /dev/null
@@ -1,275 +0,0 @@
-/*
- * Copyright (c) 2018-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/graph/backends/GLES/GCFunctionFactory.h"
-
-#include "arm_compute/graph/Graph.h"
-#include "arm_compute/graph/GraphContext.h"
-#include "arm_compute/graph/backends/FunctionHelpers.h"
-#include "arm_compute/runtime/GLES_COMPUTE/GCFunctions.h"
-#include "support/Cast.h"
-
-using namespace arm_compute::utils::cast;
-
-namespace arm_compute
-{
-namespace graph
-{
-namespace backends
-{
-/** Target specific information structure used to pass information to the layer templates */
-struct GCTargetInfo
-{
-    using TensorType    = arm_compute::IGCTensor;
-    using SrcTensorType = TensorType;
-    static Target TargetType;
-};
-
-Target GCTargetInfo::TargetType = Target::GC;
-
-/** Collection of GC convolution functions */
-struct GCConvolutionLayerFunctions
-{
-    using GenericConvolutionLayer = GCConvolutionLayer;
-    using GEMMConvolutionLayer    = GCConvolutionLayer;
-    using DirectConvolutionLayer  = GCDirectConvolutionLayer;
-};
-
-/** Collection of GC depthwise convolution functions */
-struct GCDepthwiseConvolutionLayerFunctions
-{
-    using DepthwiseConvolutionLayer3x3 = GCDepthwiseConvolutionLayer3x3;
-};
-
-/** Collection of GC element-wise functions */
-struct GCEltwiseFunctions
-{
-    using Addition       = GCArithmeticAddition;
-    using Multiplication = GCPixelWiseMultiplication;
-};
-
-namespace detail
-{
-template <>
-std::unique_ptr<IFunction> create_convolution_layer<GCConvolutionLayerFunctions, GCTargetInfo>(ConvolutionLayerNode &node, GraphContext &ctx)
-{
-    validate_node<GCTargetInfo>(node, 3 /* expected inputs */, 1 /* expected outputs */);
-
-    // Extract IO and info
-    GCTargetInfo::TensorType *input   = get_backing_tensor<GCTargetInfo>(node.input(0));
-    GCTargetInfo::TensorType *weights = get_backing_tensor<GCTargetInfo>(node.input(1));
-    GCTargetInfo::TensorType *biases  = get_backing_tensor<GCTargetInfo>(node.input(2));
-    GCTargetInfo::TensorType *output  = get_backing_tensor<GCTargetInfo>(node.output(0));
-
-    if(is_data_type_quantized_asymmetric(input->info()->data_type()))
-    {
-        biases->info()->set_data_type(DataType::S32);
-    }
-
-    const PadStrideInfo       conv_info      = node.convolution_info();
-    const ConvolutionMethod   conv_algorithm = node.convolution_method();
-    const ActivationLayerInfo fused_act      = node.fused_activation();
-
-    // Create and configure function (we assume that functions have been validated before creation)
-    std::shared_ptr<IMemoryManager> mm = get_memory_manager(ctx, GCTargetInfo::TargetType);
-    std::unique_ptr<IFunction>      func;
-    std::string                     func_name;
-
-    if(conv_algorithm == ConvolutionMethod::Direct)
-    {
-        std::tie(func, func_name) = create_named_function<GCConvolutionLayerFunctions::DirectConvolutionLayer>(
-                                        std::string("DirectConvolutionLayer"),
-                                        input, weights, biases, output, conv_info, fused_act);
-    }
-    else
-    {
-        std::tie(func, func_name) = create_named_memory_managed_function<GCConvolutionLayerFunctions::GenericConvolutionLayer>(
-                                        std::string("ConvolutionLayer"), mm,
-                                        input, weights, biases, output, conv_info, WeightsInfo(), Size2D(1U, 1U), fused_act);
-    }
-
-    // Log info
-    ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated "
-                               << node.name()
-                               << " Type: " << func_name
-                               << " Data Type: " << input->info()->data_type()
-                               << " Input QuantInfo: " << input->info()->quantization_info()
-                               << " Weights QuantInfo: " << weights->info()->quantization_info()
-                               << " Input shape: " << input->info()->tensor_shape()
-                               << " Weights shape: " << weights->info()->tensor_shape()
-                               << " Output shape: " << output->info()->tensor_shape()
-                               << (fused_act.enabled() ? " " + to_string(fused_act.activation()) : "")
-                               << std::endl);
-    return func;
-}
-
-template <>
-std::unique_ptr<IFunction> create_depthwise_convolution_layer<GCDepthwiseConvolutionLayerFunctions, GCTargetInfo>(DepthwiseConvolutionLayerNode &node)
-{
-    validate_node<GCTargetInfo>(node, 3 /* expected inputs */, 1 /* expected outputs */);
-
-    // Extract IO and info
-    GCTargetInfo::TensorType *input   = get_backing_tensor<GCTargetInfo>(node.input(0));
-    GCTargetInfo::TensorType *weights = get_backing_tensor<GCTargetInfo>(node.input(1));
-    GCTargetInfo::TensorType *biases  = get_backing_tensor<GCTargetInfo>(node.input(2));
-    GCTargetInfo::TensorType *output  = get_backing_tensor<GCTargetInfo>(node.output(0));
-
-    if(is_data_type_quantized_asymmetric(input->info()->data_type()))
-    {
-        biases->info()->set_data_type(DataType::S32);
-    }
-
-    const PadStrideInfo              conv_info        = node.convolution_info();
-    const DepthwiseConvolutionMethod dwc_algorithm    = node.depthwise_convolution_method();
-    const ActivationLayerInfo        fused_act        = node.fused_activation();
-    const int                        depth_multiplier = node.depth_multiplier();
-
-    // Create and configure function (we assume that functions have been validated before creation)
-    std::unique_ptr<IFunction> func;
-    std::string                func_name;
-    if(dwc_algorithm == DepthwiseConvolutionMethod::Optimized3x3)
-    {
-        std::tie(func, func_name) = create_named_function<GCDepthwiseConvolutionLayerFunctions::DepthwiseConvolutionLayer3x3>(
-                                        std::string("DepthwiseConvolutionLayer3x3"),
-                                        input, weights, biases, output, conv_info, depth_multiplier, fused_act);
-    }
-    else
-    {
-        ARM_COMPUTE_ERROR("Generic DepthwiseConvolutionLayer is not supported in GLES backend");
-    }
-
-    // Log info
-    ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated "
-                               << node.name()
-                               << " Type: " << func_name
-                               << " Target " << GCTargetInfo::TargetType
-                               << " Data Type: " << input->info()->data_type()
-                               << " Input QuantInfo: " << input->info()->quantization_info()
-                               << " Weights QuantInfo: " << weights->info()->quantization_info()
-                               << " Input shape: " << input->info()->tensor_shape()
-                               << " Weights shape: " << weights->info()->tensor_shape()
-                               << " Output shape: " << output->info()->tensor_shape()
-                               << " Depth multiplier: " << depth_multiplier
-                               << (fused_act.enabled() ? " " + to_string(fused_act.activation()) : "")
-                               << std::endl);
-    return func;
-}
-
-template <>
-std::unique_ptr<IFunction> create_eltwise_layer<GCEltwiseFunctions, GCTargetInfo>(EltwiseLayerNode &node)
-{
-    ARM_COMPUTE_LOG_GRAPH_VERBOSE(
-        "Creating GC EltwiseLayer node with ID : " << node.id() << " and Name: " << node.name() << std::endl);
-    ARM_COMPUTE_ERROR_ON(node.num_inputs() != 2);
-    ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1);
-
-    // Extract IO and info
-    GCTargetInfo::TensorType *input1         = get_backing_tensor<GCTargetInfo>(node.input(0));
-    GCTargetInfo::TensorType *input2         = get_backing_tensor<GCTargetInfo>(node.input(1));
-    GCTargetInfo::TensorType *output         = get_backing_tensor<GCTargetInfo>(node.output(0));
-    const EltwiseOperation    eltwise_op     = node.eltwise_operation();
-    const ConvertPolicy       convert_policy = node.convert_policy();
-    ARM_COMPUTE_ERROR_ON(input1 == nullptr);
-    ARM_COMPUTE_ERROR_ON(input2 == nullptr);
-    ARM_COMPUTE_ERROR_ON(output == nullptr);
-
-    std::unique_ptr<IFunction> func = nullptr;
-    std::string                func_name;
-    if(eltwise_op == EltwiseOperation::Add)
-    {
-        std::tie(func, func_name) = create_named_function<GCEltwiseFunctions::Addition>(
-                                        std::string("GCArithmeticAddition"),
-                                        input1, input2, output, convert_policy);
-    }
-    else if(eltwise_op == EltwiseOperation::Sub)
-    {
-        ARM_COMPUTE_ERROR("Arithmetic subtraction is not supported in GLES backend");
-    }
-    else if(eltwise_op == EltwiseOperation::Mul)
-    {
-        std::tie(func, func_name) = create_named_function<GCEltwiseFunctions::Multiplication>(
-                                        std::string("PixelWiseMultiplication"),
-                                        input1, input2, output, 1.f);
-    }
-    else
-    {
-        ARM_COMPUTE_ERROR("Unsupported element-wise operation!");
-    }
-
-    // Log info
-    ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated "
-                               << node.name()
-                               << " Type: " << node.type()
-                               << " Target: " << GCTargetInfo::TargetType
-                               << " Operation: " << func_name
-                               << " Data Type: " << input1->info()->data_type()
-                               << " Shape: " << input1->info()->tensor_shape()
-                               << std::endl);
-
-    return func;
-}
-} //namespace detail
-
-std::unique_ptr<IFunction> GCFunctionFactory::create(INode *node, GraphContext &ctx)
-{
-    if(node == nullptr)
-    {
-        return nullptr;
-    }
-
-    NodeType type = node->type();
-    switch(type)
-    {
-        case NodeType::ActivationLayer:
-            return detail::create_activation_layer<GCActivationLayer, GCTargetInfo>(*polymorphic_downcast<ActivationLayerNode *>(node));
-        case NodeType::BatchNormalizationLayer:
-            return detail::create_batch_normalization_layer<GCBatchNormalizationLayer, GCTargetInfo>(*polymorphic_downcast<BatchNormalizationLayerNode *>(node));
-        case NodeType::ConvolutionLayer:
-            return detail::create_convolution_layer<GCConvolutionLayerFunctions, GCTargetInfo>(*polymorphic_downcast<ConvolutionLayerNode *>(node), ctx);
-        case NodeType::ConcatenateLayer:
-            return detail::create_concatenate_layer<GCConcatenateLayer, GCTargetInfo>(*polymorphic_downcast<ConcatenateLayerNode *>(node));
-        case NodeType::DepthwiseConvolutionLayer:
-            return detail::create_depthwise_convolution_layer<GCDepthwiseConvolutionLayerFunctions, GCTargetInfo>(*polymorphic_downcast<DepthwiseConvolutionLayerNode *>(node));
-        case NodeType::EltwiseLayer:
-            return detail::create_eltwise_layer<GCEltwiseFunctions, GCTargetInfo>(*polymorphic_downcast<EltwiseLayerNode *>(node));
-        case NodeType::FullyConnectedLayer:
-            return detail::create_fully_connected_layer<GCFullyConnectedLayer, GCTargetInfo>(*polymorphic_downcast<FullyConnectedLayerNode *>(node), ctx);
-        case NodeType::NormalizationLayer:
-            return detail::create_normalization_layer<GCNormalizationLayer, GCTargetInfo>(*polymorphic_downcast<NormalizationLayerNode *>(node), ctx);
-        case NodeType::NormalizePlanarYUVLayer:
-            return detail::create_normalize_planar_yuv_layer<GCNormalizePlanarYUVLayer, GCTargetInfo>(*polymorphic_downcast<NormalizePlanarYUVLayerNode *>(node));
-        case NodeType::PoolingLayer:
-            return detail::create_pooling_layer<GCPoolingLayer, GCTargetInfo>(*polymorphic_downcast<PoolingLayerNode *>(node));
-        case NodeType::PrintLayer:
-            return detail::create_print_layer<GCTargetInfo>(*polymorphic_downcast<PrintLayerNode *>(node));
-        case NodeType::ResizeLayer:
-            return detail::create_resize_layer<GCScale, GCTargetInfo>(*polymorphic_downcast<ResizeLayerNode *>(node));
-        case NodeType::SoftmaxLayer:
-            return detail::create_softmax_layer<GCSoftmaxLayer, GCTargetInfo>(*polymorphic_downcast<SoftmaxLayerNode *>(node), ctx);
-        default:
-            return nullptr;
-    }
-}
-} // namespace backends
-} // namespace graph
-} // namespace arm_compute
diff --git a/src/graph/backends/GLES/GCNodeValidator.cpp b/src/graph/backends/GLES/GCNodeValidator.cpp
deleted file mode 100644
index 13a93a2556..0000000000
--- a/src/graph/backends/GLES/GCNodeValidator.cpp
+++ /dev/null
@@ -1,151 +0,0 @@
-/*
- * Copyright (c) 2018-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/graph/backends/GLES/GCNodeValidator.h"
-
-#include "arm_compute/graph/backends/ValidateHelpers.h"
-#include "arm_compute/graph/nodes/Nodes.h"
-
-#include "arm_compute/runtime/GLES_COMPUTE/GCFunctions.h"
-#include "support/Cast.h"
-
-using namespace arm_compute::utils::cast;
-
-namespace arm_compute
-{
-namespace graph
-{
-namespace backends
-{
-namespace
-{
-/** Validates a Depthwise Convolution layer node
- *
- * @param[in] node Node to validate
- *
- * @return Status
- */
-Status validate_depthwise_convolution_layer(DepthwiseConvolutionLayerNode &node)
-{
-    ARM_COMPUTE_LOG_GRAPH_VERBOSE("Validating GCDepthwiseConvolutionLayer node with ID : " << node.id() << " and Name: " << node.name() << std::endl);
-    ARM_COMPUTE_RETURN_ERROR_ON(node.num_inputs() != 3);
-    ARM_COMPUTE_RETURN_ERROR_ON(node.num_outputs() != 1);
-
-    // Extract IO and info
-    arm_compute::ITensorInfo *weights = detail::get_backing_tensor_info(node.input(1));
-    ARM_COMPUTE_ERROR_ON(weights == nullptr);
-
-    // TODO (geopin01) : Switch when validation is implemented
-    // Validate function
-    ARM_COMPUTE_RETURN_ERROR_ON_MSG(weights->tensor_shape().x() != 3 && weights->tensor_shape().y() != 3, "Unsupported depthwise convolution");
-
-    return Status{};
-}
-/** Validates a Convolution layer node
- *
- * @param[in] node Node to validate
- *
- * @return Status
- */
-Status validate_convolution_layer(ConvolutionLayerNode &node)
-{
-    ARM_COMPUTE_LOG_GRAPH_VERBOSE("Validating ConvolutionLayer node with ID : " << node.id() << " and Name: " << node.name() << std::endl);
-    ARM_COMPUTE_RETURN_ERROR_ON(node.num_inputs() != 3);
-    ARM_COMPUTE_RETURN_ERROR_ON(node.num_outputs() != 1);
-
-    // Extract IO and info
-    arm_compute::ITensorInfo *weights        = detail::get_backing_tensor_info(node.input(1));
-    const PadStrideInfo       conv_info      = node.convolution_info();
-    const ConvolutionMethod   conv_algorithm = node.convolution_method();
-
-    // Validate function
-    ARM_COMPUTE_RETURN_ERROR_ON_MSG(node.num_groups() != 1, "Grouping is not supported by ConvolutionLayer!");
-    if(conv_algorithm == ConvolutionMethod::Direct)
-    {
-        bool is_square         = weights->tensor_shape().x() == weights->tensor_shape().y();
-        bool is_direct         = (weights->tensor_shape().x() == 1) || (weights->tensor_shape().x() == 3) || (weights->tensor_shape().x() == 5);
-        bool is_correct_stride = (conv_info.stride().first) <= 2 && (conv_info.stride().second <= 2);
-        ARM_COMPUTE_RETURN_ERROR_ON_MSG(!(is_square && is_direct && is_correct_stride), "Direct convolution is not supported for given configuration");
-    }
-
-    return Status{};
-}
-} // namespace
-
-Status GCNodeValidator::validate(INode *node)
-{
-    if(node == nullptr)
-    {
-        return Status{};
-    }
-
-    NodeType type = node->type();
-    switch(type)
-    {
-        case NodeType::BoundingBoxTransformLayer:
-            return ARM_COMPUTE_CREATE_ERROR(arm_compute::ErrorCode::RUNTIME_ERROR, "Unsupported operation : BoundingBoxTransformLayer");
-        case NodeType::ChannelShuffleLayer:
-            return ARM_COMPUTE_CREATE_ERROR(arm_compute::ErrorCode::RUNTIME_ERROR, "Unsupported operation : ChannelShuffleLayer");
-        case NodeType::ConvolutionLayer:
-            return validate_convolution_layer(*polymorphic_downcast<ConvolutionLayerNode *>(node));
-        case NodeType::DepthwiseConvolutionLayer:
-            return validate_depthwise_convolution_layer(*polymorphic_downcast<DepthwiseConvolutionLayerNode *>(node));
-        case NodeType::DequantizationLayer:
-            return ARM_COMPUTE_CREATE_ERROR(arm_compute::ErrorCode::RUNTIME_ERROR, "Unsupported operation : DequantizationLayer");
-        case NodeType::DetectionOutputLayer:
-            return ARM_COMPUTE_CREATE_ERROR(arm_compute::ErrorCode::RUNTIME_ERROR, "Unsupported operation : DetectionOutputLayer");
-        case NodeType::DetectionPostProcessLayer:
-            return ARM_COMPUTE_CREATE_ERROR(arm_compute::ErrorCode::RUNTIME_ERROR, "Unsupported operation : DetectionPostProcessLayer");
-        case NodeType::FlattenLayer:
-            return ARM_COMPUTE_CREATE_ERROR(arm_compute::ErrorCode::RUNTIME_ERROR, "Unsupported operation : FlattenLayer");
-        case NodeType::GenerateProposalsLayer:
-            return ARM_COMPUTE_CREATE_ERROR(arm_compute::ErrorCode::RUNTIME_ERROR, "Unsupported operation : GenerateProposalsLayer");
-        case NodeType::NormalizePlanarYUVLayer:
-            return detail::validate_normalize_planar_yuv_layer<GCNormalizePlanarYUVLayer>(*polymorphic_downcast<NormalizePlanarYUVLayerNode *>(node));
-        case NodeType::PadLayer:
-            return ARM_COMPUTE_CREATE_ERROR(arm_compute::ErrorCode::RUNTIME_ERROR, "Unsupported operation : PadLayer");
-        case NodeType::PermuteLayer:
-            return ARM_COMPUTE_CREATE_ERROR(arm_compute::ErrorCode::RUNTIME_ERROR, "Unsupported operation : PermuteLayer");
-        case NodeType::PriorBoxLayer:
-            return ARM_COMPUTE_CREATE_ERROR(arm_compute::ErrorCode::RUNTIME_ERROR, "Unsupported operation : PriorBoxLayer");
-        case NodeType::QuantizationLayer:
-            return ARM_COMPUTE_CREATE_ERROR(arm_compute::ErrorCode::RUNTIME_ERROR, "Unsupported operation : QuantizationLayer");
-        case NodeType::ReorgLayer:
-            return ARM_COMPUTE_CREATE_ERROR(arm_compute::ErrorCode::RUNTIME_ERROR, "Unsupported operation : ReorgLayer");
-        case NodeType::ReshapeLayer:
-            return ARM_COMPUTE_CREATE_ERROR(arm_compute::ErrorCode::RUNTIME_ERROR, "Unsupported operation : ReshapeLayer");
-        case NodeType::ROIAlignLayer:
-            return ARM_COMPUTE_CREATE_ERROR(arm_compute::ErrorCode::RUNTIME_ERROR, "Unsupported operation : ROIAlignLayer");
-        case NodeType::SliceLayer:
-            return ARM_COMPUTE_CREATE_ERROR(arm_compute::ErrorCode::RUNTIME_ERROR, "Unsupported operation : SliceLayer");
-        case NodeType::UpsampleLayer:
-            return ARM_COMPUTE_CREATE_ERROR(arm_compute::ErrorCode::RUNTIME_ERROR, "Unsupported operation : UpsampleLayer");
-        case NodeType::YOLOLayer:
-            return ARM_COMPUTE_CREATE_ERROR(arm_compute::ErrorCode::RUNTIME_ERROR, "Unsupported operation : YOLOLayer");
-        default:
-            return Status{};
-    }
-}
-} // namespace backends
-} // namespace graph
-} // namespace arm_compute
diff --git a/src/graph/backends/GLES/GCTensorHandle.cpp b/src/graph/backends/GLES/GCTensorHandle.cpp
deleted file mode 100644
index 94e8813246..0000000000
--- a/src/graph/backends/GLES/GCTensorHandle.cpp
+++ /dev/null
@@ -1,103 +0,0 @@
-/*
- * Copyright (c) 2018-2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/graph/backends/GLES/GCTensorHandle.h"
-
-#include "arm_compute/runtime/IMemoryGroup.h"
-
-namespace arm_compute
-{
-namespace graph
-{
-namespace backends
-{
-GCTensorHandle::GCTensorHandle(const ITensorInfo &info)
-    : _tensor()
-{
-    _tensor.allocator()->init(info);
-}
-
-void GCTensorHandle::allocate()
-{
-    _tensor.allocator()->allocate();
-}
-
-void GCTensorHandle::free()
-{
-    _tensor.allocator()->free();
-}
-
-void GCTensorHandle::manage(IMemoryGroup *mg)
-{
-    if(mg != nullptr)
-    {
-        mg->manage(&_tensor);
-    }
-}
-
-void GCTensorHandle::map(bool blocking)
-{
-    _tensor.map(blocking);
-}
-
-void GCTensorHandle::unmap()
-{
-    _tensor.unmap();
-}
-
-void GCTensorHandle::release_if_unused()
-{
-    // TODO (geopin01): Release tensor only if all sub-tensors are marked as not used
-    if(!_tensor.is_used())
-    {
-        _tensor.allocator()->free();
-    }
-}
-
-const arm_compute::ITensor &GCTensorHandle::tensor() const
-{
-    return _tensor;
-}
-
-arm_compute::ITensor &GCTensorHandle::tensor()
-{
-    return _tensor;
-}
-
-ITensorHandle *GCTensorHandle::parent_handle()
-{
-    return this;
-}
-
-bool GCTensorHandle::is_subtensor() const
-{
-    return false;
-}
-
-Target GCTensorHandle::target() const
-{
-    return Target::GC;
-}
-} // namespace backends
-} // namespace graph
-} // namespace arm_compute
-\ No newline at end of file
diff --git a/src/graph/backends/NEON/NEDeviceBackend.cpp b/src/graph/backends/NEON/NEDeviceBackend.cpp
index adb87a952b..fc7b309803 100644
--- a/src/graph/backends/NEON/NEDeviceBackend.cpp
+++ b/src/graph/backends/NEON/NEDeviceBackend.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021,2023 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -23,18 +23,17 @@
  */
 #include "arm_compute/graph/backends/NEON/NEDeviceBackend.h"
 
-#include "arm_compute/graph/Graph.h"
-#include "arm_compute/graph/GraphContext.h"
-#include "arm_compute/graph/INode.h"
-#include "arm_compute/graph/Logger.h"
-#include "arm_compute/graph/Tensor.h"
+#include "arm_compute/core/TensorInfo.h"
 #include "arm_compute/graph/backends/BackendRegistrar.h"
 #include "arm_compute/graph/backends/NEON/NEFunctionFactory.h"
 #include "arm_compute/graph/backends/NEON/NENodeValidator.h"
 #include "arm_compute/graph/backends/NEON/NESubTensorHandle.h"
 #include "arm_compute/graph/backends/NEON/NETensorHandle.h"
-
-#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/graph/Graph.h"
+#include "arm_compute/graph/GraphContext.h"
+#include "arm_compute/graph/INode.h"
+#include "arm_compute/graph/Logger.h"
+#include "arm_compute/graph/Tensor.h"
 #include "arm_compute/runtime/Allocator.h"
 #include "arm_compute/runtime/BlobLifetimeManager.h"
 #include "arm_compute/runtime/IWeightsManager.h"
@@ -44,19 +43,16 @@
 #include "arm_compute/runtime/PoolManager.h"
 #include "arm_compute/runtime/Scheduler.h"
 
-#include "support/ToolchainSupport.h"
-
 namespace arm_compute
 {
 namespace graph
 {
 namespace backends
 {
-/** Register NEON backend */
+/** Register CPU backend */
 static detail::BackendRegistrar<NEDeviceBackend> NEDeviceBackend_registrar(Target::NEON);
 
-NEDeviceBackend::NEDeviceBackend()
-    : _allocator()
+NEDeviceBackend::NEDeviceBackend() : _allocator()
 {
 }
 
@@ -74,13 +70,13 @@ void NEDeviceBackend::release_backend_context(GraphContext &ctx)
 void NEDeviceBackend::setup_backend_context(GraphContext &ctx)
 {
     // Set number of threads
-    if(ctx.config().num_threads >= 0)
+    if (ctx.config().num_threads >= 0)
     {
         Scheduler::get().set_num_threads(ctx.config().num_threads);
     }
 
     // Create function level memory manager
-    if(ctx.memory_management_ctx(Target::NEON) == nullptr)
+    if (ctx.memory_management_ctx(Target::NEON) == nullptr)
     {
         MemoryManagerContext mm_ctx;
         mm_ctx.target      = Target::NEON;
@@ -93,7 +89,7 @@ void NEDeviceBackend::setup_backend_context(GraphContext &ctx)
     }
 
     // Create function level weights manager
-    if(ctx.weights_management_ctx(Target::NEON) == nullptr)
+    if (ctx.weights_management_ctx(Target::NEON) == nullptr)
     {
         WeightsManagerContext wm_ctx;
         wm_ctx.target = Target::NEON;
@@ -123,22 +119,23 @@ std::unique_ptr<ITensorHandle> NEDeviceBackend::create_tensor(const Tensor &tens
     TensorInfo info(tensor_desc.shape, 1, tensor_desc.data_type, tensor_desc.quant_info);
     info.set_data_layout(tensor_desc.layout);
 
-    return support::cpp14::make_unique<NETensorHandle>(info);
+    return std::make_unique<NETensorHandle>(info);
 }
 
-std::unique_ptr<ITensorHandle> NEDeviceBackend::create_subtensor(ITensorHandle *parent, TensorShape shape, Coordinates coords, bool extend_parent)
+std::unique_ptr<ITensorHandle>
+NEDeviceBackend::create_subtensor(ITensorHandle *parent, TensorShape shape, Coordinates coords, bool extend_parent)
 {
-    if(parent == nullptr)
+    if (parent == nullptr)
     {
         return nullptr;
     }
 
-    return support::cpp14::make_unique<NESubTensorHandle>(parent, shape, coords, extend_parent);
+    return std::make_unique<NESubTensorHandle>(parent, shape, coords, extend_parent);
 }
 
 std::unique_ptr<arm_compute::IFunction> NEDeviceBackend::configure_node(INode &node, GraphContext &ctx)
 {
-    ARM_COMPUTE_LOG_GRAPH_VERBOSE("Configuring NEON node with ID : " << node.id() << std::endl);
+    ARM_COMPUTE_LOG_GRAPH_VERBOSE("Configuring CPU node with ID : " << node.id() << std::endl);
     ARM_COMPUTE_ERROR_ON(node.assigned_target() != Target::NEON);
 
     // Configure node
@@ -147,7 +144,7 @@ std::unique_ptr<arm_compute::IFunction> NEDeviceBackend::configure_node(INode &n
 
 arm_compute::Status NEDeviceBackend::validate_node(INode &node)
 {
-    ARM_COMPUTE_LOG_GRAPH_VERBOSE("Validating NEON node with ID : " << node.id() << std::endl);
+    ARM_COMPUTE_LOG_GRAPH_VERBOSE("Validating CPU node with ID : " << node.id() << std::endl);
     ARM_COMPUTE_ERROR_ON(node.assigned_target() != Target::NEON);
 
     return NENodeValidator::validate(&node);
@@ -156,7 +153,7 @@ arm_compute::Status NEDeviceBackend::validate_node(INode &node)
 std::shared_ptr<arm_compute::IMemoryManager> NEDeviceBackend::create_memory_manager(MemoryManagerAffinity affinity)
 {
     std::shared_ptr<ILifetimeManager> lifetime_mgr = nullptr;
-    if(affinity == MemoryManagerAffinity::Buffer)
+    if (affinity == MemoryManagerAffinity::Buffer)
     {
         lifetime_mgr = std::make_shared<BlobLifetimeManager>();
     }
@@ -175,6 +172,11 @@ std::shared_ptr<arm_compute::IWeightsManager> NEDeviceBackend::create_weights_ma
     auto weights_mgr = std::make_shared<IWeightsManager>();
     return weights_mgr;
 }
+
+void NEDeviceBackend::sync()
+{
+    // nop
+}
 } // namespace backends
 } // namespace graph
 } // namespace arm_compute
diff --git a/src/graph/backends/NEON/NEFunctionFactory.cpp b/src/graph/backends/NEON/NEFunctionFactory.cpp
index ec06f3fa30..fe15d4cec1 100644
--- a/src/graph/backends/NEON/NEFunctionFactory.cpp
+++ b/src/graph/backends/NEON/NEFunctionFactory.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021,2023 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -23,18 +23,15 @@
  */
 #include "arm_compute/graph/backends/NEON/NEFunctionFactory.h"
 
+#include "arm_compute/graph/backends/FunctionHelpers.h"
+#include "arm_compute/graph/backends/Utils.h"
 #include "arm_compute/graph/Graph.h"
 #include "arm_compute/graph/GraphContext.h"
 #include "arm_compute/graph/Logger.h"
-#include "arm_compute/graph/TypePrinter.h"
-#include "arm_compute/graph/backends/FunctionHelpers.h"
-#include "arm_compute/graph/backends/Utils.h"
 #include "arm_compute/graph/nodes/Nodes.h"
+#include "arm_compute/graph/TypePrinter.h"
 #include "arm_compute/runtime/CPP/CPPFunctions.h"
 #include "arm_compute/runtime/NEON/NEFunctions.h"
-#include "src/core/NEON/NEKernels.h"
-#include "support/Cast.h"
-#include "support/ToolchainSupport.h"
 
 using namespace arm_compute::utils::cast;
 
@@ -55,7 +52,7 @@ struct NETargetInfo
 
 Target NETargetInfo::TargetType = Target::NEON;
 
-/** Collection of NEON convolution functions */
+/** Collection of CPU convolution functions */
 struct NEConvolutionLayerFunctions
 {
     using GenericConvolutionLayer  = NEConvolutionLayer;
@@ -64,22 +61,23 @@ struct NEConvolutionLayerFunctions
     using WinogradConvolutionLayer = NEWinogradConvolutionLayer;
 };
 
-/** Collection of NEON element-wise functions */
+/** Collection of CPU element-wise functions */
 struct NEEltwiseFunctions
 {
     using Addition       = NEArithmeticAddition;
     using Subtraction    = NEArithmeticSubtraction;
     using Multiplication = NEPixelWiseMultiplication;
     using Maximum        = NEElementwiseMax;
+    using Division       = NEElementwiseDivision;
 };
 
-/** Collection of NEON unary element-wise functions */
+/** Collection of CPU unary element-wise functions */
 struct NEUnaryEltwiseFunctions
 {
     using Exp = NEExpLayer;
 };
 
-/** Function and tensor types to be used inside a NEON fused convolution/batch normalization layer */
+/** Function and tensor types to be used inside a fused convolution/batch normalization layer */
 struct NEFusedLayerTypes
 {
     using ConvolutionLayer          = NEConvolutionLayer;
@@ -90,7 +88,8 @@ struct NEFusedLayerTypes
 namespace detail
 {
 template <>
-std::unique_ptr<IFunction> create_normalization_layer<NENormalizationLayer, NETargetInfo>(NormalizationLayerNode &node, GraphContext &ctx)
+std::unique_ptr<IFunction> create_normalization_layer<NENormalizationLayer, NETargetInfo>(NormalizationLayerNode &node,
+                                                                                          GraphContext           &ctx)
 {
     validate_node<NETargetInfo>(node, 1 /* expected inputs */, 1 /* expected outputs */);
 
@@ -102,106 +101,132 @@ std::unique_ptr<IFunction> create_normalization_layer<NENormalizationLayer, NETa
     ARM_COMPUTE_ERROR_ON(output == nullptr);
 
     // Create and configure function
-    auto func = support::cpp14::make_unique<NENormalizationLayer>(get_memory_manager(ctx, NETargetInfo::TargetType));
+    auto func = std::make_unique<NENormalizationLayer>(get_memory_manager(ctx, NETargetInfo::TargetType));
     func->configure(input, output, norm_info);
 
     // Log info
     ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated "
-                               << node.name()
-                               << " Type: " << node.type()
-                               << " Target: " << NETargetInfo::TargetType
-                               << " Data Type: " << input->info()->data_type()
-                               << " Input shape: " << input->info()->tensor_shape()
-                               << " Output shape: " << output->info()->tensor_shape()
-                               << " Normalization info: " << norm_info.type()
-                               << std::endl);
+                               << node.name() << " Type: " << node.type() << " Target: " << NETargetInfo::TargetType
+                               << " Data Type: " << input->info()->data_type() << " Input shape: "
+                               << input->info()->tensor_shape() << " Output shape: " << output->info()->tensor_shape()
+                               << " Normalization info: " << norm_info.type() << std::endl);
 
-    return RETURN_UNIQUE_PTR(func);
+    return func;
 }
 } // namespace detail
 
 std::unique_ptr<IFunction> NEFunctionFactory::create(INode *node, GraphContext &ctx)
 {
-    if(node == nullptr)
+    if (node == nullptr)
     {
         return nullptr;
     }
 
     NodeType type = node->type();
-    switch(type)
+    switch (type)
     {
         case NodeType::ActivationLayer:
-            return detail::create_activation_layer<NEActivationLayer, NETargetInfo>(*polymorphic_downcast<ActivationLayerNode *>(node));
+            return detail::create_activation_layer<NEActivationLayer, NETargetInfo>(
+                *polymorphic_downcast<ActivationLayerNode *>(node));
         case NodeType::ArgMinMaxLayer:
-            return detail::create_arg_min_max_layer<NEArgMinMaxLayer, NETargetInfo>(*polymorphic_downcast<ArgMinMaxLayerNode *>(node));
+            return detail::create_arg_min_max_layer<NEArgMinMaxLayer, NETargetInfo>(
+                *polymorphic_downcast<ArgMinMaxLayerNode *>(node));
         case NodeType::BatchNormalizationLayer:
-            return detail::create_batch_normalization_layer<NEBatchNormalizationLayer, NETargetInfo>(*polymorphic_downcast<BatchNormalizationLayerNode *>(node));
+            return detail::create_batch_normalization_layer<NEBatchNormalizationLayer, NETargetInfo>(
+                *polymorphic_downcast<BatchNormalizationLayerNode *>(node));
         case NodeType::ChannelShuffleLayer:
-            return detail::create_channel_shuffle_layer<NEChannelShuffleLayer, NETargetInfo>(*polymorphic_downcast<ChannelShuffleLayerNode *>(node));
+            return detail::create_channel_shuffle_layer<NEChannelShuffleLayer, NETargetInfo>(
+                *polymorphic_downcast<ChannelShuffleLayerNode *>(node));
         case NodeType::ConvolutionLayer:
-            return detail::create_convolution_layer<NEConvolutionLayerFunctions, NETargetInfo>(*polymorphic_downcast<ConvolutionLayerNode *>(node), ctx);
+            return detail::create_convolution_layer<NEConvolutionLayerFunctions, NETargetInfo>(
+                *polymorphic_downcast<ConvolutionLayerNode *>(node), ctx);
         case NodeType::DepthToSpaceLayer:
-            return detail::create_depth_to_space_layer<NEDepthToSpaceLayer, NETargetInfo>(*polymorphic_downcast<DepthToSpaceLayerNode *>(node));
+            return detail::create_depth_to_space_layer<NEDepthToSpaceLayer, NETargetInfo>(
+                *polymorphic_downcast<DepthToSpaceLayerNode *>(node));
         case NodeType::DeconvolutionLayer:
-            return detail::create_deconvolution_layer<NEDeconvolutionLayer, NETargetInfo>(*polymorphic_downcast<DeconvolutionLayerNode *>(node), ctx);
+            return detail::create_deconvolution_layer<NEDeconvolutionLayer, NETargetInfo>(
+                *polymorphic_downcast<DeconvolutionLayerNode *>(node), ctx);
         case NodeType::ConcatenateLayer:
-            return detail::create_concatenate_layer<NEConcatenateLayer, NETargetInfo>(*polymorphic_downcast<ConcatenateLayerNode *>(node));
+            return detail::create_concatenate_layer<NEConcatenateLayer, NETargetInfo>(
+                *polymorphic_downcast<ConcatenateLayerNode *>(node));
         case NodeType::DepthwiseConvolutionLayer:
-            return detail::create_depthwise_convolution_layer<NEDepthwiseConvolutionLayer, NETargetInfo>(*polymorphic_downcast<DepthwiseConvolutionLayerNode *>(node));
+            return detail::create_depthwise_convolution_layer<NEDepthwiseConvolutionLayer, NETargetInfo>(
+                *polymorphic_downcast<DepthwiseConvolutionLayerNode *>(node));
         case NodeType::DequantizationLayer:
-            return detail::create_dequantization_layer<NEDequantizationLayer, NETargetInfo>(*polymorphic_downcast<DequantizationLayerNode *>(node));
+            return detail::create_dequantization_layer<NEDequantizationLayer, NETargetInfo>(
+                *polymorphic_downcast<DequantizationLayerNode *>(node));
         case NodeType::DetectionOutputLayer:
-            return detail::create_detection_output_layer<CPPDetectionOutputLayer, NETargetInfo>(*polymorphic_downcast<DetectionOutputLayerNode *>(node));
+            return detail::create_detection_output_layer<CPPDetectionOutputLayer, NETargetInfo>(
+                *polymorphic_downcast<DetectionOutputLayerNode *>(node));
         case NodeType::DetectionPostProcessLayer:
-            return detail::create_detection_post_process_layer<NEDetectionPostProcessLayer, NETargetInfo>(*polymorphic_downcast<DetectionPostProcessLayerNode *>(node));
+            return detail::create_detection_post_process_layer<NEDetectionPostProcessLayer, NETargetInfo>(
+                *polymorphic_downcast<DetectionPostProcessLayerNode *>(node));
         case NodeType::EltwiseLayer:
-            return detail::create_eltwise_layer<NEEltwiseFunctions, NETargetInfo>(*polymorphic_downcast<EltwiseLayerNode *>(node));
+            return detail::create_eltwise_layer<NEEltwiseFunctions, NETargetInfo>(
+                *polymorphic_downcast<EltwiseLayerNode *>(node));
         case NodeType::UnaryEltwiseLayer:
-            return detail::create_unary_eltwise_layer<NEUnaryEltwiseFunctions, NETargetInfo>(*polymorphic_downcast<UnaryEltwiseLayerNode *>(node));
+            return detail::create_unary_eltwise_layer<NEUnaryEltwiseFunctions, NETargetInfo>(
+                *polymorphic_downcast<UnaryEltwiseLayerNode *>(node));
         case NodeType::FlattenLayer:
-            return detail::create_flatten_layer<NEFlattenLayer, NETargetInfo>(*polymorphic_downcast<FlattenLayerNode *>(node));
+            return detail::create_flatten_layer<NEFlattenLayer, NETargetInfo>(
+                *polymorphic_downcast<FlattenLayerNode *>(node));
         case NodeType::FullyConnectedLayer:
-            return detail::create_fully_connected_layer<NEFullyConnectedLayer, NETargetInfo>(*polymorphic_downcast<FullyConnectedLayerNode *>(node), ctx);
+            return detail::create_fully_connected_layer<NEFullyConnectedLayer, NETargetInfo>(
+                *polymorphic_downcast<FullyConnectedLayerNode *>(node), ctx);
         case NodeType::FusedConvolutionBatchNormalizationLayer:
-            return detail::create_fused_convolution_batch_normalization_layer<NEFusedLayerTypes, NETargetInfo>(*polymorphic_downcast<FusedConvolutionBatchNormalizationNode *>(node), ctx);
+            return detail::create_fused_convolution_batch_normalization_layer<NEFusedLayerTypes, NETargetInfo>(
+                *polymorphic_downcast<FusedConvolutionBatchNormalizationNode *>(node), ctx);
         case NodeType::FusedDepthwiseConvolutionBatchNormalizationLayer:
-            return detail::create_fused_depthwise_convolution_batch_normalization_layer<NEFusedLayerTypes, NETargetInfo>(*polymorphic_downcast<FusedDepthwiseConvolutionBatchNormalizationNode *>(node), ctx);
+            return detail::create_fused_depthwise_convolution_batch_normalization_layer<NEFusedLayerTypes,
+                                                                                        NETargetInfo>(
+                *polymorphic_downcast<FusedDepthwiseConvolutionBatchNormalizationNode *>(node), ctx);
         case NodeType::L2NormalizeLayer:
-            return detail::create_l2_normalize_layer<NEL2NormalizeLayer, NETargetInfo>(*polymorphic_downcast<L2NormalizeLayerNode *>(node), ctx);
+            return detail::create_l2_normalize_layer<NEL2NormalizeLayer, NETargetInfo>(
+                *polymorphic_downcast<L2NormalizeLayerNode *>(node), ctx);
         case NodeType::NormalizationLayer:
-            return detail::create_normalization_layer<NENormalizationLayer, NETargetInfo>(*polymorphic_downcast<NormalizationLayerNode *>(node), ctx);
+            return detail::create_normalization_layer<NENormalizationLayer, NETargetInfo>(
+                *polymorphic_downcast<NormalizationLayerNode *>(node), ctx);
         case NodeType::PadLayer:
             return detail::create_pad_layer<NEPadLayer, NETargetInfo>(*polymorphic_downcast<PadLayerNode *>(node));
         case NodeType::PermuteLayer:
-            return detail::create_permute_layer<NEPermute, NETargetInfo>(*polymorphic_downcast<PermuteLayerNode *>(node));
+            return detail::create_permute_layer<NEPermute, NETargetInfo>(
+                *polymorphic_downcast<PermuteLayerNode *>(node));
         case NodeType::PoolingLayer:
-            return detail::create_pooling_layer<NEPoolingLayer, NETargetInfo>(*polymorphic_downcast<PoolingLayerNode *>(node));
+            return detail::create_pooling_layer<NEPoolingLayer, NETargetInfo>(
+                *polymorphic_downcast<PoolingLayerNode *>(node));
         case NodeType::PReluLayer:
-            return detail::create_prelu_layer<NEPReluLayer, NETargetInfo>(*polymorphic_downcast<PReluLayerNode *>(node));
+            return detail::create_prelu_layer<NEPReluLayer, NETargetInfo>(
+                *polymorphic_downcast<PReluLayerNode *>(node));
         case NodeType::PrintLayer:
             return detail::create_print_layer<NETargetInfo>(*polymorphic_downcast<PrintLayerNode *>(node));
         case NodeType::PriorBoxLayer:
-            return detail::create_priorbox_layer<NEPriorBoxLayer, NETargetInfo>(*polymorphic_downcast<PriorBoxLayerNode *>(node));
+            return detail::create_priorbox_layer<NEPriorBoxLayer, NETargetInfo>(
+                *polymorphic_downcast<PriorBoxLayerNode *>(node));
         case NodeType::QuantizationLayer:
-            return detail::create_quantization_layer<NEQuantizationLayer, NETargetInfo>(*polymorphic_downcast<QuantizationLayerNode *>(node));
+            return detail::create_quantization_layer<NEQuantizationLayer, NETargetInfo>(
+                *polymorphic_downcast<QuantizationLayerNode *>(node));
         case NodeType::ReductionOperationLayer:
-            return detail::create_reduction_operation_layer<NEReductionOperation, NETargetInfo>(*polymorphic_downcast<ReductionLayerNode *>(node), ctx);
+            return detail::create_reduction_operation_layer<NEReductionOperation, NETargetInfo>(
+                *polymorphic_downcast<ReductionLayerNode *>(node), ctx);
         case NodeType::ReorgLayer:
-            return detail::create_reorg_layer<NEReorgLayer, NETargetInfo>(*polymorphic_downcast<ReorgLayerNode *>(node));
+            return detail::create_reorg_layer<NEReorgLayer, NETargetInfo>(
+                *polymorphic_downcast<ReorgLayerNode *>(node));
         case NodeType::ReshapeLayer:
-            return detail::create_reshape_layer<NEReshapeLayer, NETargetInfo>(*polymorphic_downcast<ReshapeLayerNode *>(node));
+            return detail::create_reshape_layer<NEReshapeLayer, NETargetInfo>(
+                *polymorphic_downcast<ReshapeLayerNode *>(node));
         case NodeType::ResizeLayer:
             return detail::create_resize_layer<NEScale, NETargetInfo>(*polymorphic_downcast<ResizeLayerNode *>(node));
+        case NodeType::SliceLayer:
+            return detail::create_slice_layer<NESlice, NETargetInfo>(*polymorphic_downcast<SliceLayerNode *>(node));
         case NodeType::SoftmaxLayer:
-            return detail::create_softmax_layer<NESoftmaxLayer, NETargetInfo>(*polymorphic_downcast<SoftmaxLayerNode *>(node), ctx);
+            return detail::create_softmax_layer<NESoftmaxLayer, NETargetInfo>(
+                *polymorphic_downcast<SoftmaxLayerNode *>(node), ctx);
         case NodeType::StackLayer:
-            return detail::create_stack_layer<NEStackLayer, NETargetInfo>(*polymorphic_downcast<StackLayerNode *>(node));
+            return detail::create_stack_layer<NEStackLayer, NETargetInfo>(
+                *polymorphic_downcast<StackLayerNode *>(node));
         case NodeType::StridedSliceLayer:
-            return detail::create_strided_slice_layer<NEStridedSlice, NETargetInfo>(*polymorphic_downcast<StridedSliceLayerNode *>(node));
-        case NodeType::UpsampleLayer:
-            return detail::create_upsample_layer<NEUpsampleLayer, NETargetInfo>(*polymorphic_downcast<UpsampleLayerNode *>(node), ctx);
-        case NodeType::YOLOLayer:
-            return detail::create_yolo_layer<NEYOLOLayer, NETargetInfo>(*polymorphic_downcast<YOLOLayerNode *>(node), ctx);
+            return detail::create_strided_slice_layer<NEStridedSlice, NETargetInfo>(
+                *polymorphic_downcast<StridedSliceLayerNode *>(node));
         default:
             return nullptr;
     }
diff --git a/src/graph/backends/NEON/NENodeValidator.cpp b/src/graph/backends/NEON/NENodeValidator.cpp
index a9e5a86249..a97806f92c 100644
--- a/src/graph/backends/NEON/NENodeValidator.cpp
+++ b/src/graph/backends/NEON/NENodeValidator.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -25,22 +25,9 @@
 
 #include "arm_compute/graph/backends/ValidateHelpers.h"
 #include "arm_compute/graph/nodes/Nodes.h"
-
 #include "arm_compute/runtime/CPP/CPPFunctions.h"
 #include "arm_compute/runtime/NEON/NEFunctions.h"
-#include "src/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.h"
-#include "src/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h"
-#include "src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h"
-#include "src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h"
-#include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h"
-#include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h"
-#include "src/core/NEON/kernels/NEGEMMLowpReductionKernel.h"
-#include "src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h"
-#include "src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h"
-#include "src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h"
-#include "src/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.h"
-#include "src/core/NEON/kernels/NEReshapeLayerKernel.h"
-#include "src/core/NEON/kernels/NEWeightsReshapeKernel.h"
+
 #include "support/Cast.h"
 
 using namespace arm_compute::utils::cast;
@@ -51,16 +38,17 @@ namespace graph
 {
 namespace backends
 {
-/** Collection of NEON element-wise functions */
+/** Collection of CPU element-wise functions */
 struct NEEltwiseLayerFunctions
 {
     using ArithmeticAddition      = NEArithmeticAddition;
     using ArithmeticSubtraction   = NEArithmeticSubtraction;
     using PixelWiseMultiplication = NEPixelWiseMultiplication;
     using ElementwiseMax          = NEElementwiseMax;
+    using ArithmeticDivision      = NEElementwiseDivision;
 };
 
-/** Collection of NEON unary element-wise functions */
+/** Collection of CPU unary element-wise functions */
 struct NEUnaryEltwiseLayerFunctions
 {
     using ExpLayer = NEExpLayer;
@@ -68,41 +56,51 @@ struct NEUnaryEltwiseLayerFunctions
 
 Status NENodeValidator::validate(INode *node)
 {
-    if(node == nullptr)
+    if (node == nullptr)
     {
         return Status{};
     }
 
     NodeType type = node->type();
-    switch(type)
+    switch (type)
     {
         case NodeType::ArgMinMaxLayer:
-            return detail::validate_arg_min_max_layer<NEArgMinMaxLayer>(*polymorphic_downcast<ArgMinMaxLayerNode *>(node));
+            return detail::validate_arg_min_max_layer<NEArgMinMaxLayer>(
+                *polymorphic_downcast<ArgMinMaxLayerNode *>(node));
         case NodeType::BoundingBoxTransformLayer:
-            return ARM_COMPUTE_CREATE_ERROR(arm_compute::ErrorCode::RUNTIME_ERROR, "Unsupported operation : BoundingBoxTransformLayer");
+            return ARM_COMPUTE_CREATE_ERROR(arm_compute::ErrorCode::RUNTIME_ERROR,
+                                            "Unsupported operation : BoundingBoxTransformLayer");
         case NodeType::ChannelShuffleLayer:
-            return detail::validate_channel_shuffle_layer<NEChannelShuffleLayer>(*polymorphic_downcast<ChannelShuffleLayerNode *>(node));
+            return detail::validate_channel_shuffle_layer<NEChannelShuffleLayer>(
+                *polymorphic_downcast<ChannelShuffleLayerNode *>(node));
         case NodeType::ConvolutionLayer:
-            return detail::validate_convolution_layer<NEConvolutionLayer,
-                   NEDirectConvolutionLayer,
-                   NEGEMMConvolutionLayer,
-                   NEWinogradConvolutionLayer>(*polymorphic_downcast<ConvolutionLayerNode *>(node));
+            return detail::validate_convolution_layer<NEConvolutionLayer, NEDirectConvolutionLayer,
+                                                      NEGEMMConvolutionLayer, NEWinogradConvolutionLayer>(
+                *polymorphic_downcast<ConvolutionLayerNode *>(node));
         case NodeType::DepthToSpaceLayer:
-            return detail::validate_depth_to_space_layer<NEDepthToSpaceLayer>(*polymorphic_downcast<DepthToSpaceLayerNode *>(node));
+            return detail::validate_depth_to_space_layer<NEDepthToSpaceLayer>(
+                *polymorphic_downcast<DepthToSpaceLayerNode *>(node));
         case NodeType::DepthwiseConvolutionLayer:
-            return detail::validate_depthwise_convolution_layer<NEDepthwiseConvolutionLayer>(*polymorphic_downcast<DepthwiseConvolutionLayerNode *>(node));
+            return detail::validate_depthwise_convolution_layer<NEDepthwiseConvolutionLayer>(
+                *polymorphic_downcast<DepthwiseConvolutionLayerNode *>(node));
         case NodeType::DequantizationLayer:
-            return detail::validate_dequantization_layer<NEDequantizationLayer>(*polymorphic_downcast<DequantizationLayerNode *>(node));
+            return detail::validate_dequantization_layer<NEDequantizationLayer>(
+                *polymorphic_downcast<DequantizationLayerNode *>(node));
         case NodeType::DetectionOutputLayer:
-            return detail::validate_detection_output_layer<CPPDetectionOutputLayer>(*polymorphic_downcast<DetectionOutputLayerNode *>(node));
+            return detail::validate_detection_output_layer<CPPDetectionOutputLayer>(
+                *polymorphic_downcast<DetectionOutputLayerNode *>(node));
         case NodeType::DetectionPostProcessLayer:
-            return detail::validate_detection_post_process_layer<NEDetectionPostProcessLayer>(*polymorphic_downcast<DetectionPostProcessLayerNode *>(node));
+            return detail::validate_detection_post_process_layer<NEDetectionPostProcessLayer>(
+                *polymorphic_downcast<DetectionPostProcessLayerNode *>(node));
         case NodeType::GenerateProposalsLayer:
-            return ARM_COMPUTE_CREATE_ERROR(arm_compute::ErrorCode::RUNTIME_ERROR, "Unsupported operation : GenerateProposalsLayer");
+            return ARM_COMPUTE_CREATE_ERROR(arm_compute::ErrorCode::RUNTIME_ERROR,
+                                            "Unsupported operation : GenerateProposalsLayer");
         case NodeType::L2NormalizeLayer:
-            return detail::validate_l2_normalize_layer<NEL2NormalizeLayer>(*polymorphic_downcast<L2NormalizeLayerNode *>(node));
+            return detail::validate_l2_normalize_layer<NEL2NormalizeLayer>(
+                *polymorphic_downcast<L2NormalizeLayerNode *>(node));
         case NodeType::NormalizePlanarYUVLayer:
-            return ARM_COMPUTE_CREATE_ERROR(arm_compute::ErrorCode::RUNTIME_ERROR, "Unsupported operation : NormalizePlanarYUVLayer");
+            return ARM_COMPUTE_CREATE_ERROR(arm_compute::ErrorCode::RUNTIME_ERROR,
+                                            "Unsupported operation : NormalizePlanarYUVLayer");
         case NodeType::PadLayer:
             return detail::validate_pad_layer<NEPadLayer>(*polymorphic_downcast<PadLayerNode *>(node));
         case NodeType::PermuteLayer:
@@ -112,27 +110,29 @@ Status NENodeValidator::validate(INode *node)
         case NodeType::PriorBoxLayer:
             return detail::validate_priorbox_layer<NEPriorBoxLayer>(*polymorphic_downcast<PriorBoxLayerNode *>(node));
         case NodeType::QuantizationLayer:
-            return detail::validate_quantization_layer<NEQuantizationLayer>(*polymorphic_downcast<QuantizationLayerNode *>(node));
+            return detail::validate_quantization_layer<NEQuantizationLayer>(
+                *polymorphic_downcast<QuantizationLayerNode *>(node));
         case NodeType::ReductionOperationLayer:
-            return detail::validate_reduction_operation_layer<NEReductionOperation>(*polymorphic_downcast<ReductionLayerNode *>(node));
+            return detail::validate_reduction_operation_layer<NEReductionOperation>(
+                *polymorphic_downcast<ReductionLayerNode *>(node));
         case NodeType::ReorgLayer:
             return detail::validate_reorg_layer<NEReorgLayer>(*polymorphic_downcast<ReorgLayerNode *>(node));
         case NodeType::ReshapeLayer:
             return detail::validate_reshape_layer<NEReshapeLayer>(*polymorphic_downcast<ReshapeLayerNode *>(node));
         case NodeType::ROIAlignLayer:
-            return ARM_COMPUTE_CREATE_ERROR(arm_compute::ErrorCode::RUNTIME_ERROR, "Unsupported operation : ROIAlignLayer");
+            return ARM_COMPUTE_CREATE_ERROR(arm_compute::ErrorCode::RUNTIME_ERROR,
+                                            "Unsupported operation : ROIAlignLayer");
         case NodeType::SliceLayer:
-            return ARM_COMPUTE_CREATE_ERROR(arm_compute::ErrorCode::RUNTIME_ERROR, "Unsupported operation : SliceLayer");
+            return detail::validate_slice_layer<NESlice>(*polymorphic_downcast<SliceLayerNode *>(node));
         case NodeType::StridedSliceLayer:
-            return detail::validate_strided_slice_layer<NEStridedSlice>(*polymorphic_downcast<StridedSliceLayerNode *>(node));
-        case NodeType::UpsampleLayer:
-            return detail::validate_upsample_layer<NEUpsampleLayer>(*polymorphic_downcast<UpsampleLayerNode *>(node));
-        case NodeType::YOLOLayer:
-            return detail::validate_yolo_layer<NEYOLOLayer>(*polymorphic_downcast<YOLOLayerNode *>(node));
+            return detail::validate_strided_slice_layer<NEStridedSlice>(
+                *polymorphic_downcast<StridedSliceLayerNode *>(node));
         case NodeType::EltwiseLayer:
-            return detail::validate_eltwise_Layer<NEEltwiseLayerFunctions>(*polymorphic_downcast<EltwiseLayerNode *>(node));
+            return detail::validate_eltwise_Layer<NEEltwiseLayerFunctions>(
+                *polymorphic_downcast<EltwiseLayerNode *>(node));
         case NodeType::UnaryEltwiseLayer:
-            return detail::validate_unary_eltwise_layer<NEUnaryEltwiseLayerFunctions>(*polymorphic_downcast<UnaryEltwiseLayerNode *>(node));
+            return detail::validate_unary_eltwise_layer<NEUnaryEltwiseLayerFunctions>(
+                *polymorphic_downcast<UnaryEltwiseLayerNode *>(node));
         default:
             return Status{};
     }
diff --git a/src/graph/backends/NEON/NESubTensorHandle.cpp b/src/graph/backends/NEON/NESubTensorHandle.cpp
index 36f29d0d10..8964a00c5e 100644
--- a/src/graph/backends/NEON/NESubTensorHandle.cpp
+++ b/src/graph/backends/NEON/NESubTensorHandle.cpp
@@ -29,7 +29,10 @@ namespace graph
 {
 namespace backends
 {
-NESubTensorHandle::NESubTensorHandle(ITensorHandle *parent_handle, const TensorShape &shape, const Coordinates &coords, bool extend_parent)
+NESubTensorHandle::NESubTensorHandle(ITensorHandle     *parent_handle,
+                                     const TensorShape &shape,
+                                     const Coordinates &coords,
+                                     bool               extend_parent)
     : _sub_tensor(), _parent_handle(nullptr)
 {
     ARM_COMPUTE_ERROR_ON(!parent_handle);
@@ -95,4 +98,4 @@ Target NESubTensorHandle::target() const
 }
 } // namespace backends
 } // namespace graph
-} // namespace arm_compute
-\ No newline at end of file
+} // namespace arm_compute
diff --git a/src/graph/backends/NEON/NETensorHandle.cpp b/src/graph/backends/NEON/NETensorHandle.cpp
index 4393156e8a..dabf67060d 100644
--- a/src/graph/backends/NEON/NETensorHandle.cpp
+++ b/src/graph/backends/NEON/NETensorHandle.cpp
@@ -24,6 +24,7 @@
 #include "arm_compute/graph/backends/NEON/NETensorHandle.h"
 
 #include "arm_compute/runtime/MemoryGroup.h"
+
 #include "support/Cast.h"
 
 namespace arm_compute
@@ -32,8 +33,7 @@ namespace graph
 {
 namespace backends
 {
-NETensorHandle::NETensorHandle(const ITensorInfo &info)
-    : _tensor()
+NETensorHandle::NETensorHandle(const ITensorInfo &info) : _tensor()
 {
     _tensor.allocator()->init(info);
 }
@@ -50,7 +50,7 @@ void NETensorHandle::free()
 
 void NETensorHandle::manage(IMemoryGroup *mg)
 {
-    if(mg != nullptr)
+    if (mg != nullptr)
     {
         mg->manage(&_tensor);
     }
@@ -68,7 +68,7 @@ void NETensorHandle::unmap()
 void NETensorHandle::release_if_unused()
 {
     // TODO (geopin01): Release tensor only if all sub-tensors are marked as not used
-    if(!_tensor.is_used())
+    if (!_tensor.is_used())
     {
         _tensor.allocator()->free();
     }
@@ -100,4 +100,4 @@ Target NETensorHandle::target() const
 }
 } // namespace backends
 } // namespace graph
-} // namespace arm_compute
-\ No newline at end of file
+} // namespace arm_compute