author    Felix Thomasmathibalan <felixjohnny.thomasmathibalan@arm.com>  2023-09-27 17:46:17 +0100
committer felixjohnny.thomasmathibalan <felixjohnny.thomasmathibalan@arm.com>  2023-09-28 12:08:05 +0000
commit    afd38f0c617d6f89b2b4532c6c44f116617e2b6f (patch)
tree      03bc7d5a762099989b16a656fa8d397b490ed70e /src/dynamic_fusion
parent    bdcb4c148ee2fdeaaddf4cf1e57bbb0de02bb894 (diff)
download  ComputeLibrary-afd38f0c617d6f89b2b4532c6c44f116617e2b6f.tar.gz
Apply clang-format on repository
Code is formatted as per a revised clang-format configuration file (not part of this delivery). Version 14.0.6 is used.

Exclusion List:
- files with .cl extension
- files that are not strictly C/C++ (e.g. Android.bp, Sconscript ...)

And the following directories:
- compute_kernel_writer/validation/
- tests/
- include/
- src/core/NEON/kernels/convolution/
- src/core/NEON/kernels/arm_gemm/
- src/core/NEON/kernels/arm_conv/
- data/

There will be a follow-up for formatting of .cl files and the files under tests/ and compute_kernel_writer/validation/.

Signed-off-by: Felix Thomasmathibalan <felixjohnny.thomasmathibalan@arm.com>
Change-Id: Ib7eb1fcf4e7537b9feaefcfc15098a804a3fde0a
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10391
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gunes Bayir <gunes.bayir@arm.com>
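Note: the revised .clang-format file itself is not part of this delivery. The block below is a minimal sketch of a configuration consistent with the formatting visible in the hunks that follow, written with clang-format 14 option names; every value here is inferred from the diff, not taken from the shipped file.

---
Language:                        Cpp
BasedOnStyle:                    LLVM
ColumnLimit:                     120    # wrapped calls and conditions below break near 120 columns
IndentWidth:                     4
AccessModifierOffset:            -4     # public:/private: sit flush with the class keyword
BreakBeforeBraces:               Allman # braces keep their own line throughout the diff
SpaceBeforeParens:               ControlStatements # switch( -> switch (, for( -> for (, if( -> if (
PointerAlignment:                Right  # e.g. "const ICLTensor *tensor"
AlignConsecutiveAssignments:     true   # aligned '=' runs, e.g. the DataView members in ClWorkloadRuntime.cpp
AlignConsecutiveDeclarations:    true   # aligned member declarations, e.g. in GpuKernelComponentGroup.h
Cpp11BracedListStyle:            true   # "{ tensor }" becomes "{tensor}"
BinPackParameters:               false  # wrapped declarations place one parameter per line
AllowShortLambdasOnASingleLine:  All    # [](const auto &t) { return t->id(); } stays on one line
SortIncludes:                    CaseInsensitive # include reordering in GpuLogicalKernel.cpp
IncludeBlocks:                   Regroup         # blank lines inserted between include groups

Under the same assumption, formatting would have been applied in place with clang-format 14.0.6 (e.g. clang-format -i) over the non-excluded .cpp/.h sources; the exact invocation is not recorded in this commit.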
Diffstat (limited to 'src/dynamic_fusion')
-rw-r--r-- src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.cpp | 61
-rw-r--r-- src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.h | 11
-rw-r--r-- src/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.cpp | 81
-rw-r--r-- src/dynamic_fusion/runtime/gpu/cl/ckw_driver/GpuCkwKernelArgumentsHelpers.cpp | 7
-rw-r--r-- src/dynamic_fusion/runtime/gpu/cl/ckw_driver/GpuCkwKernelArgumentsHelpers.h | 5
-rw-r--r-- src/dynamic_fusion/sketch/ArgumentPack.h | 45
-rw-r--r-- src/dynamic_fusion/sketch/attributes/DepthwiseConv2dAttributes.cpp | 3
-rw-r--r-- src/dynamic_fusion/sketch/attributes/Pool2dAttributes.cpp | 1
-rw-r--r-- src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h | 15
-rw-r--r-- src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.cpp | 20
-rw-r--r-- src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.h | 18
-rw-r--r-- src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.cpp | 105
-rw-r--r-- src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h | 23
-rw-r--r-- src/dynamic_fusion/sketch/gpu/GpuKernelComponentStream.cpp | 10
-rw-r--r-- src/dynamic_fusion/sketch/gpu/GpuKernelComponentStream.h | 5
-rw-r--r-- src/dynamic_fusion/sketch/gpu/GpuKernelSourceCode.h | 1
-rw-r--r-- src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.cpp | 10
-rw-r--r-- src/dynamic_fusion/sketch/gpu/GpuOperatorGroup.cpp | 48
-rw-r--r-- src/dynamic_fusion/sketch/gpu/GpuOperatorGroup.h | 4
-rw-r--r-- src/dynamic_fusion/sketch/gpu/GpuWorkloadContext.cpp | 16
-rw-r--r-- src/dynamic_fusion/sketch/gpu/GpuWorkloadContextImpl.h | 6
-rw-r--r-- src/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.cpp | 4
-rw-r--r-- src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h | 11
-rw-r--r-- src/dynamic_fusion/sketch/gpu/GpuWorkloadSourceCode.h | 56
-rw-r--r-- src/dynamic_fusion/sketch/gpu/IGpuKernelWriter.h | 1
-rw-r--r-- src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwComponentArgument.cpp | 7
-rw-r--r-- src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwComponentArgument.h | 6
-rw-r--r-- src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.cpp | 21
-rw-r--r-- src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.h | 4
-rw-r--r-- src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.cpp | 12
-rw-r--r-- src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.cpp | 1
-rw-r--r-- src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h | 2
-rw-r--r-- src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.cpp | 20
-rw-r--r-- src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h | 8
-rw-r--r-- src/dynamic_fusion/sketch/gpu/ckw_driver/IGpuCkwComponentDriver.h | 10
-rw-r--r-- src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwActivation.cpp | 34
-rw-r--r-- src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwActivation.h | 10
-rw-r--r-- src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.cpp | 44
-rw-r--r-- src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.h | 10
-rw-r--r-- src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwDirectConv2d.cpp | 49
-rw-r--r-- src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.cpp | 84
-rw-r--r-- src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.h | 14
-rw-r--r-- src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwPool2d.cpp | 171
-rw-r--r-- src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwPool2d.h | 8
-rw-r--r-- src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwResize.cpp | 76
-rw-r--r-- src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.cpp | 10
-rw-r--r-- src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.h | 6
-rw-r--r-- src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/WriterHelper.h | 31
-rw-r--r-- src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/Common.h | 35
-rw-r--r-- src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/ElementwiseBinary.h | 3
-rw-r--r-- src/dynamic_fusion/sketch/gpu/components/GpuKernelComponentFactory.h | 7
-rw-r--r-- src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h | 15
-rw-r--r-- src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.cpp | 12
-rw-r--r-- src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.h | 18
-rw-r--r-- src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.cpp | 30
-rw-r--r-- src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.h | 10
-rw-r--r-- src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDepthwiseConv2d.cpp | 57
-rw-r--r-- src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDepthwiseConv2d.h | 34
-rw-r--r-- src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.cpp | 64
-rw-r--r-- src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.h | 26
-rw-r--r-- src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp | 65
-rw-r--r-- src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.h | 12
-rw-r--r-- src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DMaxShiftExpSum.cpp | 14
-rw-r--r-- src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DMaxShiftExpSum.h | 7
-rw-r--r-- src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DNorm.cpp | 14
-rw-r--r-- src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DNorm.h | 7
-rw-r--r-- src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.cpp | 49
-rw-r--r-- src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.h | 21
-rw-r--r-- src/dynamic_fusion/sketch/gpu/components/cl/ClComponentReshape.cpp | 12
-rw-r--r-- src/dynamic_fusion/sketch/gpu/components/cl/ClComponentReshape.h | 5
-rw-r--r-- src/dynamic_fusion/sketch/gpu/components/cl/ClComponentResize.cpp | 14
-rw-r--r-- src/dynamic_fusion/sketch/gpu/components/cl/ClComponentResize.h | 13
-rw-r--r-- src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.cpp | 20
-rw-r--r-- src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.h | 5
-rw-r--r-- src/dynamic_fusion/sketch/gpu/components/utils/type_printer/ElementwiseBinary.h | 22
-rw-r--r-- src/dynamic_fusion/sketch/gpu/operators/GpuAdd.cpp | 19
-rw-r--r-- src/dynamic_fusion/sketch/gpu/operators/GpuCast.cpp | 52
-rw-r--r-- src/dynamic_fusion/sketch/gpu/operators/GpuClamp.cpp | 44
-rw-r--r-- src/dynamic_fusion/sketch/gpu/operators/GpuConv2d.cpp | 65
-rw-r--r-- src/dynamic_fusion/sketch/gpu/operators/GpuDepthwiseConv2d.cpp | 90
-rw-r--r-- src/dynamic_fusion/sketch/gpu/operators/GpuMul.cpp | 13
-rw-r--r-- src/dynamic_fusion/sketch/gpu/operators/GpuOutput.cpp | 19
-rw-r--r-- src/dynamic_fusion/sketch/gpu/operators/GpuPool2d.cpp | 43
-rw-r--r-- src/dynamic_fusion/sketch/gpu/operators/GpuReshape.cpp | 27
-rw-r--r-- src/dynamic_fusion/sketch/gpu/operators/GpuResize.cpp | 40
-rw-r--r-- src/dynamic_fusion/sketch/gpu/operators/GpuSigmoid.cpp | 31
-rw-r--r-- src/dynamic_fusion/sketch/gpu/operators/GpuSoftmax.cpp | 38
-rw-r--r-- src/dynamic_fusion/sketch/gpu/operators/GpuSub.cpp | 19
-rw-r--r-- src/dynamic_fusion/sketch/gpu/operators/GpuTanh.cpp | 35
-rw-r--r-- src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.cpp | 19
-rw-r--r-- src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.cpp | 36
-rw-r--r-- src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h | 17
-rw-r--r-- src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h | 8
-rw-r--r-- src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.cpp | 26
-rw-r--r-- src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.h | 1
-rw-r--r-- src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateCast.cpp | 30
-rw-r--r-- src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDepthwiseConv2d.cpp | 81
-rw-r--r-- src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDepthwiseConv2d.h | 1
-rw-r--r-- src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.cpp | 112
-rw-r--r-- src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.h | 1
-rw-r--r-- src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.cpp | 94
-rw-r--r-- src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.h | 5
-rw-r--r-- src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DMaxShiftExpSum.cpp | 57
-rw-r--r-- src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DMaxShiftExpSum.h | 4
-rw-r--r-- src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DNorm.cpp | 35
-rw-r--r-- src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.cpp | 92
-rw-r--r-- src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.h | 1
-rw-r--r-- src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateReshape.cpp | 28
-rw-r--r-- src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateReshape.h | 4
-rw-r--r-- src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateResize.cpp | 56
-rw-r--r-- src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.cpp | 16
-rw-r--r-- src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.h | 1
-rw-r--r-- src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.cpp | 59
-rw-r--r-- src/dynamic_fusion/sketch/utils/DependencyGraph.h | 182
-rw-r--r-- src/dynamic_fusion/utils/Utils.h | 16
115 files changed, 1637 insertions, 1676 deletions
diff --git a/src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.cpp b/src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.cpp
index 15a5632d0b..9ca20fa152 100644
--- a/src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.cpp
+++ b/src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.cpp
@@ -22,14 +22,15 @@
* SOFTWARE.
*/
#include "ClKernelRuntime.h"
+
#include "arm_compute/core/CL/ICLTensor.h"
+
#include "src/core/CL/CLUtils.h"
#ifdef ACL_INTERNAL_TEST_CKW_IN_DF
#include "src/dynamic_fusion/runtime/gpu/cl/ckw_driver/GpuCkwKernelArgumentsHelpers.h"
#endif // ACL_INTERNAL_TEST_CKW_IN_DF
#include "src/dynamic_fusion/sketch/gpu/GpuKernelSourceCode.h"
#include "src/gpu/cl/ClKernelLibrary.h"
-
#include "support/Cast.h"
namespace arm_compute
{
@@ -43,13 +44,12 @@ void ClKernelRuntime::configure(const ClCompileContext &compile_ctx, const GpuKe
{
// Create kernel from kernel source string
opencl::ClKernelLibrary &klib = opencl::ClKernelLibrary::get();
- _kernel = static_cast<cl::Kernel>(compile_ctx.create_kernel(code.name(),
- code.name(), // program name has to be provided to differentiate between different unfusable components' kernels.
- // Each program contains exactly one kernel
- code.code(),
- klib.kernel_path() /* Kernel path: Used in cases of embedded kernels */,
- code.build_options().options(),
- false /* Is source binary */));
+ _kernel = static_cast<cl::Kernel>(compile_ctx.create_kernel(
+ code.name(),
+ code.name(), // program name has to be provided to differentiate between different unfusable components' kernels.
+ // Each program contains exactly one kernel
+ code.code(), klib.kernel_path() /* Kernel path: Used in cases of embedded kernels */,
+ code.build_options().options(), false /* Is source binary */));
// Configure execution window
IClKernel::configure_internal(code.window());
@@ -63,11 +63,15 @@ void ClKernelRuntime::configure(const ClCompileContext &compile_ctx, const GpuKe
#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
-inline void ClKernelRuntime::add_tensor_argument(unsigned int &idx, const GpuKernelArgumentInfo &arg, const ICLTensor *tensor, const Window &arg_slice, std::vector<cl::Image2D> &cl_images)
+inline void ClKernelRuntime::add_tensor_argument(unsigned int &idx,
+ const GpuKernelArgumentInfo &arg,
+ const ICLTensor *tensor,
+ const Window &arg_slice,
+ std::vector<cl::Image2D> &cl_images)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(tensor);
- switch(arg.type)
+ switch (arg.type)
{
case GpuKernelArgumentInfo::Type::Scalar:
{
@@ -95,9 +99,13 @@ inline void ClKernelRuntime::add_tensor_argument(unsigned int &idx, const GpuKer
}
case GpuKernelArgumentInfo::Type::Image_Export_To_ClImage2D:
{
- const TensorShape shape2d(tensor->info()->dimension(0) / 4, tensor->info()->dimension(1) * tensor->info()->dimension(2) * tensor->info()->dimension(3));
+ const TensorShape shape2d(tensor->info()->dimension(0) / 4, tensor->info()->dimension(1) *
+ tensor->info()->dimension(2) *
+ tensor->info()->dimension(3));
const size_t image_row_pitch = tensor->info()->strides_in_bytes()[1];
- cl::Image2D tensor_image2d = create_image2d_from_buffer(CLKernelLibrary::get().context(), tensor->cl_buffer(), shape2d, tensor->info()->data_type(), image_row_pitch, CLImage2DType::ReadOnly);
+ cl::Image2D tensor_image2d =
+ create_image2d_from_buffer(CLKernelLibrary::get().context(), tensor->cl_buffer(), shape2d,
+ tensor->info()->data_type(), image_row_pitch, CLImage2DType::ReadOnly);
cl_images.push_back(tensor_image2d);
_kernel.setArg(idx++, tensor_image2d);
break;
@@ -111,9 +119,13 @@ inline void ClKernelRuntime::add_tensor_argument(unsigned int &idx, const GpuKer
}
case GpuKernelArgumentInfo::Type::Image_3D_Export_To_ClImage2D:
{
- const TensorShape shape2d(tensor->info()->dimension(0) / 4, tensor->info()->dimension(1) * tensor->info()->dimension(2) * tensor->info()->dimension(3));
+ const TensorShape shape2d(tensor->info()->dimension(0) / 4, tensor->info()->dimension(1) *
+ tensor->info()->dimension(2) *
+ tensor->info()->dimension(3));
const size_t image_row_pitch = tensor->info()->strides_in_bytes()[1];
- cl::Image2D tensor_image2d = create_image2d_from_buffer(CLKernelLibrary::get().context(), tensor->cl_buffer(), shape2d, tensor->info()->data_type(), image_row_pitch, CLImage2DType::ReadOnly);
+ cl::Image2D tensor_image2d =
+ create_image2d_from_buffer(CLKernelLibrary::get().context(), tensor->cl_buffer(), shape2d,
+ tensor->info()->data_type(), image_row_pitch, CLImage2DType::ReadOnly);
cl_images.push_back(tensor_image2d);
_kernel.setArg(idx++, tensor_image2d);
_kernel.setArg<cl_uint>(idx++, static_cast<unsigned int>(tensor->info()->strides_in_bytes()[2]));
@@ -142,8 +154,9 @@ inline void ClKernelRuntime::add_tensor_argument(unsigned int &idx, const GpuKer
const size_t image_h = tensor->info()->tensor_shape().total_size_upper(1);
const size_t image_stride_y = tensor->info()->strides_in_bytes()[1];
- cl::Image2D tensor_image2d = create_image2d_from_buffer(CLKernelLibrary::get().context(), tensor->cl_buffer(),
- TensorShape(image_w, image_h), tensor->info()->data_type(), image_stride_y, CLImage2DType::ReadOnly);
+ cl::Image2D tensor_image2d = create_image2d_from_buffer(
+ CLKernelLibrary::get().context(), tensor->cl_buffer(), TensorShape(image_w, image_h),
+ tensor->info()->data_type(), image_stride_y, CLImage2DType::ReadOnly);
cl_images.push_back(tensor_image2d);
_kernel.setArg(idx++, tensor_image2d);
@@ -170,13 +183,16 @@ inline void ClKernelRuntime::add_tensor_argument(unsigned int &idx, const GpuKer
}
#else // ACL_INTERNAL_TEST_CKW_IN_DF
-inline void ClKernelRuntime::add_kernel_argument(unsigned int &idx, const GpuKernelArgumentBinding &arg, const ICLTensor *tensor, std::vector<cl::Image2D> &cl_images)
+inline void ClKernelRuntime::add_kernel_argument(unsigned int &idx,
+ const GpuKernelArgumentBinding &arg,
+ const ICLTensor *tensor,
+ std::vector<cl::Image2D> &cl_images)
{
- switch(arg.type())
+ switch (arg.type())
{
case GpuKernelArgumentBinding::Type::TensorStorage:
{
- switch(arg.tensor_storage_type())
+ switch (arg.tensor_storage_type())
{
case TensorStorageType::ClBufferUint8Ptr:
{
@@ -238,7 +254,7 @@ void ClKernelRuntime::run_op(ITensorPack &tensors, const Window &window, cl::Com
// CLImages created from tensor arguments. Need to be retained until enqueue
std::vector<cl::Image2D> cl_images;
#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
- for(auto id_arg : _arguments)
+ for (auto id_arg : _arguments)
{
const auto arg = id_arg.second;
auto tensor = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(id_arg.first));
@@ -248,7 +264,7 @@ void ClKernelRuntime::run_op(ITensorPack &tensors, const Window &window, cl::Com
}
#else // ACL_INTERNAL_TEST_CKW_IN_DF
- for(const auto &arg : _arguments)
+ for (const auto &arg : _arguments)
{
auto tensor = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(arg.id()));
ARM_COMPUTE_ERROR_ON_NULLPTR(tensor);
@@ -259,8 +275,7 @@ void ClKernelRuntime::run_op(ITensorPack &tensors, const Window &window, cl::Com
// Dispatch kernel
enqueue(queue, *this, slice, lws_hint(), use_dummy_work_items);
- }
- while(skip_sliding_window && window.slide_window_slice_3D(slice));
+ } while (skip_sliding_window && window.slide_window_slice_3D(slice));
}
} // namespace dynamic_fusion
diff --git a/src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.h b/src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.h
index 92e73503ce..e78567eb9d 100644
--- a/src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.h
+++ b/src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.h
@@ -68,7 +68,11 @@ private:
* @param[in] arg_slice Window the kernel will be run on
* @param[out] cl_images Extra cl images created from the tensor (will need to be retained until the kernel is enqueued)
*/
- inline void add_tensor_argument(unsigned int &idx, const GpuKernelArgumentInfo &arg, const ICLTensor *tensor, const Window &arg_slice, std::vector<cl::Image2D> &cl_images);
+ inline void add_tensor_argument(unsigned int &idx,
+ const GpuKernelArgumentInfo &arg,
+ const ICLTensor *tensor,
+ const Window &arg_slice,
+ std::vector<cl::Image2D> &cl_images);
#else // ACL_INTERNAL_TEST_CKW_IN_DF
/** Set a kernel argument as part of a tensor
*
@@ -77,7 +81,10 @@ private:
* @param[in] tensor Tensor of which the kernel argument @p arg is a part of
* @param[out] cl_images Extra cl images created from the tensor (will need to be retained until the kernel is enqueued)
*/
- inline void add_kernel_argument(unsigned int &idx, const GpuKernelArgumentBinding &arg, const ICLTensor *tensor, std::vector<cl::Image2D> &cl_images);
+ inline void add_kernel_argument(unsigned int &idx,
+ const GpuKernelArgumentBinding &arg,
+ const ICLTensor *tensor,
+ std::vector<cl::Image2D> &cl_images);
#endif // ACL_INTERNAL_TEST_CKW_IN_DF
private:
diff --git a/src/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.cpp b/src/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.cpp
index cd21b10180..ba39ff4c9d 100644
--- a/src/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.cpp
+++ b/src/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.cpp
@@ -25,6 +25,7 @@
#include "arm_compute/core/experimental/Types.h"
#include "arm_compute/runtime/CL/CLTensor.h"
+
#include "src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.h"
#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h"
#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSourceCode.h"
@@ -55,14 +56,14 @@ public:
{
DataView() = default;
DataView(CLTensor *tensor, const TensorInfo &tensor_info, const AuxMemoryInfo &memory_info)
- : tensor{ tensor }, tensor_info{ tensor_info }, memory_info{ memory_info }
+ : tensor{tensor}, tensor_info{tensor_info}, memory_info{memory_info}
{
}
- ~DataView() = default;
- DataView(const DataView &other) = default;
+ ~DataView() = default;
+ DataView(const DataView &other) = default;
DataView &operator=(const DataView &other) = default;
DataView(DataView &&other) = default;
- DataView &operator=(DataView &&other) = default;
+ DataView &operator=(DataView &&other) = default;
CLTensor *tensor{}; /**< Pointer to the auxiliary tensor */
TensorInfo tensor_info{}; /**< Associated tensor info */
AuxMemoryInfo memory_info{}; /**< Memory requirement */
@@ -92,7 +93,7 @@ private:
{
const auto t_id = tensor_info.id();
auto find_tensor_pair = _owned_tensors.find(t_id);
- if(find_tensor_pair != _owned_tensors.end())
+ if (find_tensor_pair != _owned_tensors.end())
{
return find_tensor_pair->second.get();
}
@@ -107,7 +108,7 @@ private:
}
std::map<ITensorInfo::Id, std::unique_ptr<CLTensor>> _owned_tensors{};
- std::vector<DataView> _tensors{};
+ std::vector<DataView> _tensors{};
};
/** Construct auxiliary tensors required by @ref GpuWorkloadSourceCode
*
@@ -120,12 +121,12 @@ private:
*/
Status create_aux_tensors(ClAuxTensors *aux_tensors, const GpuWorkloadSourceCode &code)
{
- for(auto t_id : code.tensors())
+ for (auto t_id : code.tensors())
{
// Get tensor object
const auto workload_arg = code.query_tensor(t_id);
ICLTensor *tensor_object = nullptr;
- if(workload_arg->memory_descriptor()->memory_type == MemoryType::Auxiliary)
+ if (workload_arg->memory_descriptor()->memory_type == MemoryType::Auxiliary)
{
// Create aux tensor CLTensor object
const TensorInfo tensor_info = *workload_arg->tensor_info();
@@ -133,7 +134,7 @@ Status create_aux_tensors(ClAuxTensors *aux_tensors, const GpuWorkloadSourceCode
const auto aux_memory_info = workload_arg->memory_descriptor()->aux_memory_info;
tensor_object = aux_tensors->add_aux_tensor(tensor_info, aux_memory_info);
- if(tensor_object == nullptr)
+ if (tensor_object == nullptr)
{
return ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Failed to construct an auxiliary tensor");
}
@@ -156,7 +157,7 @@ public:
ITensorPack *find_tensor_pack(UnitWorkloadId uwk_id)
{
auto tensor_pack = _tensor_packs.find(uwk_id);
- if(tensor_pack != _tensor_packs.end())
+ if (tensor_pack != _tensor_packs.end())
{
return &(tensor_pack->second);
}
@@ -173,7 +174,10 @@ public:
return _tensor_packs.at(uwk_id);
}
- friend Status create_tensor_lut(ClTensorLUT *tensor_lut, const GpuWorkloadSourceCode &code, const std::vector<CLTensor *> &user_tensors, const ClAuxTensors &aux_tensors);
+ friend Status create_tensor_lut(ClTensorLUT *tensor_lut,
+ const GpuWorkloadSourceCode &code,
+ const std::vector<CLTensor *> &user_tensors,
+ const ClAuxTensors &aux_tensors);
private:
/** Add a tensor pack and associate it with @ref UnitWorkloadId @p uwk_id
@@ -197,19 +201,22 @@ private:
*
* @return Status
*/
-Status create_tensor_lut(ClTensorLUT *tensor_lut, const GpuWorkloadSourceCode &code, const std::vector<CLTensor *> &user_tensors, const ClAuxTensors &aux_tensors)
+Status create_tensor_lut(ClTensorLUT *tensor_lut,
+ const GpuWorkloadSourceCode &code,
+ const std::vector<CLTensor *> &user_tensors,
+ const ClAuxTensors &aux_tensors)
{
// Combine user tensors and aux tensors
std::map<ITensorInfo::Id, CLTensor *> tensor_map;
- for(auto tensor : user_tensors)
+ for (auto tensor : user_tensors)
{
const auto t_id = tensor->info()->id();
- if(tensor_map.find(t_id) != tensor_map.end())
+ if (tensor_map.find(t_id) != tensor_map.end())
{
// In case of elementwise in-place: give another Id to the In/Out tensor when passed again
std::vector<ITensorInfo::Id> ids;
- for(auto &t : tensor_map)
+ for (auto &t : tensor_map)
{
ids.push_back(t.first);
}
@@ -221,11 +228,11 @@ Status create_tensor_lut(ClTensorLUT *tensor_lut, const GpuWorkloadSourceCode &c
tensor_map[t_id] = tensor;
}
}
- for(const auto &data : aux_tensors.get_tensors())
+ for (const auto &data : aux_tensors.get_tensors())
{
const auto t_id = data.tensor_info.id();
const auto tensor = data.tensor;
- if(tensor_map.find(t_id) != tensor_map.end())
+ if (tensor_map.find(t_id) != tensor_map.end())
{
return ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Clashing tensor ids");
}
@@ -233,25 +240,25 @@ Status create_tensor_lut(ClTensorLUT *tensor_lut, const GpuWorkloadSourceCode &c
}
// Add tensor objects into corresponding tensor packs
- for(auto id_tensor : tensor_map)
+ for (auto id_tensor : tensor_map)
{
const auto t_id = id_tensor.first;
const auto tensor_object = id_tensor.second;
- if(tensor_object == nullptr)
+ if (tensor_object == nullptr)
{
return ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Trying to add a nullptr into the tensor packs");
}
- if(tensor_object->allocator()->info().total_size() == 0U)
+ if (tensor_object->allocator()->info().total_size() == 0U)
{
return ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "No allocated memory found in tensor");
}
- for(auto uwk_id : code.get_unit_workloads_from_tensor(t_id))
+ for (auto uwk_id : code.get_unit_workloads_from_tensor(t_id))
{
ITensorPack *tensor_pack = tensor_lut->find_tensor_pack(uwk_id);
- if(tensor_pack == nullptr)
+ if (tensor_pack == nullptr)
{
- tensor_lut->add_tensor_pack(uwk_id, ITensorPack{ { t_id, tensor_object } });
+ tensor_lut->add_tensor_pack(uwk_id, ITensorPack{{t_id, tensor_object}});
}
else
{
@@ -269,15 +276,14 @@ struct ClWorkloadRuntime::Implementation
{
std::map<UnitWorkloadId, std::unique_ptr<ClKernelRuntime>> _kernels{};
std::map<UnitWorkloadId, std::unique_ptr<ClKernelRuntime>> _kernels_prep{};
- bool _is_configured{ false };
- bool _is_prepared{ false };
- ClTensorLUT _tensor_lut{};
- ClAuxTensors _aux_tensors{};
- GpuWorkloadSourceCode _source_code{};
+ bool _is_configured{false};
+ bool _is_prepared{false};
+ ClTensorLUT _tensor_lut{};
+ ClAuxTensors _aux_tensors{};
+ GpuWorkloadSourceCode _source_code{};
};
-ClWorkloadRuntime::ClWorkloadRuntime()
- : _impl{ std::make_unique<Implementation>() }
+ClWorkloadRuntime::ClWorkloadRuntime() : _impl{std::make_unique<Implementation>()}
{
}
@@ -286,18 +292,19 @@ ClWorkloadRuntime::~ClWorkloadRuntime() = default;
Status ClWorkloadRuntime::configure(const GpuWorkloadSketch &sketch)
{
ARM_COMPUTE_RETURN_ERROR_ON_MSG(_impl->_is_configured, "ClWorkloadRuntime cannot be re-configured");
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(sketch.gpu_context()->gpu_language() != GpuLanguage::OpenCL, "ClWorkloadRuntime cannot be configured with non-OpenCL workload sketch");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(sketch.gpu_context()->gpu_language() != GpuLanguage::OpenCL,
+ "ClWorkloadRuntime cannot be configured with non-OpenCL workload sketch");
// Generate source code
_impl->_source_code = sketch.implementation().generate_source_code();
// Configure unit workload from source code
- for(auto uwk_id : _impl->_source_code.unit_workloads())
+ for (auto uwk_id : _impl->_source_code.unit_workloads())
{
const auto work = _impl->_source_code.query_unit_workload(uwk_id);
const auto stage = work.stage().stage;
auto k = std::make_unique<ClKernelRuntime>();
k->configure(*sketch.gpu_context()->cl_compile_context(), work.code());
- switch(stage)
+ switch (stage)
{
case UnitWorkloadStage::Stage::Run:
{
@@ -323,9 +330,9 @@ Status ClWorkloadRuntime::configure(const GpuWorkloadSketch &sketch)
void ClWorkloadRuntime::prepare()
{
- if(!_impl->_is_prepared)
+ if (!_impl->_is_prepared)
{
- for(auto &id_kernel_pair : _impl->_kernels_prep)
+ for (auto &id_kernel_pair : _impl->_kernels_prep)
{
const bool flush_queue = false;
const auto uwk_id = id_kernel_pair.first;
@@ -344,7 +351,7 @@ Status ClWorkloadRuntime::run(const std::vector<CLTensor *> &tensors)
const auto st = create_tensor_lut(&_impl->_tensor_lut, _impl->_source_code, tensors, _impl->_aux_tensors);
ARM_COMPUTE_RETURN_ON_ERROR(st);
prepare();
- for(auto &id_kernel_pair : _impl->_kernels)
+ for (auto &id_kernel_pair : _impl->_kernels)
{
// Flush the command queue on the last kernel
const bool flush_queue = false;
@@ -358,7 +365,7 @@ Status ClWorkloadRuntime::run(const std::vector<CLTensor *> &tensors)
std::vector<std::tuple<CLTensor *, TensorInfo, AuxMemoryInfo>> ClWorkloadRuntime::get_auxiliary_tensors()
{
std::vector<std::tuple<CLTensor *, TensorInfo, AuxMemoryInfo>> aux_tensors;
- for(const auto &data : _impl->_aux_tensors.get_tensors())
+ for (const auto &data : _impl->_aux_tensors.get_tensors())
{
aux_tensors.emplace_back(data.tensor, data.tensor_info, data.memory_info);
}
diff --git a/src/dynamic_fusion/runtime/gpu/cl/ckw_driver/GpuCkwKernelArgumentsHelpers.cpp b/src/dynamic_fusion/runtime/gpu/cl/ckw_driver/GpuCkwKernelArgumentsHelpers.cpp
index 84fb279237..7044b0ea66 100644
--- a/src/dynamic_fusion/runtime/gpu/cl/ckw_driver/GpuCkwKernelArgumentsHelpers.cpp
+++ b/src/dynamic_fusion/runtime/gpu/cl/ckw_driver/GpuCkwKernelArgumentsHelpers.cpp
@@ -30,14 +30,17 @@ namespace experimental
{
namespace dynamic_fusion
{
-void cl_add_tensor_component_argument(cl::Kernel &kernel, unsigned int &idx, const ICLTensor *tensor, TensorComponentType component)
+void cl_add_tensor_component_argument(cl::Kernel &kernel,
+ unsigned int &idx,
+ const ICLTensor *tensor,
+ TensorComponentType component)
{
ARM_COMPUTE_ERROR_ON(tensor == nullptr);
const auto *info = tensor->info();
const auto &strides = info->strides_in_bytes();
- switch(component)
+ switch (component)
{
case TensorComponentType::OffsetFirstElement:
kernel.setArg<cl_uint>(idx++, info->offset_first_element_in_bytes());
diff --git a/src/dynamic_fusion/runtime/gpu/cl/ckw_driver/GpuCkwKernelArgumentsHelpers.h b/src/dynamic_fusion/runtime/gpu/cl/ckw_driver/GpuCkwKernelArgumentsHelpers.h
index 4cbb157a48..306d547acb 100644
--- a/src/dynamic_fusion/runtime/gpu/cl/ckw_driver/GpuCkwKernelArgumentsHelpers.h
+++ b/src/dynamic_fusion/runtime/gpu/cl/ckw_driver/GpuCkwKernelArgumentsHelpers.h
@@ -42,7 +42,10 @@ namespace dynamic_fusion
* @param[in] tensor Tensor from which to access the tensor component.
* @param[in] component Tensor component to select such as tensor dimensions, strides, etc.
*/
-void cl_add_tensor_component_argument(cl::Kernel &kernel, unsigned int &idx, const ICLTensor *tensor, TensorComponentType component);
+void cl_add_tensor_component_argument(cl::Kernel &kernel,
+ unsigned int &idx,
+ const ICLTensor *tensor,
+ TensorComponentType component);
/** Add an OpenCL buffer object to the kernel's arguments at the specified index @p idx.
*
diff --git a/src/dynamic_fusion/sketch/ArgumentPack.h b/src/dynamic_fusion/sketch/ArgumentPack.h
index f118d7d851..3bf380b1ec 100644
--- a/src/dynamic_fusion/sketch/ArgumentPack.h
+++ b/src/dynamic_fusion/sketch/ArgumentPack.h
@@ -25,6 +25,7 @@
#define SRC_DYNAMIC_FUSION_SKETCH_ARGUMENTPACK
#include "arm_compute/core/experimental/Types.h"
+
#include <unordered_map>
#include <vector>
@@ -52,26 +53,21 @@ public:
*/
struct PackElement
{
- PackElement() = default;
- PackElement(const PackElement &elem) = default;
+ PackElement() = default;
+ PackElement(const PackElement &elem) = default;
PackElement &operator=(const PackElement &elem) = default;
PackElement(PackElement &&elem) = default;
- PackElement &operator=(PackElement &&elem) = default;
- PackElement(Id id, T *tensor)
- : id(id), tensor(tensor), ctensor(nullptr)
+ PackElement &operator=(PackElement &&elem) = default;
+ PackElement(Id id, T *tensor) : id(id), tensor(tensor), ctensor(nullptr)
{
}
- PackElement(Id id, const T *ctensor)
- : id(id), tensor(nullptr), ctensor(ctensor)
+ PackElement(Id id, const T *ctensor) : id(id), tensor(nullptr), ctensor(ctensor)
{
}
- Id id{ ACL_UNKNOWN }; /**< Argument id within the pack */
- T *tensor{ nullptr }; /**< Non-const pointer to tensor-related object */
- const T *ctensor
- {
- nullptr
- }; /**< Const pointer to tensor-related object */
+ Id id{ACL_UNKNOWN}; /**< Argument id within the pack */
+ T *tensor{nullptr}; /**< Non-const pointer to tensor-related object */
+ const T *ctensor{nullptr}; /**< Const pointer to tensor-related object */
};
public:
@@ -88,10 +84,9 @@ public:
/** Allow instances of this class to be moved */
ArgumentPack<T> &operator=(ArgumentPack<T> &&other) = default;
/** Initializer list Constructor */
- ArgumentPack(const std::initializer_list<PackElement> &l)
- : _pack{}
+ ArgumentPack(const std::initializer_list<PackElement> &l) : _pack{}
{
- for(const auto &e : l)
+ for (const auto &e : l)
{
_pack[e.id] = e;
}
@@ -134,7 +129,7 @@ public:
const T *get_const_tensor(Id id) const
{
auto it = _pack.find(id);
- if(it != _pack.end())
+ if (it != _pack.end())
{
return it->second.ctensor != nullptr ? it->second.ctensor : it->second.tensor;
}
@@ -171,10 +166,10 @@ public:
std::vector<T *> get_src_tensors()
{
std::vector<T *> src_tensors{};
- for(int id = static_cast<int>(TensorType::ACL_SRC); id <= static_cast<int>(TensorType::ACL_SRC_END); ++id)
+ for (int id = static_cast<int>(TensorType::ACL_SRC); id <= static_cast<int>(TensorType::ACL_SRC_END); ++id)
{
auto tensor = get_tensor(static_cast<TensorType>(id));
- if(tensor != nullptr)
+ if (tensor != nullptr)
{
src_tensors.push_back(tensor);
}
@@ -188,10 +183,10 @@ public:
std::vector<const T *> get_const_src_tensors() const
{
std::vector<const T *> src_tensors{};
- for(int id = static_cast<int>(TensorType::ACL_SRC); id <= static_cast<int>(TensorType::ACL_SRC_END); ++id)
+ for (int id = static_cast<int>(TensorType::ACL_SRC); id <= static_cast<int>(TensorType::ACL_SRC_END); ++id)
{
auto tensor = get_const_tensor(static_cast<TensorType>(id));
- if(tensor != nullptr)
+ if (tensor != nullptr)
{
src_tensors.push_back(tensor);
}
@@ -205,10 +200,10 @@ public:
std::vector<T *> get_dst_tensors()
{
std::vector<T *> dst_tensors{};
- for(int id = static_cast<int>(TensorType::ACL_DST); id <= static_cast<int>(TensorType::ACL_DST_END); ++id)
+ for (int id = static_cast<int>(TensorType::ACL_DST); id <= static_cast<int>(TensorType::ACL_DST_END); ++id)
{
auto tensor = get_tensor(static_cast<TensorType>(id));
- if(tensor != nullptr)
+ if (tensor != nullptr)
{
dst_tensors.push_back(tensor);
}
@@ -222,10 +217,10 @@ public:
std::vector<const T *> get_const_dst_tensors() const
{
std::vector<const T *> dst_tensors{};
- for(int id = static_cast<int>(TensorType::ACL_DST); id <= static_cast<int>(TensorType::ACL_DST_END); ++id)
+ for (int id = static_cast<int>(TensorType::ACL_DST); id <= static_cast<int>(TensorType::ACL_DST_END); ++id)
{
auto tensor = get_const_tensor(static_cast<TensorType>(id));
- if(tensor != nullptr)
+ if (tensor != nullptr)
{
dst_tensors.push_back(tensor);
}
diff --git a/src/dynamic_fusion/sketch/attributes/DepthwiseConv2dAttributes.cpp b/src/dynamic_fusion/sketch/attributes/DepthwiseConv2dAttributes.cpp
index 3a5657e07b..6f3816568c 100644
--- a/src/dynamic_fusion/sketch/attributes/DepthwiseConv2dAttributes.cpp
+++ b/src/dynamic_fusion/sketch/attributes/DepthwiseConv2dAttributes.cpp
@@ -69,7 +69,8 @@ uint32_t DepthwiseConv2dAttributes::depth_multiplier() const
return _depth_multiplier;
}
-DepthwiseConv2dAttributes &DepthwiseConv2dAttributes::dimension_rounding_type(const DimensionRoundingType &dimension_rounding_type)
+DepthwiseConv2dAttributes &
+DepthwiseConv2dAttributes::dimension_rounding_type(const DimensionRoundingType &dimension_rounding_type)
{
_dimension_rounding_type = dimension_rounding_type;
return *this;
diff --git a/src/dynamic_fusion/sketch/attributes/Pool2dAttributes.cpp b/src/dynamic_fusion/sketch/attributes/Pool2dAttributes.cpp
index c28791f5fe..80f65f926a 100644
--- a/src/dynamic_fusion/sketch/attributes/Pool2dAttributes.cpp
+++ b/src/dynamic_fusion/sketch/attributes/Pool2dAttributes.cpp
@@ -23,6 +23,7 @@
*/
#include "arm_compute/dynamic_fusion/sketch/attributes/Pool2dAttributes.h"
+
#include "arm_compute/core/Size2D.h"
namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h b/src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h
index 226e1a2df3..03817173f4 100644
--- a/src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h
+++ b/src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h
@@ -61,11 +61,10 @@ struct GpuKernelArgumentInfo
/** Default constructor */
GpuKernelArgumentInfo() = default;
/** Constructor */
- GpuKernelArgumentInfo(Type type)
- : type{ type }
+ GpuKernelArgumentInfo(Type type) : type{type}
{
}
- Type type{ Type::Tensor_4D_t_Buffer };
+ Type type{Type::Tensor_4D_t_Buffer};
};
bool operator==(const GpuKernelArgumentInfo &info0, const GpuKernelArgumentInfo &info1);
/** Kernel argument information linked with its corresponding @ref ITensorInfo
@@ -79,10 +78,8 @@ public:
* @param[in] tensor_info Associated @ref ITensorInfo
* @param[in] kernel_arg_info Associated @ref GpuKernelArgumentInfo
*/
- GpuKernelArgument(const ITensorInfo &tensor_info,
- const GpuKernelArgumentInfo &kernel_arg_info)
- : _tensor_info{ tensor_info },
- _kernel_arg_info{ kernel_arg_info }
+ GpuKernelArgument(const ITensorInfo &tensor_info, const GpuKernelArgumentInfo &kernel_arg_info)
+ : _tensor_info{tensor_info}, _kernel_arg_info{kernel_arg_info}
{
}
/** Get workload tensor id */
@@ -200,12 +197,12 @@ public:
TensorComponent /** @ref TensorComponentType */
};
GpuKernelArgumentBinding(ITensorInfo::Id id, TensorStorageType storage)
- : _type{ Type::TensorStorage }, _id{ id }, _value{}
+ : _type{Type::TensorStorage}, _id{id}, _value{}
{
_value.tensor_storage_type = storage;
}
GpuKernelArgumentBinding(ITensorInfo::Id id, TensorComponentType component)
- : _type{ Type::TensorComponent }, _id{ id }, _value{}
+ : _type{Type::TensorComponent}, _id{id}, _value{}
{
_value.tensor_component_type = component;
}
diff --git a/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.cpp b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.cpp
index 5a65ede38b..1a458c9862 100644
--- a/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.cpp
+++ b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.cpp
@@ -31,35 +31,31 @@ namespace experimental
{
namespace dynamic_fusion
{
-std::vector<DependencyGraph::TensorId> GpuKernelComponentGraph::get_tensor_ids(const std::vector<const ITensorInfo *> tensors)
+std::vector<DependencyGraph::TensorId>
+GpuKernelComponentGraph::get_tensor_ids(const std::vector<const ITensorInfo *> tensors)
{
std::vector<DependencyGraph::TensorId> tensor_ids{};
- std::transform(
- std::begin(tensors), std::end(tensors),
- std::back_inserter(tensor_ids),
- [](const auto & t)
- {
- return t->id();
- });
+ std::transform(std::begin(tensors), std::end(tensors), std::back_inserter(tensor_ids),
+ [](const auto &t) { return t->id(); });
return tensor_ids;
}
GpuKernelComponentGraph::GpuKernelComponentGraph(GpuWorkloadContext *context, GpuComponentServices *services)
- : _context{ context }, _services{ services }, _components{}, _tensors{}, _dependency_graph{}
+ : _context{context}, _services{services}, _components{}, _tensors{}, _dependency_graph{}
{
}
GpuKernelComponentStream GpuKernelComponentGraph::fuse(const MemoryDescriptorMap &mem_map) const
{
- GpuKernelComponentStream stream{ _context, _services, mem_map };
+ GpuKernelComponentStream stream{_context, _services, mem_map};
const auto op_seq = _dependency_graph.build_operators_sequence();
stream.new_component_group();
- for(auto op : op_seq)
+ for (auto op : op_seq)
{
const auto component = _components.at(op.op).get();
const auto success = stream.add_component(component);
- if(!success) // Assume first failure was because the root component is unfusable
+ if (!success) // Assume first failure was because the root component is unfusable
{
stream.new_component_group();
const auto success = stream.add_component(component);
diff --git a/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.h b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.h
index 85c9b45840..6f871a3c90 100644
--- a/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.h
+++ b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.h
@@ -70,21 +70,21 @@ public:
* @param[in] args Component arguments except for component id, which is auto-allocated
*/
template <typename T, typename... Args>
- void add_new_component(Args &&... args)
+ void add_new_component(Args &&...args)
{
- auto comp = _services->component_factory().create<T>(std::forward<Args>(args)...);
- ArgumentPack<ITensorInfo> tensors = comp->tensors();
+ auto comp = _services->component_factory().create<T>(std::forward<Args>(args)...);
+ ArgumentPack<ITensorInfo> tensors = comp->tensors();
const auto src_tensor_ids = get_tensor_ids(tensors.get_const_src_tensors());
const auto dst_tensor_ids = get_tensor_ids(tensors.get_const_dst_tensors());
- bool success = _dependency_graph.add_operator(comp->id(), src_tensor_ids, dst_tensor_ids);
+ bool success = _dependency_graph.add_operator(comp->id(), src_tensor_ids, dst_tensor_ids);
ARM_COMPUTE_UNUSED(success);
ARM_COMPUTE_ERROR_ON(!success);
_components[comp->id()] = std::move(comp);
- for(auto t : tensors.get_const_src_tensors())
+ for (auto t : tensors.get_const_src_tensors())
{
_tensors[t->id()] = t;
}
- for(auto t : tensors.get_const_dst_tensors())
+ for (auto t : tensors.get_const_dst_tensors())
{
_tensors[t->id()] = t;
}
@@ -99,11 +99,11 @@ public:
private:
static std::vector<DependencyGraph::TensorId> get_tensor_ids(const std::vector<const ITensorInfo *> tensors);
- GpuWorkloadContext *_context;
- GpuComponentServices *_services;
+ GpuWorkloadContext *_context;
+ GpuComponentServices *_services;
std::map<ComponentId, std::unique_ptr<IGpuKernelComponent>> _components;
std::map<ITensorInfo::Id, const ITensorInfo *> _tensors;
- DependencyGraph _dependency_graph{};
+ DependencyGraph _dependency_graph{};
};
} // namespace dynamic_fusion
} // namespace experimental
diff --git a/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.cpp b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.cpp
index 81c3f0c800..5a6d125d96 100644
--- a/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.cpp
+++ b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.cpp
@@ -25,6 +25,7 @@
#include "arm_compute/core/ITensorInfo.h"
#include "arm_compute/core/Validate.h"
+
#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h"
#include <algorithm>
@@ -37,86 +38,87 @@ namespace dynamic_fusion
{
bool GpuKernelComponentGroup::add_component(ComponentPtr component)
{
- ARM_COMPUTE_ERROR_ON_MSG(
- _finalized, "The component group has been finalized and cannot be altered.");
+ ARM_COMPUTE_ERROR_ON_MSG(_finalized, "The component group has been finalized and cannot be altered.");
// note: Constraint 1 is guaranteed as a precondition
// Constraint 2
- if(component->type() != GpuComponentType::Output && _components.size() >= max_fused_components)
+ if (component->type() != GpuComponentType::Output && _components.size() >= max_fused_components)
{
return false;
}
// Constraint 3.1: Pattern: (Unfusable + Output)
- if(!_components.empty() && get_root_component()->type() == GpuComponentType::Unfusable && component->type() != GpuComponentType::Output)
+ if (!_components.empty() && get_root_component()->type() == GpuComponentType::Unfusable &&
+ component->type() != GpuComponentType::Output)
{
return false;
}
// Constraint 3.2
- if(!_components.empty() && (component->type() != GpuComponentType::Simple && component->type() != GpuComponentType::Output))
+ if (!_components.empty() &&
+ (component->type() != GpuComponentType::Simple && component->type() != GpuComponentType::Output))
{
return false;
}
// Constraint 4
- if(component->type() != GpuComponentType::Unfusable && component->tensors().get_const_dst_tensors().size() != 1U)
+ if (component->type() != GpuComponentType::Unfusable && component->tensors().get_const_dst_tensors().size() != 1U)
{
return false;
}
// Constraint 5
- if(!_components.empty() && !(get_root_component()->properties() == component->properties()))
+ if (!_components.empty() && !(get_root_component()->properties() == component->properties()))
{
return false;
}
// Constraint 7
- if(!_components.empty())
+ if (!_components.empty())
{
const auto root_dst_tensors = get_root_component()->tensors().get_const_dst_tensors();
ARM_COMPUTE_ERROR_ON(root_dst_tensors.empty());
const auto first_dst_tensor = root_dst_tensors[0];
const auto dst_tensors = component->tensors().get_const_dst_tensors();
- for(const auto &t : root_dst_tensors)
+ for (const auto &t : root_dst_tensors)
{
- if(detail::have_different_dimensions(t->tensor_shape(), first_dst_tensor->tensor_shape(), 0))
+ if (detail::have_different_dimensions(t->tensor_shape(), first_dst_tensor->tensor_shape(), 0))
{
return false;
}
}
- for(const auto &t : dst_tensors)
+ for (const auto &t : dst_tensors)
{
- if(detail::have_different_dimensions(t->tensor_shape(), first_dst_tensor->tensor_shape(), 0))
+ if (detail::have_different_dimensions(t->tensor_shape(), first_dst_tensor->tensor_shape(), 0))
{
return false;
}
}
}
// Constraint 8
- if(!_components.empty())
+ if (!_components.empty())
{
const auto root_dst_tensors = get_root_component()->tensors().get_const_dst_tensors();
ARM_COMPUTE_ERROR_ON(root_dst_tensors.empty());
const auto first_dst_tensor_layout = root_dst_tensors[0]->data_layout();
const auto dst_tensors = component->tensors().get_const_dst_tensors();
- for(const auto &t : root_dst_tensors)
+ for (const auto &t : root_dst_tensors)
{
- if(t->data_layout() != first_dst_tensor_layout)
+ if (t->data_layout() != first_dst_tensor_layout)
{
return false;
}
}
- for(const auto &t : dst_tensors)
+ for (const auto &t : dst_tensors)
{
- if(t->data_layout() != first_dst_tensor_layout)
+ if (t->data_layout() != first_dst_tensor_layout)
{
return false;
}
}
}
// Constraint 9
- if(component->tensors().get_const_dst_tensors().size() >= max_dst_tensors)
+ if (component->tensors().get_const_dst_tensors().size() >= max_dst_tensors)
{
return false;
}
// Constraint 9 corollary
- if(component->type() == GpuComponentType::Output && _components.size() >= max_fused_components + max_dst_tensors)
+ if (component->type() == GpuComponentType::Output && _components.size() >= max_fused_components + max_dst_tensors)
{
return false;
}
@@ -126,36 +128,36 @@ bool GpuKernelComponentGroup::add_component(ComponentPtr component)
void GpuKernelComponentGroup::finalize()
{
- if(_finalized)
+ if (_finalized)
{
return;
}
_finalized = true;
- std::set<const ITensorInfo *> output_tensors;
+ std::set<const ITensorInfo *> output_tensors;
std::map<const ITensorInfo *, std::vector<const ITensorInfo *>> possible_tile_map;
- std::map<const ITensorInfo *, int32_t> tile_usages;
+ std::map<const ITensorInfo *, int32_t> tile_usages;
- for(auto component : _components)
+ for (auto component : _components)
{
- const auto tensors = component->tensors();
+ const auto tensors = component->tensors();
const auto src_tensors = tensors.get_const_src_tensors();
const auto dst_tensors = tensors.get_const_dst_tensors();
// Detect input, output and intermediate tensors.
- for(auto tensor : src_tensors)
+ for (auto tensor : src_tensors)
{
const auto output_tensors_it = output_tensors.find(tensor);
- if(output_tensors_it != output_tensors.end())
+ if (output_tensors_it != output_tensors.end())
{
// This tensor is the output of another operator.
// It must be marked as intermediate tensor.
output_tensors.erase(output_tensors_it);
_interm_tensors.insert(tensor);
}
- else if(_interm_tensors.find(tensor) == _interm_tensors.end())
+ else if (_interm_tensors.find(tensor) == _interm_tensors.end())
{
_input_tensors.insert(tensor);
@@ -164,7 +166,7 @@ void GpuKernelComponentGroup::finalize()
}
}
- for(auto tensor : dst_tensors)
+ for (auto tensor : dst_tensors)
{
ARM_COMPUTE_ERROR_ON(_input_tensors.find(tensor) != _input_tensors.end());
ARM_COMPUTE_ERROR_ON(output_tensors.find(tensor) != output_tensors.end());
@@ -177,27 +179,27 @@ void GpuKernelComponentGroup::finalize()
// Check if the output can overwrite the input tile.
const auto component_type = component->type();
- if(component_type == GpuComponentType::Simple || component_type == GpuComponentType::Output)
+ if (component_type == GpuComponentType::Simple || component_type == GpuComponentType::Output)
{
ARM_COMPUTE_ERROR_ON(dst_tensors.size() != 1);
- const auto dst_tensor = dst_tensors[0];
- const auto &dst_shape = dst_tensor->tensor_shape();
- const auto &dst_type = dst_tensor->data_type();
+ const auto dst_tensor = dst_tensors[0];
+ const auto &dst_shape = dst_tensor->tensor_shape();
+ const auto &dst_type = dst_tensor->data_type();
tile_usages[dst_tensor] = 0;
- for(auto src_tensor : src_tensors)
+ for (auto src_tensor : src_tensors)
{
const auto &src_shape = src_tensor->tensor_shape();
- const auto &src_type = src_tensor->data_type();
+ const auto &src_type = src_tensor->data_type();
- if(src_shape == dst_shape && src_type == dst_type)
+ if (src_shape == dst_shape && src_type == dst_type)
{
const auto tile_usages_it = tile_usages.find(src_tensor);
ARM_COMPUTE_ERROR_ON(tile_usages_it == tile_usages.end());
- if(component_type == GpuComponentType::Simple || tile_usages_it->second > 0)
+ if (component_type == GpuComponentType::Simple || tile_usages_it->second > 0)
{
// Increase the number of tile usages unless this component is an output
// and the tile has not been shared with any component.
@@ -212,7 +214,7 @@ void GpuKernelComponentGroup::finalize()
else
{
// Outputs of complex and unfusable components need dedicated tile.
- for(auto tensor : dst_tensors)
+ for (auto tensor : dst_tensors)
{
tile_usages[tensor] = 0;
}
@@ -220,25 +222,25 @@ void GpuKernelComponentGroup::finalize()
}
// Find the smallest list of tiles that the intermediate tensors need to write to.
- for(auto tensor : _input_tensors)
+ for (auto tensor : _input_tensors)
{
_tile_map[tensor] = tensor;
}
- for(auto component : _components)
+ for (auto component : _components)
{
const auto dst_tensors = component->tensors().get_const_dst_tensors();
- for(auto tensor : dst_tensors)
+ for (auto tensor : dst_tensors)
{
const auto target_tiles = possible_tile_map.at(tensor);
- _tile_map[tensor] = tensor;
+ _tile_map[tensor] = tensor;
- for(auto target : target_tiles)
+ for (auto target : target_tiles)
{
const auto num_usage = tile_usages[target];
- if(num_usage <= 1)
+ if (num_usage <= 1)
{
// The target tile is consumed by only this operator, so we can reuse it
// for the destination tensor data.
@@ -249,26 +251,23 @@ void GpuKernelComponentGroup::finalize()
}
}
- for(auto tensor : output_tensors)
+ for (auto tensor : output_tensors)
{
_tile_map[tensor] = tensor;
}
// All intermediate tensors that cannot be shared with any previous tensor
// will need to be declared as tile variable.
- for(auto tensor_tile : _tile_map)
+ for (auto tensor_tile : _tile_map)
{
- if(tensor_tile.first == tensor_tile.second &&
- _interm_tensors.find(tensor_tile.first) != _interm_tensors.end())
+ if (tensor_tile.first == tensor_tile.second && _interm_tensors.find(tensor_tile.first) != _interm_tensors.end())
{
_tiles.push_back(tensor_tile.first);
}
}
- std::set_union(
- _input_tensors.begin(), _input_tensors.end(),
- output_tensors.begin(), output_tensors.end(),
- std::back_inserter(_argument_tensors));
+ std::set_union(_input_tensors.begin(), _input_tensors.end(), output_tensors.begin(), output_tensors.end(),
+ std::back_inserter(_argument_tensors));
_any_output_tensor = *output_tensors.begin();
}
@@ -282,7 +281,7 @@ const ITensorInfo *GpuKernelComponentGroup::get_tile_for_tensor(const ITensorInf
{
ARM_COMPUTE_ERROR_ON_MSG(!_finalized, "The component group must have been finalized.");
- if(_tile_map.find(tensor) != _tile_map.end())
+ if (_tile_map.find(tensor) != _tile_map.end())
{
return _tile_map.at(tensor);
}
@@ -304,7 +303,7 @@ std::vector<const ITensorInfo *> GpuKernelComponentGroup::get_argument_tensors()
GpuKernelComponentGroup::ComponentPtr GpuKernelComponentGroup::get_root_component() const
{
- if(empty())
+ if (empty())
{
return nullptr;
}
diff --git a/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h
index c939aec369..6ad71abb39 100644
--- a/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h
+++ b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h
@@ -25,12 +25,11 @@
#define SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELCOMPONENTGROUP
#include "components/Types.h"
-
#include <cstdint>
#include <cstdlib>
-#include <vector>
-#include <set>
#include <map>
+#include <set>
+#include <vector>
namespace arm_compute
{
@@ -129,9 +128,9 @@ public:
/** Get the number of components within the group */
size_t size() const;
/** Check if the component group is empty */
- bool empty() const;
- ComponentPtr &operator[](size_t index);
- const ComponentPtr &operator[](size_t index) const;
+ bool empty() const;
+ ComponentPtr &operator[](size_t index);
+ const ComponentPtr &operator[](size_t index) const;
typename std::vector<ComponentPtr>::iterator begin();
typename std::vector<ComponentPtr>::iterator end();
typename std::vector<ComponentPtr>::const_iterator begin() const;
@@ -142,13 +141,13 @@ public:
private:
std::vector<ComponentPtr> _components{};
- bool _finalized{ false };
+ bool _finalized{false};
- std::vector<const ITensorInfo *> _argument_tensors{};
- std::set<const ITensorInfo *> _input_tensors{};
- std::set<const ITensorInfo *> _interm_tensors{};
- const ITensorInfo *_any_output_tensor{ nullptr };
- std::vector<const ITensorInfo *> _tiles{};
+ std::vector<const ITensorInfo *> _argument_tensors{};
+ std::set<const ITensorInfo *> _input_tensors{};
+ std::set<const ITensorInfo *> _interm_tensors{};
+ const ITensorInfo *_any_output_tensor{nullptr};
+ std::vector<const ITensorInfo *> _tiles{};
std::map<const ITensorInfo *, const ITensorInfo *> _tile_map{};
};
} // namespace dynamic_fusion
diff --git a/src/dynamic_fusion/sketch/gpu/GpuKernelComponentStream.cpp b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentStream.cpp
index a2b6623370..8042e3dd08 100644
--- a/src/dynamic_fusion/sketch/gpu/GpuKernelComponentStream.cpp
+++ b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentStream.cpp
@@ -23,9 +23,9 @@
*/
#include "GpuKernelComponentStream.h"
+#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h"
#include "src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.h"
#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSourceCode.h"
-#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h"
namespace arm_compute
{
@@ -33,8 +33,10 @@ namespace experimental
{
namespace dynamic_fusion
{
-GpuKernelComponentStream::GpuKernelComponentStream(GpuWorkloadContext *context, GpuComponentServices *services, const MemoryDescriptorMap &mem_map)
- : _context{ context }, _services{ services }, _component_groups{}, _mem_map{ mem_map }
+GpuKernelComponentStream::GpuKernelComponentStream(GpuWorkloadContext *context,
+ GpuComponentServices *services,
+ const MemoryDescriptorMap &mem_map)
+ : _context{context}, _services{services}, _component_groups{}, _mem_map{mem_map}
{
}
@@ -42,7 +44,7 @@ GpuWorkloadSourceCode GpuKernelComponentStream::write_workload_code()
{
GpuWorkloadSourceCode source_code;
// Traverse through component groups and assemble workload together
- for(auto && group : _component_groups)
+ for (auto &&group : _component_groups)
{
group.finalize();
diff --git a/src/dynamic_fusion/sketch/gpu/GpuKernelComponentStream.h b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentStream.h
index ba2503a938..ef8a8a15b0 100644
--- a/src/dynamic_fusion/sketch/gpu/GpuKernelComponentStream.h
+++ b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentStream.h
@@ -25,6 +25,7 @@
#define SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELCOMPONENTSTREAM
#include "arm_compute/dynamic_fusion/sketch/MemoryDescriptor.h"
+
#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSourceCode.h"
@@ -53,7 +54,9 @@ public:
* @param[in] services @ref GpuComponentServices to be used throughout the stream
* @param[in] mem_map @ref MemoryDescriptor map used to assemble the @ref GpuWorkloadSourceCode
*/
- GpuKernelComponentStream(GpuWorkloadContext *context, GpuComponentServices *services, const MemoryDescriptorMap &mem_map);
+ GpuKernelComponentStream(GpuWorkloadContext *context,
+ GpuComponentServices *services,
+ const MemoryDescriptorMap &mem_map);
/** Allow instances of this class to be copy constructed */
GpuKernelComponentStream(const GpuKernelComponentStream &stream) = default;
/** Allow instances of this class to be copied */
diff --git a/src/dynamic_fusion/sketch/gpu/GpuKernelSourceCode.h b/src/dynamic_fusion/sketch/gpu/GpuKernelSourceCode.h
index 64e1cdc3bc..24812cd8a7 100644
--- a/src/dynamic_fusion/sketch/gpu/GpuKernelSourceCode.h
+++ b/src/dynamic_fusion/sketch/gpu/GpuKernelSourceCode.h
@@ -26,6 +26,7 @@
#include "arm_compute/core/CL/CLCompileContext.h"
#include "arm_compute/core/Window.h"
+
#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
diff --git a/src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.cpp b/src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.cpp
index c99984fc0e..502ceab807 100644
--- a/src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.cpp
+++ b/src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.cpp
@@ -26,9 +26,9 @@
#include "arm_compute/core/experimental/Types.h"
#include "src/dynamic_fusion/sketch/ArgumentPack.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuComponentServices.h"
-#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h"
#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.h"
+#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuComponentServices.h"
#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.h"
#else // ACL_INTERNAL_TEST_CKW_IN_DF
@@ -42,7 +42,7 @@ namespace experimental
namespace dynamic_fusion
{
GpuLogicalKernel::GpuLogicalKernel(GpuComponentServices *services, const GpuKernelComponentGroup &components)
- : _comp_group{ components }, _store_components{}
+ : _comp_group{components}, _store_components{}
{
ARM_COMPUTE_UNUSED(services);
}
@@ -51,9 +51,9 @@ GpuKernelSourceCode GpuLogicalKernel::write_kernel_code()
{
GpuKernelSourceCode code;
#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
- ClTemplateWriter writer { _comp_group };
+ ClTemplateWriter writer{_comp_group};
#else // ACL_INTERNAL_TEST_CKW_IN_DF
- GpuCkwDriver writer { _comp_group };
+ GpuCkwDriver writer{_comp_group};
#endif // ACL_INTERNAL_TEST_CKW_IN_DF
code.name(writer.get_name());
diff --git a/src/dynamic_fusion/sketch/gpu/GpuOperatorGroup.cpp b/src/dynamic_fusion/sketch/gpu/GpuOperatorGroup.cpp
index 7bb14c8698..aec8b9db4f 100644
--- a/src/dynamic_fusion/sketch/gpu/GpuOperatorGroup.cpp
+++ b/src/dynamic_fusion/sketch/gpu/GpuOperatorGroup.cpp
@@ -36,20 +36,15 @@ namespace
std::vector<DependencyGraph::TensorId> get_tensor_ids(const std::vector<const ITensorInfo *> tensors)
{
std::vector<DependencyGraph::TensorId> tensor_ids{};
- std::transform(
- std::begin(tensors), std::end(tensors),
- std::back_inserter(tensor_ids),
- [](const auto & t)
- {
- return t->id();
- });
+ std::transform(std::begin(tensors), std::end(tensors), std::back_inserter(tensor_ids),
+ [](const auto &t) { return t->id(); });
return tensor_ids;
}
} // namespace
Operator::Operator(OperatorId id, GpuOperatorType operator_type, const ArgumentPack<ITensorInfo> &tensors)
- : _id{ id }, _operator_type{ operator_type }, _tensors{ tensors }
+ : _id{id}, _operator_type{operator_type}, _tensors{tensors}
{
}
@@ -73,69 +68,69 @@ bool GpuOperatorGroup::try_add_operator(const Operator &op, bool is_output) cons
const auto src_tensor_ids = get_tensor_ids(op.tensors().get_const_src_tensors());
const auto dst_tensor_ids = get_tensor_ids(op.tensors().get_const_dst_tensors());
// Constraint 1
- if(!_graph.try_add_operator_as_linear(op.id(), src_tensor_ids, dst_tensor_ids, is_output))
+ if (!_graph.try_add_operator_as_linear(op.id(), src_tensor_ids, dst_tensor_ids, is_output))
{
return false;
}
// Constraint 2
- if(_operators.size() >= max_fused_operators)
+ if (_operators.size() >= max_fused_operators)
{
return false;
}
// Constraint 3.1: Pattern: (Unfusable)
- if(_operators.size() > 0 && get_root_operator()->operator_type() == GpuOperatorType::Unfusable)
+ if (_operators.size() > 0 && get_root_operator()->operator_type() == GpuOperatorType::Unfusable)
{
return false;
}
// Constraint 3.2
- if(_operators.size() > 0 && (op.operator_type() != GpuOperatorType::Simple))
+ if (_operators.size() > 0 && (op.operator_type() != GpuOperatorType::Simple))
{
return false;
}
// Constraint 4
- if(op.operator_type() != GpuOperatorType::Unfusable && op.tensors().get_const_dst_tensors().size() != 1U)
+ if (op.operator_type() != GpuOperatorType::Unfusable && op.tensors().get_const_dst_tensors().size() != 1U)
{
return false;
}
// Constraint 5
- if(_operators.size() > 0)
+ if (_operators.size() > 0)
{
const auto root_dst_tensors = get_root_operator()->tensors().get_const_dst_tensors();
ARM_COMPUTE_ERROR_ON(root_dst_tensors.empty());
const auto first_dst_tensor = root_dst_tensors[0];
const auto dst_tensors = op.tensors().get_const_dst_tensors();
- for(const auto &t : root_dst_tensors)
+ for (const auto &t : root_dst_tensors)
{
- if(detail::have_different_dimensions(t->tensor_shape(), first_dst_tensor->tensor_shape(), 0))
+ if (detail::have_different_dimensions(t->tensor_shape(), first_dst_tensor->tensor_shape(), 0))
{
return false;
}
}
- for(const auto &t : dst_tensors)
+ for (const auto &t : dst_tensors)
{
- if(detail::have_different_dimensions(t->tensor_shape(), first_dst_tensor->tensor_shape(), 0))
+ if (detail::have_different_dimensions(t->tensor_shape(), first_dst_tensor->tensor_shape(), 0))
{
return false;
}
}
}
// Constraint 6
- if(_operators.size() > 0)
+ if (_operators.size() > 0)
{
const auto root_dst_tensors = get_root_operator()->tensors().get_const_dst_tensors();
ARM_COMPUTE_ERROR_ON(root_dst_tensors.empty());
const auto first_dst_tensor_layout = root_dst_tensors[0]->data_layout();
const auto dst_tensors = op.tensors().get_const_dst_tensors();
- for(const auto &t : root_dst_tensors)
+ for (const auto &t : root_dst_tensors)
{
- if(t->data_layout() != first_dst_tensor_layout)
+ if (t->data_layout() != first_dst_tensor_layout)
{
return false;
}
}
- for(const auto &t : dst_tensors)
+ for (const auto &t : dst_tensors)
{
- if(t->data_layout() != first_dst_tensor_layout)
+ if (t->data_layout() != first_dst_tensor_layout)
{
return false;
}
@@ -151,16 +146,17 @@ void GpuOperatorGroup::add_operator(const Operator &op, bool is_output)
_graph.add_operator_as_linear(op.id(), src_tensor_ids, dst_tensor_ids, is_output);
_operators[op.id()] = op;
}
-Operator GpuOperatorGroup::new_operator(const GpuOperatorType &operator_type, const ArgumentPack<ITensorInfo> &tensors) const
+Operator GpuOperatorGroup::new_operator(const GpuOperatorType &operator_type,
+ const ArgumentPack<ITensorInfo> &tensors) const
{
auto new_id = static_cast<OperatorId>(_operators.size());
- return Operator{ new_id, operator_type, tensors };
+ return Operator{new_id, operator_type, tensors};
}
const Operator *GpuOperatorGroup::get_root_operator() const
{
const auto roots = _graph.get_root_ops();
ARM_COMPUTE_ERROR_ON(roots.size() > 1);
- if(roots.empty())
+ if (roots.empty())
{
return nullptr;
}
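Most of the churn in this file comes down to two purely mechanical spacing rules: control-flow keywords now take a space before their parenthesis (if (, for (, while (, switch (), and braced initializers lose their interior padding ({x} rather than { x }). Short lambdas, as in the get_tensor_ids hunk, may also stay on one line inside an argument list. A minimal sketch with stand-in types:

    #include <vector>

    struct Operator
    {
        int id{-1}; // default member initializer: {-1}, no interior spaces
    };

    int first_positive_id(const std::vector<Operator> &ops)
    {
        for (const auto &op : ops) // space after 'for'
        {
            if (op.id > 0) // space after 'if'
            {
                return op.id;
            }
        }
        return -1;
    }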
diff --git a/src/dynamic_fusion/sketch/gpu/GpuOperatorGroup.h b/src/dynamic_fusion/sketch/gpu/GpuOperatorGroup.h
index 308a9d796a..0a2369d357 100644
--- a/src/dynamic_fusion/sketch/gpu/GpuOperatorGroup.h
+++ b/src/dynamic_fusion/sketch/gpu/GpuOperatorGroup.h
@@ -25,9 +25,11 @@
#define SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUOPERATORGROUP
#include "arm_compute/core/ITensorInfo.h"
+
#include "src/dynamic_fusion/sketch/ArgumentPack.h"
#include "src/dynamic_fusion/sketch/gpu/GpuOperatorProperties.h"
#include "src/dynamic_fusion/sketch/utils/DependencyGraph.h"
+
#include <map>
namespace arm_compute
@@ -104,7 +106,7 @@ public:
const Operator *get_root_operator() const;
private:
- DependencyGraph _graph{};
+ DependencyGraph _graph{};
std::map<OperatorId, Operator> _operators{};
};
} // namespace dynamic_fusion
diff --git a/src/dynamic_fusion/sketch/gpu/GpuWorkloadContext.cpp b/src/dynamic_fusion/sketch/gpu/GpuWorkloadContext.cpp
index c2bd012703..36cad790c7 100644
--- a/src/dynamic_fusion/sketch/gpu/GpuWorkloadContext.cpp
+++ b/src/dynamic_fusion/sketch/gpu/GpuWorkloadContext.cpp
@@ -23,7 +23,9 @@
*/
#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadContext.h"
+
#include "arm_compute/core/CL/CLCompileContext.h"
+
#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadContextImpl.h"
namespace arm_compute
@@ -33,7 +35,7 @@ namespace experimental
namespace dynamic_fusion
{
GpuWorkloadContext::GpuWorkloadContext(CLCompileContext *cl_compile_ctx)
- : _impl{ std::make_unique<Impl>(GpuLanguage::OpenCL, cl_compile_ctx) }
+ : _impl{std::make_unique<Impl>(GpuLanguage::OpenCL, cl_compile_ctx)}
{
}
@@ -74,7 +76,11 @@ const GpuWorkloadContext::Impl &GpuWorkloadContext::implementation() const
}
GpuWorkloadContext::Impl::Impl(GpuLanguage gpu_language, CLCompileContext *cl_compile_ctx)
- : _gpu_language(gpu_language), _cl_compile_ctx(cl_compile_ctx), _next_tensor_id(1), _mem_map(), _managed_tensor_info()
+ : _gpu_language(gpu_language),
+ _cl_compile_ctx(cl_compile_ctx),
+ _next_tensor_id(1),
+ _mem_map(),
+ _managed_tensor_info()
{
}
@@ -100,7 +106,7 @@ void GpuWorkloadContext::Impl::register_user_tensor(ITensorInfo &tensor_info)
const auto tensor_id = next_tensor_id();
tensor_info.set_id(tensor_id);
- _mem_map[tensor_id] = MemoryDescriptor{ MemoryType::User };
+ _mem_map[tensor_id] = MemoryDescriptor{MemoryType::User};
// Save a *copy* of the user tensor info in workload context for future reference
// Note that this means if the user modifies the @p tensor_info, the change will not be reflected in the context
_managed_tensor_info.emplace(tensor_info.id(), std::make_unique<TensorInfo>(tensor_info));
@@ -111,7 +117,7 @@ ITensorInfo *GpuWorkloadContext::Impl::create_virtual_tensor()
auto tensor_info = std::make_unique<TensorInfo>();
const auto tensor_id = -next_tensor_id();
tensor_info->set_id(tensor_id);
- _mem_map[tensor_id] = MemoryDescriptor{ MemoryType::Virtual };
+ _mem_map[tensor_id] = MemoryDescriptor{MemoryType::Virtual};
auto inserted = _managed_tensor_info.emplace(tensor_info->id(), std::move(tensor_info));
return inserted.first->second.get();
}
@@ -121,7 +127,7 @@ ITensorInfo *GpuWorkloadContext::Impl::create_auxiliary_tensor(const ITensorInfo
auto tensor_info = std::make_unique<TensorInfo>(itensor_info);
const auto tensor_id = next_tensor_id();
tensor_info->set_id(tensor_id);
- _mem_map[tensor_id] = MemoryDescriptor{ MemoryType::Auxiliary, AuxMemoryInfo{ tensor_info->total_size() } };
+ _mem_map[tensor_id] = MemoryDescriptor{MemoryType::Auxiliary, AuxMemoryInfo{tensor_info->total_size()}};
auto inserted = _managed_tensor_info.emplace(tensor_info->id(), std::move(tensor_info));
return inserted.first->second.get();
}
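Constructor initializer lists follow the same fit-or-split rule: the Impl constructor above overflows the limit, so each member initializer gets its own line under the colon, while short lists (see the GpuWorkloadSketch constructor below) stay packed on a single line. Sketched on stand-in members:

    struct Wide
    {
        Wide(int language, void *ctx)
            : _gpu_language(language),
              _cl_compile_ctx(ctx),
              _next_tensor_id(1)
        {
        }
        int   _gpu_language;
        void *_cl_compile_ctx;
        int   _next_tensor_id;
    };

    struct Narrow
    {
        explicit Narrow(int v) : _v{v} // fits, so the list stays on one line
        {
        }
        int _v;
    };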
diff --git a/src/dynamic_fusion/sketch/gpu/GpuWorkloadContextImpl.h b/src/dynamic_fusion/sketch/gpu/GpuWorkloadContextImpl.h
index c169476a70..7d9699031f 100644
--- a/src/dynamic_fusion/sketch/gpu/GpuWorkloadContextImpl.h
+++ b/src/dynamic_fusion/sketch/gpu/GpuWorkloadContextImpl.h
@@ -27,8 +27,8 @@
#include "arm_compute/core/CL/CLCompileContext.h"
#include "arm_compute/core/ITensorInfo.h"
-#include "arm_compute/dynamic_fusion/sketch/MemoryDescriptor.h"
#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadContext.h"
+#include "arm_compute/dynamic_fusion/sketch/MemoryDescriptor.h"
namespace arm_compute
{
@@ -93,8 +93,8 @@ private:
GpuLanguage _gpu_language;
CLCompileContext *_cl_compile_ctx;
- ITensorInfo::Id _next_tensor_id;
- MemoryDescriptorMap _mem_map;
+ ITensorInfo::Id _next_tensor_id;
+ MemoryDescriptorMap _mem_map;
std::map<ITensorInfo::Id, std::unique_ptr<TensorInfo>> _managed_tensor_info;
};
diff --git a/src/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.cpp b/src/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.cpp
index d3a20c0dfe..973f7c747f 100644
--- a/src/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.cpp
+++ b/src/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.cpp
@@ -22,6 +22,7 @@
* SOFTWARE.
*/
#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h"
+
#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h"
namespace arm_compute
@@ -30,8 +31,7 @@ namespace experimental
{
namespace dynamic_fusion
{
-GpuWorkloadSketch::GpuWorkloadSketch(Context *context)
- : _impl{ std::make_unique<Implementation>(context) }
+GpuWorkloadSketch::GpuWorkloadSketch(Context *context) : _impl{std::make_unique<Implementation>(context)}
{
}
GpuWorkloadSketch::~GpuWorkloadSketch()
diff --git a/src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h b/src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h
index d3033898e9..fea4fe9577 100644
--- a/src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h
+++ b/src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h
@@ -24,8 +24,9 @@
#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUWORKLOADSKETCHIMPL
#define SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUWORKLOADSKETCHIMPL
-#include "arm_compute/dynamic_fusion/sketch/MemoryDescriptor.h"
#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h"
+#include "arm_compute/dynamic_fusion/sketch/MemoryDescriptor.h"
+
#include "src/dynamic_fusion/sketch/gpu/GpuComponentServices.h"
#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.h"
#include "src/dynamic_fusion/sketch/gpu/GpuOperatorGroup.h"
@@ -45,12 +46,8 @@ public:
*
* @param[in] context global workload creation context
*/
- explicit Implementation(
- Context *context)
- : _context{ context },
- _comp_services{},
- _component_graph{ _context, &_comp_services },
- _operator_group{}
+ explicit Implementation(Context *context)
+ : _context{context}, _comp_services{}, _component_graph{_context, &_comp_services}, _operator_group{}
{
}
/** Prevent instances of this class from being copy constructed */
diff --git a/src/dynamic_fusion/sketch/gpu/GpuWorkloadSourceCode.h b/src/dynamic_fusion/sketch/gpu/GpuWorkloadSourceCode.h
index 578366daaf..43bcc47fa0 100644
--- a/src/dynamic_fusion/sketch/gpu/GpuWorkloadSourceCode.h
+++ b/src/dynamic_fusion/sketch/gpu/GpuWorkloadSourceCode.h
@@ -26,6 +26,7 @@
#include "arm_compute/core/experimental/Types.h"
#include "arm_compute/dynamic_fusion/sketch/MemoryDescriptor.h"
+
#include "src/dynamic_fusion/sketch/gpu/GpuKernelSourceCode.h"
#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadContextImpl.h"
@@ -45,7 +46,7 @@ namespace
*/
GpuKernelArgumentList extract_kernel_args_for_one_tensor(GpuKernelArgumentList &flat_kernel_args)
{
- if(flat_kernel_args.empty())
+ if (flat_kernel_args.empty())
{
return {};
}
@@ -56,10 +57,10 @@ GpuKernelArgumentList extract_kernel_args_for_one_tensor(GpuKernelArgumentList &
flat_kernel_args.pop_front();
const auto tensor_id = karg_head.id();
- while(!flat_kernel_args.empty())
+ while (!flat_kernel_args.empty())
{
const GpuKernelArgumentBinding &karg = flat_kernel_args.front();
- if(karg.id() != tensor_id) // Encountered the next tensor; return the current tensor's kernel arguments
+ if (karg.id() != tensor_id) // Encountered the next tensor; return the current tensor's kernel arguments
{
return tensor_kargs;
}
@@ -68,7 +69,7 @@ GpuKernelArgumentList extract_kernel_args_for_one_tensor(GpuKernelArgumentList &
}
return tensor_kargs;
}
-}
+} // namespace
#endif // ACL_INTERNAL_TEST_CKW_IN_DF
/** Uniquely identifies a @ref GpuUnitWorkload within a @ref GpuWorkloadSourceCode */
using UnitWorkloadId = int32_t;
@@ -92,9 +93,7 @@ public:
GpuWorkloadArgument(const ITensorInfo &tensor_info,
const MemoryDescriptor &mem_desc,
const GpuKernelArgumentInfo &kernel_arg_info)
- : _tensor_info{ tensor_info },
- _mem_desc{ mem_desc },
- _kernel_arg_info{ kernel_arg_info }
+ : _tensor_info{tensor_info}, _mem_desc{mem_desc}, _kernel_arg_info{kernel_arg_info}
{
}
#else // ACL_INTERNAL_TEST_CKW_IN_DF
@@ -107,9 +106,7 @@ public:
GpuWorkloadArgument(const ITensorInfo &tensor_info,
const MemoryDescriptor &mem_desc,
const GpuKernelArgumentList &kernel_args)
- : _tensor_info{ tensor_info },
- _mem_desc{ mem_desc },
- _kernel_args{ kernel_args }
+ : _tensor_info{tensor_info}, _mem_desc{mem_desc}, _kernel_args{kernel_args}
{
}
#endif // ACL_INTERNAL_TEST_CKW_IN_DF
@@ -175,9 +172,9 @@ private:
TensorInfo _tensor_info{};
MemoryDescriptor _mem_desc{};
#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
- GpuKernelArgumentInfo _kernel_arg_info {};
+ GpuKernelArgumentInfo _kernel_arg_info{};
#else // ACL_INTERNAL_TEST_CKW_IN_DF
- GpuKernelArgumentList _kernel_args {};
+ GpuKernelArgumentList _kernel_args{};
#endif // ACL_INTERNAL_TEST_CKW_IN_DF
};
@@ -190,7 +187,7 @@ struct UnitWorkloadStage
Prepare, /**< Only run once at the beginning. */
Run, /**< Run every time after the first time. */
};
- Stage stage{ Stage::Run };
+ Stage stage{Stage::Run};
};
inline bool operator==(const UnitWorkloadStage &stage0, const UnitWorkloadStage &stage1)
@@ -212,7 +209,7 @@ public:
* @param[in] stage Stage of the unit workload
*/
GpuUnitWorkload(UnitWorkloadId id, const GpuKernelSourceCode &kernel_code, const UnitWorkloadStage &stage)
- : _id{ id }, _kernel_code{ kernel_code }, _stage{ stage }
+ : _id{id}, _kernel_code{kernel_code}, _stage{stage}
{
}
/** Get the id of the unit workload */
@@ -253,7 +250,10 @@ public:
*
* @return UnitWorkloadId Allocated unit workload id
*/
- UnitWorkloadId add_unit_workload(const GpuKernelSourceCode &kernel_code, const UnitWorkloadStage &stage, const MemoryDescriptorMap &mem_map, const GpuWorkloadContext *context)
+ UnitWorkloadId add_unit_workload(const GpuKernelSourceCode &kernel_code,
+ const UnitWorkloadStage &stage,
+ const MemoryDescriptorMap &mem_map,
+ const GpuWorkloadContext *context)
{
// Use the size of the kernel codes as Id
const auto uwk_id = static_cast<UnitWorkloadId>(_unit_workloads.size());
@@ -262,12 +262,13 @@ public:
#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
ARM_COMPUTE_UNUSED(context);
// Assemble kernel argument with memory descriptor to form workload argument
- for(const auto &id_arg : kernel_code.arguments())
+ for (const auto &id_arg : kernel_code.arguments())
{
- const auto arg_id = id_arg.first;
- const auto arg = id_arg.second;
- _workload_arguments[arg_id] = GpuWorkloadArgument{ *arg.tensor_info(), mem_map.at(arg_id), *arg.kernel_argument_info() };
- if(_tensor_uwork_map.find(arg_id) == _tensor_uwork_map.end())
+ const auto arg_id = id_arg.first;
+ const auto arg = id_arg.second;
+ _workload_arguments[arg_id] =
+ GpuWorkloadArgument{*arg.tensor_info(), mem_map.at(arg_id), *arg.kernel_argument_info()};
+ if (_tensor_uwork_map.find(arg_id) == _tensor_uwork_map.end())
{
_tensor_uwork_map[arg_id] = std::set<UnitWorkloadId>();
}
@@ -276,18 +277,19 @@ public:
#else // ACL_INTERNAL_TEST_CKW_IN_DF
GpuKernelArgumentList flat_kernel_args = kernel_code.arguments();
GpuKernelArgumentList tensor_kargs{};
- while(true)
+ while (true)
{
tensor_kargs = extract_kernel_args_for_one_tensor(flat_kernel_args);
- if(tensor_kargs.empty())
+ if (tensor_kargs.empty())
{
break;
}
else
{
const auto tensor_id = tensor_kargs.at(0).id();
- _workload_arguments[tensor_id] = GpuWorkloadArgument{ *context->implementation().get_tensor_info(tensor_id), mem_map.at(tensor_id), tensor_kargs };
- if(_tensor_uwork_map.find(tensor_id) == _tensor_uwork_map.end())
+ _workload_arguments[tensor_id] = GpuWorkloadArgument{
+ *context->implementation().get_tensor_info(tensor_id), mem_map.at(tensor_id), tensor_kargs};
+ if (_tensor_uwork_map.find(tensor_id) == _tensor_uwork_map.end())
{
_tensor_uwork_map[tensor_id] = std::set<UnitWorkloadId>();
}
@@ -308,7 +310,7 @@ public:
{
std::vector<UnitWorkloadId> ids{};
- for(const auto &uwk : _unit_workloads)
+ for (const auto &uwk : _unit_workloads)
{
ids.push_back(uwk.id());
}
@@ -323,7 +325,7 @@ public:
std::vector<ITensorInfo::Id> tensors() const
{
std::vector<ITensorInfo::Id> ids{};
- for(const auto &id_tensor : _workload_arguments)
+ for (const auto &id_tensor : _workload_arguments)
{
ids.push_back(id_tensor.first);
}
@@ -337,7 +339,7 @@ public:
}
private:
- std::vector<GpuUnitWorkload> _unit_workloads{};
+ std::vector<GpuUnitWorkload> _unit_workloads{};
std::map<ITensorInfo::Id, GpuWorkloadArgument> _workload_arguments{};
std::map<ITensorInfo::Id, std::set<UnitWorkloadId>> _tensor_uwork_map{};
};
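Two smaller rules are visible in this header: consecutive member declarations are column-aligned on their names (the _unit_workloads hunk just above), and every namespace must now close with a "} // namespace" comment (the hunk earlier in this file that turns a bare "}" into "} // namespace"). A stand-in:

    #include <map>
    #include <set>
    #include <vector>

    namespace
    {
    struct Workloads
    {
        // Names and trailing initializers are aligned across the block:
        std::vector<int>             _unit_workloads{};
        std::map<int, int>           _workload_arguments{};
        std::map<int, std::set<int>> _tensor_uwork_map{};
    };
    } // namespace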
diff --git a/src/dynamic_fusion/sketch/gpu/IGpuKernelWriter.h b/src/dynamic_fusion/sketch/gpu/IGpuKernelWriter.h
index 1d8b231efd..ad474674f9 100644
--- a/src/dynamic_fusion/sketch/gpu/IGpuKernelWriter.h
+++ b/src/dynamic_fusion/sketch/gpu/IGpuKernelWriter.h
@@ -26,6 +26,7 @@
#include "arm_compute/core/CL/CLCompileContext.h"
#include "arm_compute/core/Window.h"
+
#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
#include "src/dynamic_fusion/sketch/gpu/GpuKernelSourceCode.h"
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwComponentArgument.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwComponentArgument.cpp
index 4b4c22fa1d..c4ab110c92 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwComponentArgument.cpp
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwComponentArgument.cpp
@@ -23,6 +23,7 @@
*/
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwComponentArgument.h"
+
#include "ckw/Error.h"
namespace arm_compute
@@ -36,12 +37,12 @@ GpuCkwComponentArgument::GpuCkwComponentArgument()
{
}
-GpuCkwComponentArgument::GpuCkwComponentArgument(ckw::TensorOperand &tensor)
- : _tensor(&tensor)
+GpuCkwComponentArgument::GpuCkwComponentArgument(ckw::TensorOperand &tensor) : _tensor(&tensor)
{
}
-GpuCkwComponentArgument &GpuCkwComponentArgument::init_virtual_tensor(ckw::TileOperand &tile, const ckw::TensorTileSampler &tile_sampler)
+GpuCkwComponentArgument &GpuCkwComponentArgument::init_virtual_tensor(ckw::TileOperand &tile,
+ const ckw::TensorTileSampler &tile_sampler)
{
CKW_ASSERT(_tile == nullptr);
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwComponentArgument.h b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwComponentArgument.h
index 80f91389a0..863989a7bd 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwComponentArgument.h
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwComponentArgument.h
@@ -110,9 +110,9 @@ public:
const ckw::TensorTileSampler &tile_sampler() const;
private:
- ckw::TensorOperand *_tensor{ nullptr };
- ckw::TileOperand *_tile{ nullptr };
- ckw::TensorTileSampler _tile_sampler{};
+ ckw::TensorOperand *_tensor{nullptr};
+ ckw::TileOperand *_tile{nullptr};
+ ckw::TensorTileSampler _tile_sampler{};
};
} // namespace dynamic_fusion
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.cpp
index a24a172d77..c927f32bde 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.cpp
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.cpp
@@ -23,17 +23,16 @@
*/
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.h"
-#include "src/dynamic_fusion/sketch/gpu/ckw_driver/IGpuCkwComponentDriver.h"
-#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h"
-
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Window.h"
+
#include "src/common/utils/Log.h"
-#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/Common.h"
-
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/IGpuCkwComponentDriver.h"
+#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h"
using namespace ckw;
namespace arm_compute
@@ -43,11 +42,11 @@ namespace experimental
namespace dynamic_fusion
{
GpuCkwDriver::GpuCkwDriver(const GpuKernelComponentGroup &components)
- : _components{ components }, _kernel{ GpuTargetLanguage::OpenCL }, _code{}
+ : _components{components}, _kernel{GpuTargetLanguage::OpenCL}, _code{}
{
// Generate kernel name
std::string name = "";
- for(auto &comp : _components)
+ for (auto &comp : _components)
{
auto ckw_driver = comp->ckw_component_driver();
ARM_COMPUTE_ERROR_ON(ckw_driver == nullptr);
@@ -60,7 +59,7 @@ GpuCkwDriver::GpuCkwDriver(const GpuKernelComponentGroup &components)
GpuCkwScopedKernelWriter writer(&root_writer);
GpuCkwVariableTable vtable{};
- for(auto &comp : _components)
+ for (auto &comp : _components)
{
auto ckw_driver = comp->ckw_component_driver();
ARM_COMPUTE_ERROR_ON(ckw_driver == nullptr);
@@ -82,7 +81,7 @@ std::string GpuCkwDriver::get_code()
std::string GpuCkwDriver::get_config_id()
{
std::string id = "";
- for(auto &comp : _components)
+ for (auto &comp : _components)
{
auto ckw_driver = comp->ckw_component_driver();
ARM_COMPUTE_ERROR_ON(ckw_driver == nullptr);
@@ -101,9 +100,9 @@ Window GpuCkwDriver::get_window() const
GpuKernelArgumentList GpuCkwDriver::get_kernel_arguments()
{
GpuKernelArgumentList args{};
- for(const auto &arg : _kernel.arguments())
+ for (const auto &arg : _kernel.arguments())
{
- switch(arg.type())
+ switch (arg.type())
{
case KernelArgument::Type::TensorStorage:
{
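The include reshuffles in this file (and in most of the .cpp hunks that follow) come from one sorting rule: within each block, paths are ordered case-insensitively. That is why ckw_driver/components/utils/type_converter/Common.h ('c') now precedes GpuCkwKernelWriter.h ('G'), and why utils/helpers/AdjustVecSize.h sorts before Validate.h in the component files below. A stand-in translation unit recording the observed order (the rule is inferred from the hunks; the configuration file is not part of this delivery):

    #include <algorithm> // standard headers obey the same rule within a block
    #include <map>
    #include <string>

    // Case-insensitive order from the hunk above ('c' sorts before 'G'):
    //   src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/Common.h
    //   src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h
    //   src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h
    //   src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h
    //   src/dynamic_fusion/sketch/gpu/ckw_driver/IGpuCkwComponentDriver.h
    //   src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h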
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.h b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.h
index 19db575fea..2ca5fb435c 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.h
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.h
@@ -24,12 +24,12 @@
#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_GPUCKWDRIVER
#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_GPUCKWDRIVER
+#include "ckw/Kernel.h"
+
#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
#include "src/dynamic_fusion/sketch/gpu/IGpuKernelWriter.h"
-#include "ckw/Kernel.h"
-
#include <map>
#include <string>
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.cpp
index ca4f121566..5f8ce919e3 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.cpp
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.cpp
@@ -23,10 +23,12 @@
*/
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h"
-#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwComponentArgument.h"
+
#include "ckw/Error.h"
#include "ckw/TileInfo.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwComponentArgument.h"
+
namespace arm_compute
{
namespace experimental
@@ -34,21 +36,21 @@ namespace experimental
namespace dynamic_fusion
{
-GpuCkwKernelWriter::GpuCkwKernelWriter(ckw::Kernel &kernel)
- : KernelWriter(kernel)
+GpuCkwKernelWriter::GpuCkwKernelWriter(ckw::Kernel &kernel) : KernelWriter(kernel)
{
}
void GpuCkwKernelWriter::op_load_once(GpuCkwComponentArgument *tensor_or_tile, const ckw::TensorTileSampler &sampler)
{
- if(!tensor_or_tile->has_tile())
+ if (!tensor_or_tile->has_tile())
{
CKW_ASSERT(tensor_or_tile->has_tensor());
auto &tensor = tensor_or_tile->tensor();
const auto tile_name = tensor.name() + "_tile";
- auto &tile = declare_tile(tile_name.c_str(), ckw::TileInfo(tensor.data_type(), sampler.height(), sampler.width()));
+ auto &tile =
+ declare_tile(tile_name.c_str(), ckw::TileInfo(tensor.data_type(), sampler.height(), sampler.width()));
op_load(tile, tensor, sampler);
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.cpp
index 043fda9e6f..cbadbd9639 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.cpp
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.cpp
@@ -23,6 +23,7 @@
*/
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h"
+
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h"
namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h
index 4d11b5e3e4..81049bfe37 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h
@@ -63,7 +63,7 @@ public:
private:
GpuCkwKernelWriter *_writer;
- int32_t _parent_id_space;
+ int32_t _parent_id_space;
};
} // namespace dynamic_fusion
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.cpp
index 37c27cd116..88a0cf7f43 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.cpp
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.cpp
@@ -23,11 +23,12 @@
*/
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/Common.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h"
-#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/Common.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
+
#include <sstream>
namespace arm_compute
@@ -36,19 +37,22 @@ namespace experimental
{
namespace dynamic_fusion
{
-GpuCkwComponentArgument *GpuCkwVariableTable::declare_variable(const GpuKernelComponentGroup &comp_group, GpuCkwScopedKernelWriter &writer, const ITensorInfo *tensor, TensorStorageType storage,
- const std::string &alias)
+GpuCkwComponentArgument *GpuCkwVariableTable::declare_variable(const GpuKernelComponentGroup &comp_group,
+ GpuCkwScopedKernelWriter &writer,
+ const ITensorInfo *tensor,
+ TensorStorageType storage,
+ const std::string &alias)
{
ARM_COMPUTE_ERROR_ON_MSG(!tensor->has_valid_id(), "Tensor info with valid id expected");
// Do not re-declare if the variable associated with the tensor has already been declared
auto it = _vars.find(tensor->id());
- if(it != _vars.end())
+ if (it != _vars.end())
{
return &it->second;
}
- if(comp_group.is_intermediate_tensor(tensor))
+ if (comp_group.is_intermediate_tensor(tensor))
{
// Create a virtual tensor variable
GpuCkwComponentArgument var;
@@ -61,7 +65,7 @@ GpuCkwComponentArgument *GpuCkwVariableTable::declare_variable(const GpuKernelCo
std::stringstream ss;
ss << alias << "_t" << abs(tensor->id());
const auto uniq_name = ss.str();
- GpuCkwComponentArgument var{ writer->declare_tensor_argument(uniq_name, to_ckw(*tensor), to_ckw(storage)) };
+ GpuCkwComponentArgument var{writer->declare_tensor_argument(uniq_name, to_ckw(*tensor), to_ckw(storage))};
auto &&inserted = _vars.emplace(tensor->id(), var);
return &(inserted.first->second);
}
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h
index 0649dcba9d..2b118911b8 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h
@@ -25,6 +25,7 @@
#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_GPUCKWVARIABLETABLE
#include "arm_compute/core/ITensorInfo.h"
+
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwComponentArgument.h"
#include <map>
@@ -58,8 +59,11 @@ public:
*
* @return GpuCkwComponentArgument*
*/
- GpuCkwComponentArgument *declare_variable(const GpuKernelComponentGroup &comp_group, GpuCkwScopedKernelWriter &writer, const ITensorInfo *tensor, TensorStorageType storage,
- const std::string &alias = "unnamed");
+ GpuCkwComponentArgument *declare_variable(const GpuKernelComponentGroup &comp_group,
+ GpuCkwScopedKernelWriter &writer,
+ const ITensorInfo *tensor,
+ TensorStorageType storage,
+ const std::string &alias = "unnamed");
private:
std::map<ITensorInfo::Id, GpuCkwComponentArgument> _vars{};
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/IGpuCkwComponentDriver.h b/src/dynamic_fusion/sketch/gpu/ckw_driver/IGpuCkwComponentDriver.h
index 14086f785e..52e56e2e35 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/IGpuCkwComponentDriver.h
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/IGpuCkwComponentDriver.h
@@ -25,6 +25,7 @@
#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_IGPUCKWCOMPONENTDRIVER
#include "arm_compute/core/Window.h"
+
#include "src/dynamic_fusion/sketch/ArgumentPack.h"
#include "src/dynamic_fusion/sketch/gpu/components/Types.h"
@@ -73,8 +74,7 @@ public:
* @param[in] id Component id
* @param[in] tensors Tensor arguments to the components
*/
- IGpuCkwComponentDriver(ComponentId id, const ArgumentPack<ITensorInfo> &tensors)
- : _id{ id }, _tensors{ tensors }
+ IGpuCkwComponentDriver(ComponentId id, const ArgumentPack<ITensorInfo> &tensors) : _id{id}, _tensors{tensors}
{
}
/** Destructor */
@@ -89,7 +89,9 @@ public:
*
* @note @p writer can only be passed by value since the new scope is created in the copy constructor
*/
- virtual void write_component_code(const ComponentGroup &comp_group, GpuCkwVariableTable &vtable, GpuCkwScopedKernelWriter writer) const = 0;
+ virtual void write_component_code(const ComponentGroup &comp_group,
+ GpuCkwVariableTable &vtable,
+ GpuCkwScopedKernelWriter writer) const = 0;
/** Get tensor arguments */
ArgumentPack<ITensorInfo> tensors() const
{
@@ -128,7 +130,7 @@ public:
}
private:
- ComponentId _id{ -1 };
+ ComponentId _id{-1};
ArgumentPack<ITensorInfo> _tensors{};
};
} // namespace dynamic_fusion
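The @note above is worth unpacking: GpuCkwScopedKernelWriter opens a new id space in its copy constructor (see its _parent_id_space member earlier in this diff), so passing it by value is what gives each component's write_component_code its own scope. A behavioural sketch with stand-in types, not the real ckw API:

    #include <cstdint>

    struct ScopedWriter
    {
        ScopedWriter() = default;
        // Copying is what opens a new scope: the copy remembers its parent's
        // id space and moves to a fresh one.
        ScopedWriter(const ScopedWriter &other)
            : _parent_id_space(other._id_space), _id_space(other._id_space + 1)
        {
        }
        int32_t _parent_id_space{0};
        int32_t _id_space{1};
    };

    void write_component_code(ScopedWriter writer) // by value: the copy ctor runs on entry
    {
        // ... everything emitted here lives in writer._id_space ...
    }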
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwActivation.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwActivation.cpp
index c07fac0e0d..c3b1b3c8bc 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwActivation.cpp
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwActivation.cpp
@@ -24,16 +24,18 @@
#include "GpuCkwActivation.h"
#include "arm_compute/core/Error.h"
-#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
+#include "arm_compute/core/Validate.h"
#include "ckw/TensorTileSampler.h"
+
#include "src/core/helpers/WindowHelpers.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/WriterHelper.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h"
-#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/WriterHelper.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
+
#include <string>
using namespace ckw;
@@ -87,24 +89,25 @@ inline TensorTileSampler create_sampler(GpuCkwScopedKernelWriter &writer, int32_
GpuCkwActivation::GpuCkwActivation(ComponentId id,
const ArgumentPack<ITensorInfo> &tensors,
const Attributes &attributes)
- : IGpuCkwComponentDriver{ id, tensors },
- _src{},
- _dst{},
- _attributes{ attributes }
+ : IGpuCkwComponentDriver{id, tensors}, _src{}, _dst{}, _attributes{attributes}
{
_src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0);
_dst = this->tensors().get_const_tensor(TensorType::ACL_DST_0);
ARM_COMPUTE_ERROR_ON_NULLPTR(_src, _dst);
}
-void GpuCkwActivation::write_component_code(const ComponentGroup &comp_group, GpuCkwVariableTable &vtable, GpuCkwScopedKernelWriter writer) const
+void GpuCkwActivation::write_component_code(const ComponentGroup &comp_group,
+ GpuCkwVariableTable &vtable,
+ GpuCkwScopedKernelWriter writer) const
{
const auto root_window = comp_group.get_root_component()->ckw_component_driver()->get_window();
const unsigned int n0 = root_window.x().step();
const unsigned int m0 = root_window.y().step();
- GpuCkwComponentArgument *src = vtable.declare_variable(comp_group, writer, _src, TensorStorageType::ClBufferUint8Ptr, "src");
- GpuCkwComponentArgument *dst = vtable.declare_variable(comp_group, writer, _dst, TensorStorageType::ClBufferUint8Ptr, "dst");
+ GpuCkwComponentArgument *src =
+ vtable.declare_variable(comp_group, writer, _src, TensorStorageType::ClBufferUint8Ptr, "src");
+ GpuCkwComponentArgument *dst =
+ vtable.declare_variable(comp_group, writer, _dst, TensorStorageType::ClBufferUint8Ptr, "dst");
load_src_dst_tiles_and_prepare_sampler(writer, src, dst, m0, n0, create_sampler);
@@ -119,7 +122,7 @@ void GpuCkwActivation::write_component_code(const ComponentGroup &comp_group, Gp
const auto &constant_B = writer->declare_tile("B_VAL", _attributes.b());
// Perform the operation.
- switch(_attributes.activation())
+ switch (_attributes.activation())
{
case ActivationLayerInfo::ActivationFunction::LOGISTIC:
{
@@ -179,9 +182,10 @@ Window GpuCkwActivation::get_window() const
// Collapse Dim 1 (W) and Dim 2 (H) together, leave Dim 0 (C) unchanged
// This is in line with the collapsing convention used by operators like Conv2d
output_shape.collapse(2U, 1U);
- constexpr unsigned int vector_size_byte_opencl = 16;
- const unsigned int num_elems_processed_per_iteration = adjust_vec_size(vector_size_byte_opencl / _dst->element_size(), _dst->dimension(0));
- Window win = calculate_max_window(output_shape, Steps(num_elems_processed_per_iteration));
+ constexpr unsigned int vector_size_byte_opencl = 16;
+ const unsigned int num_elems_processed_per_iteration =
+ adjust_vec_size(vector_size_byte_opencl / _dst->element_size(), _dst->dimension(0));
+ Window win = calculate_max_window(output_shape, Steps(num_elems_processed_per_iteration));
return win;
}
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwActivation.h b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwActivation.h
index e157e36cbf..386e933a72 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwActivation.h
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwActivation.h
@@ -46,15 +46,15 @@ public:
* @param[in] tensors Tensor arguments to the component
* @param[in] attributes Component attributes
*/
- GpuCkwActivation(ComponentId id,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes);
+ GpuCkwActivation(ComponentId id, const ArgumentPack<ITensorInfo> &tensors, const Attributes &attributes);
ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(GpuCkwActivation);
/** Destructor */
~GpuCkwActivation() override = default;
// Inherited methods overridden:
- virtual void write_component_code(const ComponentGroup &comp_group, GpuCkwVariableTable &vtable, GpuCkwScopedKernelWriter writer) const override;
- Window get_window() const override;
+ virtual void write_component_code(const ComponentGroup &comp_group,
+ GpuCkwVariableTable &vtable,
+ GpuCkwScopedKernelWriter writer) const override;
+ Window get_window() const override;
private:
const ITensorInfo *_src;
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.cpp
index 6ecf2bac44..e8e5087633 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.cpp
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.cpp
@@ -24,16 +24,18 @@
#include "GpuCkwCast.h"
#include "arm_compute/core/Error.h"
-#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
+#include "arm_compute/core/Validate.h"
#include "ckw/TensorTileSampler.h"
+
#include "src/core/helpers/WindowHelpers.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/Common.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h"
-#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/Common.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
+
#include <string>
using namespace ckw;
@@ -84,30 +86,29 @@ inline TensorTileSampler create_sampler(GpuCkwScopedKernelWriter &writer, int32_
}
} // namespace
-GpuCkwCast::GpuCkwCast(ComponentId id,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes)
- : IGpuCkwComponentDriver{ id, tensors },
- _src{},
- _dst{},
- _attributes{ attributes }
+GpuCkwCast::GpuCkwCast(ComponentId id, const ArgumentPack<ITensorInfo> &tensors, const Attributes &attributes)
+ : IGpuCkwComponentDriver{id, tensors}, _src{}, _dst{}, _attributes{attributes}
{
_src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0);
_dst = this->tensors().get_const_tensor(TensorType::ACL_DST_0);
ARM_COMPUTE_ERROR_ON_NULLPTR(_src, _dst);
}
-void GpuCkwCast::write_component_code(const ComponentGroup &comp_group, GpuCkwVariableTable &vtable, GpuCkwScopedKernelWriter writer) const
+void GpuCkwCast::write_component_code(const ComponentGroup &comp_group,
+ GpuCkwVariableTable &vtable,
+ GpuCkwScopedKernelWriter writer) const
{
const auto root_window = comp_group.get_root_component()->ckw_component_driver()->get_window();
const unsigned int n0 = root_window.x().step();
const unsigned int m0 = root_window.y().step();
- GpuCkwComponentArgument *src = vtable.declare_variable(comp_group, writer, _src, TensorStorageType::ClBufferUint8Ptr, "src");
- GpuCkwComponentArgument *dst = vtable.declare_variable(comp_group, writer, _dst, TensorStorageType::ClBufferUint8Ptr, "dst");
+ GpuCkwComponentArgument *src =
+ vtable.declare_variable(comp_group, writer, _src, TensorStorageType::ClBufferUint8Ptr, "src");
+ GpuCkwComponentArgument *dst =
+ vtable.declare_variable(comp_group, writer, _dst, TensorStorageType::ClBufferUint8Ptr, "dst");
// Load the source tile and prepare the sampler.
- if(!src->has_tile())
+ if (!src->has_tile())
{
const auto sampler = create_sampler(writer, m0, n0);
writer->op_load_once(src, sampler);
@@ -122,7 +123,7 @@ void GpuCkwCast::write_component_code(const ComponentGroup &comp_group, GpuCkwVa
const auto &sampler = src->tile_sampler();
// Prepare the output tile.
- if(!dst->has_tile())
+ if (!dst->has_tile())
{
// Get Target datatype and convert it to ckw::DataType.
ckw::DataType target_dt = dynamic_fusion::to_ckw(_attributes.data_type());
@@ -143,7 +144,7 @@ void GpuCkwCast::write_component_code(const ComponentGroup &comp_group, GpuCkwVa
const size_t dst_size = data_size_from_type(_dst->data_type());
const bool cast_down = (src_size >= dst_size);
- if(cast_down && is_data_type_quantized(_src->data_type()))
+ if (cast_down && is_data_type_quantized(_src->data_type()))
{
const auto &constant_x80 = writer->declare_tile("0x80", 0x80);
writer->op_binary_expression(src_tile, src_tile, BinaryOp::BitwiseXOR, constant_x80);
@@ -151,7 +152,7 @@ void GpuCkwCast::write_component_code(const ComponentGroup &comp_group, GpuCkwVa
ckw::ConvertPolicy convert_policy = ckw::ConvertPolicy::None;
- if(cast_down && (is_data_type_float(_src->data_type()) || _attributes.convert_policy() == ConvertPolicy::SATURATE))
+ if (cast_down && (is_data_type_float(_src->data_type()) || _attributes.convert_policy() == ConvertPolicy::SATURATE))
{
convert_policy = ckw::ConvertPolicy::Saturate;
}
@@ -167,9 +168,10 @@ Window GpuCkwCast::get_window() const
// Collapse Dim 1 (W) and Dim 2 (H) together, leave Dim 0 (C) unchanged
// This is in line with the collapsing convention used by operators like Conv2d
output_shape.collapse(2U, 1U);
- constexpr unsigned int vector_size_byte_opencl = 16;
- const unsigned int num_elems_processed_per_iteration = adjust_vec_size(vector_size_byte_opencl / _dst->element_size(), _dst->dimension(0));
- Window win = calculate_max_window(output_shape, Steps(num_elems_processed_per_iteration));
+ constexpr unsigned int vector_size_byte_opencl = 16;
+ const unsigned int num_elems_processed_per_iteration =
+ adjust_vec_size(vector_size_byte_opencl / _dst->element_size(), _dst->dimension(0));
+ Window win = calculate_max_window(output_shape, Steps(num_elems_processed_per_iteration));
return win;
}
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.h b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.h
index 821cec1e19..2389301196 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.h
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.h
@@ -46,15 +46,15 @@ public:
* @param[in] tensors Tensor arguments to the component
* @param[in] attributes Component attributes
*/
- GpuCkwCast(ComponentId id,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes);
+ GpuCkwCast(ComponentId id, const ArgumentPack<ITensorInfo> &tensors, const Attributes &attributes);
ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(GpuCkwCast);
/** Destructor */
~GpuCkwCast() override = default;
// Inherited methods overridden:
- virtual void write_component_code(const ComponentGroup &comp_group, GpuCkwVariableTable &vtable, GpuCkwScopedKernelWriter writer) const override;
- Window get_window() const override;
+ virtual void write_component_code(const ComponentGroup &comp_group,
+ GpuCkwVariableTable &vtable,
+ GpuCkwScopedKernelWriter writer) const override;
+ Window get_window() const override;
private:
const ITensorInfo *_src;
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwDirectConv2d.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwDirectConv2d.cpp
index 3c906646a6..7833da2334 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwDirectConv2d.cpp
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwDirectConv2d.cpp
@@ -25,21 +25,20 @@
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwDirectConv2d.h"
#include "arm_compute/core/Error.h"
-#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
#include "arm_compute/core/utils/StringUtils.h"
-
+#include "arm_compute/core/Validate.h"
#include "ckw/TensorTileSampler.h"
#include "ckw/TileInfo.h"
#include "src/core/helpers/WindowHelpers.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/Common.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/WriterHelper.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h"
-#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/WriterHelper.h"
-#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/Common.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
namespace arm_compute
{
@@ -54,13 +53,7 @@ GpuCkwDirectConv2d::GpuCkwDirectConv2d(ComponentId id,
const ArgumentPack<ITensorInfo> &tensors,
const Attributes &attributes,
const Settings &settings)
- : IGpuCkwComponentDriver{ id, tensors },
- _src{},
- _wei{},
- _bia{},
- _dst{},
- _attributes{ attributes },
- _settings{ settings }
+ : IGpuCkwComponentDriver{id, tensors}, _src{}, _wei{}, _bia{}, _dst{}, _attributes{attributes}, _settings{settings}
{
_src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0);
_wei = this->tensors().get_const_tensor(TensorType::ACL_SRC_1);
@@ -69,7 +62,9 @@ GpuCkwDirectConv2d::GpuCkwDirectConv2d(ComponentId id,
ARM_COMPUTE_ERROR_ON_NULLPTR(_src, _wei, _dst); // Bias can be null
}
-void GpuCkwDirectConv2d::write_component_code(const ComponentGroup &comp_group, GpuCkwVariableTable &vtable, GpuCkwScopedKernelWriter writer) const
+void GpuCkwDirectConv2d::write_component_code(const ComponentGroup &comp_group,
+ GpuCkwVariableTable &vtable,
+ GpuCkwScopedKernelWriter writer) const
{
const auto desc = _settings.direct_conv_descriptor();
ARM_COMPUTE_ERROR_ON_MSG(desc.export_input_to_cl_image || desc.export_output_to_cl_image,
@@ -99,15 +94,18 @@ void GpuCkwDirectConv2d::write_component_code(const ComponentGroup &comp_group,
// extra loop to compute the left-over elements.
const bool use_cl_image_for_weights = desc.export_weights_to_cl_image && (k0 == 4) && (K % 4 == 0);
- GpuCkwComponentArgument *src = vtable.declare_variable(comp_group, writer, _src, TensorStorageType::ClBufferUint8Ptr, "src");
+ GpuCkwComponentArgument *src =
+ vtable.declare_variable(comp_group, writer, _src, TensorStorageType::ClBufferUint8Ptr, "src");
GpuCkwComponentArgument *wei = vtable.declare_variable(
- comp_group, writer, _wei, use_cl_image_for_weights ? TensorStorageType::ClImage2dReadOnly : TensorStorageType::ClBufferUint8Ptr, "wei");
- GpuCkwComponentArgument *dst = vtable.declare_variable(comp_group, writer, _dst, TensorStorageType::ClBufferUint8Ptr, "dst");
+ comp_group, writer, _wei,
+ use_cl_image_for_weights ? TensorStorageType::ClImage2dReadOnly : TensorStorageType::ClBufferUint8Ptr, "wei");
+ GpuCkwComponentArgument *dst =
+ vtable.declare_variable(comp_group, writer, _dst, TensorStorageType::ClBufferUint8Ptr, "dst");
GpuCkwComponentArgument *bia = nullptr;
const bool using_bias = _bia != nullptr;
- if(using_bias)
+ if (using_bias)
{
bia = vtable.declare_variable(comp_group, writer, _bia, TensorStorageType::ClBufferUint8Ptr, "bia");
}
@@ -154,7 +152,8 @@ void GpuCkwDirectConv2d::write_component_code(const ComponentGroup &comp_group,
src_sampler.address_mode_x(TensorSamplerAddressModeX::None);
// We cannot have out-of-bounds reads when the kernel height is equal to 1. Otherwise, we need to ensure the
// indirection buffer mi does not contain negative values representing out-of-bounds reads.
- src_sampler.address_mode_y(kernel_height == 1 ? TensorSamplerAddressModeY::None : TensorSamplerAddressModeY::SkipMinEdgeOnly);
+ src_sampler.address_mode_y(kernel_height == 1 ? TensorSamplerAddressModeY::None
+ : TensorSamplerAddressModeY::SkipMinEdgeOnly);
src_sampler.address_mode_z(TensorSamplerAddressModeZ::None);
TensorTileSampler wei_sampler;
@@ -178,7 +177,7 @@ void GpuCkwDirectConv2d::write_component_code(const ComponentGroup &comp_group,
dst_sampler.z(tile_0);
dst_sampler.b(tile_bout);
- if(!dst->has_tile())
+ if (!dst->has_tile())
{
auto &tile = writer->declare_tile("dst", TileInfo(to_ckw(_dst->data_type()), m0, n0));
dst->init_virtual_tensor(tile, dst_sampler);
@@ -189,10 +188,10 @@ void GpuCkwDirectConv2d::write_component_code(const ComponentGroup &comp_group,
// We create a 2d container of size (M0, 1) to store the indices for iteration
TileContainer it;
- for(int m = 0; m < m0; ++m)
+ for (int m = 0; m < m0; ++m)
{
- std::vector<std::string> idx { std::to_string(m) };
- it.push_back({ idx });
+ std::vector<std::string> idx{std::to_string(m)};
+ it.push_back({idx});
}
const auto &tile_it = writer->declare_tile("it", it, ckw::DataType::Int32);
@@ -289,9 +288,9 @@ void GpuCkwDirectConv2d::write_component_code(const ComponentGroup &comp_group,
// Bias addition
// NOTE: This operation will be removed from this kernel as the interface is standardized. The intended way of
// performing bias addition is to fuse this convolution kernel with a following elementwise addition kernel.
- if(using_bias)
+ if (using_bias)
{
- if(!bia->has_tile())
+ if (!bia->has_tile())
{
// Reuse the destination sampler for the bias
writer->op_load_once(bia, dst_sampler);
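One more wrapping rule shows up in the src_sampler hunk above: a conditional expression that overflows the limit now breaks before the ':' branch, keeping the two outcomes vertically aligned. A stand-in sketch:

    enum class AddressModeY
    {
        None,
        SkipMinEdgeOnly
    };

    AddressModeY pick_address_mode_y(int kernel_height)
    {
        // Aligned '?'/':' branches once the expression no longer fits:
        return kernel_height == 1 ? AddressModeY::None
                                  : AddressModeY::SkipMinEdgeOnly;
    }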
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.cpp
index c8bf999261..2935ba45ea 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.cpp
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.cpp
@@ -24,22 +24,24 @@
#include "GpuCkwElementwiseBinary.h"
#include "arm_compute/core/Error.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/utils/StringUtils.h"
#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
+#include "arm_compute/core/utils/StringUtils.h"
+#include "arm_compute/core/Validate.h"
#include "ckw/TensorTileSampler.h"
#include "ckw/types/TensorSamplerTypes.h"
+
#include "src/core/helpers/WindowHelpers.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/Common.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/ElementwiseBinary.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/WriterHelper.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h"
-#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/WriterHelper.h"
-#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/Common.h"
-#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/ElementwiseBinary.h"
#include "src/dynamic_fusion/sketch/gpu/components/utils/type_printer/ElementwiseBinary.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
#include "support/StringSupport.h"
+
#include <algorithm>
#include <string>
@@ -53,11 +55,7 @@ namespace dynamic_fusion
GpuCkwElementwiseBinary::GpuCkwElementwiseBinary(ComponentId id,
const ArgumentPack<ITensorInfo> &tensors,
const Attributes &attributes)
- : IGpuCkwComponentDriver{ id, tensors },
- _lhs{},
- _rhs{},
- _dst{},
- _attributes{ attributes }
+ : IGpuCkwComponentDriver{id, tensors}, _lhs{}, _rhs{}, _dst{}, _attributes{attributes}
{
_lhs = this->tensors().get_const_tensor(TensorType::ACL_SRC_0);
_rhs = this->tensors().get_const_tensor(TensorType::ACL_SRC_1);
@@ -65,15 +63,20 @@ GpuCkwElementwiseBinary::GpuCkwElementwiseBinary(ComponentId
ARM_COMPUTE_ERROR_ON_NULLPTR(_lhs, _rhs, _dst);
}
-void GpuCkwElementwiseBinary::write_component_code(const ComponentGroup &comp_group, GpuCkwVariableTable &vtable, GpuCkwScopedKernelWriter writer) const
+void GpuCkwElementwiseBinary::write_component_code(const ComponentGroup &comp_group,
+ GpuCkwVariableTable &vtable,
+ GpuCkwScopedKernelWriter writer) const
{
const auto root_window = comp_group.get_root_component()->ckw_component_driver()->get_window();
const auto n0 = static_cast<int32_t>(root_window.x().step());
const auto m0 = static_cast<int32_t>(root_window.y().step());
- GpuCkwComponentArgument *lhs = vtable.declare_variable(comp_group, writer, _lhs, TensorStorageType::ClBufferUint8Ptr, "lhs");
- GpuCkwComponentArgument *rhs = vtable.declare_variable(comp_group, writer, _rhs, TensorStorageType::ClBufferUint8Ptr, "rhs");
- GpuCkwComponentArgument *dst = vtable.declare_variable(comp_group, writer, _dst, TensorStorageType::ClBufferUint8Ptr, "dst");
+ GpuCkwComponentArgument *lhs =
+ vtable.declare_variable(comp_group, writer, _lhs, TensorStorageType::ClBufferUint8Ptr, "lhs");
+ GpuCkwComponentArgument *rhs =
+ vtable.declare_variable(comp_group, writer, _rhs, TensorStorageType::ClBufferUint8Ptr, "rhs");
+ GpuCkwComponentArgument *dst =
+ vtable.declare_variable(comp_group, writer, _dst, TensorStorageType::ClBufferUint8Ptr, "dst");
auto &gid_0 = writer->declare_tile("gid_0", ckw::DataType::Int32);
auto &gid_1 = writer->declare_tile("gid_1", ckw::DataType::Int32);
@@ -86,32 +89,36 @@ void GpuCkwElementwiseBinary::write_component_code(const ComponentGroup &comp_gr
auto &const_0 = writer->declare_tile("0", 0);
// Load the LHS and RHS tiles
- if(!lhs->has_tile())
+ if (!lhs->has_tile())
{
- auto sampler = create_boundary_aware_2d_sampler(writer, gid_0, gid_1, _lhs->dimension(0), _lhs->dimension(1), n0, m0, "lhs_", const_0);
+ auto sampler = create_boundary_aware_2d_sampler(writer, gid_0, gid_1, _lhs->dimension(0), _lhs->dimension(1),
+ n0, m0, "lhs_", const_0);
sampler.format(TensorSamplerFormat::C_WH_1); // 3rd dimension collapsed with 2nd dimension
sampler.z(const_0);
sampler.b(gid_2);
writer->op_load_once(lhs, sampler);
}
- if(!rhs->has_tile())
+ if (!rhs->has_tile())
{
- auto sampler = create_boundary_aware_2d_sampler(writer, gid_0, gid_1, _rhs->dimension(0), _rhs->dimension(1), n0, m0, "rhs_", const_0);
+ auto sampler = create_boundary_aware_2d_sampler(writer, gid_0, gid_1, _rhs->dimension(0), _rhs->dimension(1),
+ n0, m0, "rhs_", const_0);
sampler.format(TensorSamplerFormat::C_WH_1); // 3rd dimension collapsed with 2nd dimension
sampler.z(const_0);
sampler.b(gid_2);
writer->op_load_once(rhs, sampler);
}
- auto dst_sampler = create_boundary_aware_2d_sampler(writer, gid_0, gid_1, _dst->dimension(0), _dst->dimension(1), n0, m0, "dst_", const_0);
+ auto dst_sampler = create_boundary_aware_2d_sampler(writer, gid_0, gid_1, _dst->dimension(0), _dst->dimension(1),
+ n0, m0, "dst_", const_0);
dst_sampler.format(TensorSamplerFormat::C_WH_1); // 3rd dimension collapsed with 2nd dimension
dst_sampler.z(const_0);
dst_sampler.b(gid_2);
// Prepare the output tile.
- if(!dst->has_tile())
+ if (!dst->has_tile())
{
- auto &tile = writer->declare_tile("dst_tile", ckw::TileInfo(to_ckw(_dst->data_type()), dst_sampler.height(), dst_sampler.width()));
+ auto &tile = writer->declare_tile(
+ "dst_tile", ckw::TileInfo(to_ckw(_dst->data_type()), dst_sampler.height(), dst_sampler.width()));
dst->init_virtual_tensor(tile, dst_sampler);
}
@@ -131,9 +138,10 @@ Window GpuCkwElementwiseBinary::get_window() const
// Collapse Dim 1 (W) and Dim 2 (H) together, leave Dim 0 (C) unchanged
// This is in line with the collapsing convention used by operators like Conv2d
output_shape.collapse(2U, 1U);
- constexpr unsigned int vector_size_byte_opencl = 16;
- const unsigned int num_elems_processed_per_iteration = adjust_vec_size(vector_size_byte_opencl / _dst->element_size(), _dst->dimension(0));
- Window win = calculate_max_window(output_shape, Steps(num_elems_processed_per_iteration));
+ constexpr unsigned int vector_size_byte_opencl = 16;
+ const unsigned int num_elems_processed_per_iteration =
+ adjust_vec_size(vector_size_byte_opencl / _dst->element_size(), _dst->dimension(0));
+ Window win = calculate_max_window(output_shape, Steps(num_elems_processed_per_iteration));
return win;
}
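Editor's note: the step width derived above comes from a 16-byte OpenCL vector budget divided by the element size, then clamped to the innermost dimension. A simplified stand-in for adjust_vec_size(), assuming it only halves the candidate width (the real helper also special-cases small dimensions):

#include <cstdio>

// Hypothetical stand-in for adjust_vec_size(): shrink the candidate
// vector width until it fits the innermost dimension.
unsigned int adjust_vec_size_approx(unsigned int vec_size, unsigned int dim0)
{
    while (vec_size > dim0)
        vec_size /= 2;
    return vec_size == 0 ? 1 : vec_size;
}

int main()
{
    const unsigned int vector_size_byte_opencl = 16;
    const unsigned int element_size            = 4; // e.g. F32
    // Same derivation as get_window(): 16 bytes -> 4 F32 lanes, clamped to dim0 = 6.
    printf("n0 = %u\n", adjust_vec_size_approx(vector_size_byte_opencl / element_size, 6));
    return 0;
}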
@@ -141,11 +149,12 @@ Window GpuCkwElementwiseBinary::get_window() const
std::string GpuCkwElementwiseBinary::get_name(const ComponentGroup &comp_group) const
{
ARM_COMPUTE_UNUSED(comp_group);
- const std::vector<std::string> build_params =
- {
+ const std::vector<std::string> build_params = {
"elementwise_binary",
- "op", to_string(_attributes.operation()),
- "dt", lower_string(string_from_data_type(_dst->data_type())),
+ "op",
+ to_string(_attributes.operation()),
+ "dt",
+ lower_string(string_from_data_type(_dst->data_type())),
};
return join(build_params, "_");
}
@@ -154,13 +163,16 @@ std::string GpuCkwElementwiseBinary::get_tuner_id(const ComponentGroup &comp_gro
{
ARM_COMPUTE_UNUSED(comp_group);
/// NOTE: Hardcoded for now; the parameters should ideally be exported by ckw (a selection of constant tiles)
- std::vector<std::string> build_params =
- {
+ std::vector<std::string> build_params = {
"elementwise_binary",
- "op", to_string(_attributes.operation()),
- "dt", lower_string(string_from_data_type(_dst->data_type())),
- "dst_dim0", support::cpp11::to_string(_dst->dimension(0)),
- "dst_dim1", support::cpp11::to_string(_dst->dimension(1)),
+ "op",
+ to_string(_attributes.operation()),
+ "dt",
+ lower_string(string_from_data_type(_dst->data_type())),
+ "dst_dim0",
+ support::cpp11::to_string(_dst->dimension(0)),
+ "dst_dim1",
+ support::cpp11::to_string(_dst->dimension(1)),
};
return join(build_params, "_");
}
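Editor's note: both get_name() and get_tuner_id() assemble an underscore-joined token list, yielding identifiers such as "elementwise_binary_op_add_dt_f32". A join() in the same spirit (token values illustrative only):

#include <cstddef>
#include <iostream>
#include <string>
#include <vector>

// Separator-join in the spirit of the join() used above.
std::string join_tokens(const std::vector<std::string> &tokens, const std::string &sep)
{
    std::string out;
    for (std::size_t i = 0; i < tokens.size(); ++i)
    {
        if (i != 0)
            out += sep;
        out += tokens[i];
    }
    return out;
}

int main()
{
    // Illustrative token values only.
    std::cout << join_tokens({"elementwise_binary", "op", "add", "dt", "f32"}, "_") << "\n";
    return 0;
}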
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.h b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.h
index e9c41530f8..1a20d4c533 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.h
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.h
@@ -46,17 +46,17 @@ public:
* @param[in] tensors Tensor arguments to the component
* @param[in] attributes Component attributes
*/
- GpuCkwElementwiseBinary(ComponentId id,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes);
+ GpuCkwElementwiseBinary(ComponentId id, const ArgumentPack<ITensorInfo> &tensors, const Attributes &attributes);
ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(GpuCkwElementwiseBinary);
/** Destructor */
~GpuCkwElementwiseBinary() override = default;
// Inherited methods overridden:
- virtual void write_component_code(const ComponentGroup &comp_group, GpuCkwVariableTable &vtable, GpuCkwScopedKernelWriter writer) const override;
- Window get_window() const override;
- std::string get_name(const ComponentGroup &comp_group) const override;
- std::string get_tuner_id(const ComponentGroup &comp_group) const override;
+ virtual void write_component_code(const ComponentGroup &comp_group,
+ GpuCkwVariableTable &vtable,
+ GpuCkwScopedKernelWriter writer) const override;
+ Window get_window() const override;
+ std::string get_name(const ComponentGroup &comp_group) const override;
+ std::string get_tuner_id(const ComponentGroup &comp_group) const override;
private:
const ITensorInfo *_lhs;
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwPool2d.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwPool2d.cpp
index 9c9a298132..8ab3ec3a55 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwPool2d.cpp
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwPool2d.cpp
@@ -24,17 +24,18 @@
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwPool2d.h"
#include "arm_compute/core/Error.h"
-#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
+#include "arm_compute/core/Validate.h"
#include "ckw/TensorTileSampler.h"
+
#include "src/core/helpers/WindowHelpers.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/Common.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/WriterHelper.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h"
-#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/WriterHelper.h"
-#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/Common.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
using namespace ckw;
@@ -48,11 +49,7 @@ GpuCkwPool2d::GpuCkwPool2d(ComponentId id,
const ArgumentPack<ITensorInfo> &tensors,
const Attributes &attributes,
const Settings &settings)
- : IGpuCkwComponentDriver{ id, tensors },
- _src{},
- _dst{},
- _attributes{ attributes },
- _settings{ settings }
+ : IGpuCkwComponentDriver{id, tensors}, _src{}, _dst{}, _attributes{attributes}, _settings{settings}
{
_src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0);
@@ -60,14 +57,18 @@ GpuCkwPool2d::GpuCkwPool2d(ComponentId id,
ARM_COMPUTE_ERROR_ON_NULLPTR(_src, _dst);
}
-void GpuCkwPool2d::write_component_code(const ComponentGroup &comp_group, GpuCkwVariableTable &vtable, GpuCkwScopedKernelWriter writer) const
+void GpuCkwPool2d::write_component_code(const ComponentGroup &comp_group,
+ GpuCkwVariableTable &vtable,
+ GpuCkwScopedKernelWriter writer) const
{
const auto root_window = comp_group.get_root_component()->ckw_component_driver()->get_window();
const unsigned int n0 = root_window.x().step();
const unsigned int m0 = root_window.y().step();
- GpuCkwComponentArgument *src = vtable.declare_variable(comp_group, writer, _src, TensorStorageType::ClBufferUint8Ptr, "src");
- GpuCkwComponentArgument *dst = vtable.declare_variable(comp_group, writer, _dst, TensorStorageType::ClBufferUint8Ptr, "dst");
+ GpuCkwComponentArgument *src =
+ vtable.declare_variable(comp_group, writer, _src, TensorStorageType::ClBufferUint8Ptr, "src");
+ GpuCkwComponentArgument *dst =
+ vtable.declare_variable(comp_group, writer, _dst, TensorStorageType::ClBufferUint8Ptr, "dst");
TileOperand &gid_0 = writer->declare_tile("gid_0", ckw::DataType::Int32);
TileOperand &gid_1 = writer->declare_tile("gid_1", ckw::DataType::Int32);
@@ -90,23 +91,26 @@ void GpuCkwPool2d::write_component_code(const ComponentGroup &comp_group, GpuCkw
const auto src_data_type = _src->data_type();
// Check if this is the global pooling path
- const bool is_global_pooling = (pool_size_x == src_width) && (pool_size_y == src_height) && (pad_x == 0) && (pad_y == 0);
+ const bool is_global_pooling =
+ (pool_size_x == src_width) && (pool_size_y == src_height) && (pad_x == 0) && (pad_y == 0);
// Check if this is a case of FP_MIXED_PRECISION
- const bool use_fp_mixed_precision = (src_data_type == DataType::F16) && _settings.mixed_precision() && _attributes.pool_type() != PoolingType::MAX;
- const auto acc_data_type = (use_fp_mixed_precision) ? (DataType::F32) : (src_data_type);
+ const bool use_fp_mixed_precision =
+ (src_data_type == DataType::F16) && _settings.mixed_precision() && _attributes.pool_type() != PoolingType::MAX;
+ const auto acc_data_type = (use_fp_mixed_precision) ? (DataType::F32) : (src_data_type);
TileOperand &const_0 = writer->declare_tile("0", 0);
const TileOperand &const_1 = writer->declare_tile("1", 1);
const TileOperand &const_lowest_value = writer->declare_tile("LOWEST_VALUE", std::numeric_limits<float>::lowest());
const TileOperand &pool_size_x_tile = writer->declare_tile("POOL_SIZE_X", pool_size_x);
const TileOperand &pool_size_y_tile = writer->declare_tile("POOL_SIZE_Y", pool_size_y);
- const TileOperand &stride_x_tile = writer->declare_tile("STRIDE_X", static_cast<int32_t>(_attributes.stride().x()));
- const TileOperand &stride_y_tile = writer->declare_tile("STRIDE_Y", static_cast<int32_t>(_attributes.stride().y()));
- const TileOperand &pad_x_tile = writer->declare_tile("PAD_X", pad_x);
- const TileOperand &pad_y_tile = writer->declare_tile("PAD_Y", pad_y);
- const TileOperand &dst_height_tile = writer->declare_tile("DST_HEIGHT", static_cast<int32_t>(_dst->dimension(height_idx)));
- const TileOperand &src_height_tile = writer->declare_tile("SRC_HEIGHT", src_height);
- const TileOperand &src_width_tile = writer->declare_tile("SRC_WIDTH", src_width);
+ const TileOperand &stride_x_tile = writer->declare_tile("STRIDE_X", static_cast<int32_t>(_attributes.stride().x()));
+ const TileOperand &stride_y_tile = writer->declare_tile("STRIDE_Y", static_cast<int32_t>(_attributes.stride().y()));
+ const TileOperand &pad_x_tile = writer->declare_tile("PAD_X", pad_x);
+ const TileOperand &pad_y_tile = writer->declare_tile("PAD_Y", pad_y);
+ const TileOperand &dst_height_tile =
+ writer->declare_tile("DST_HEIGHT", static_cast<int32_t>(_dst->dimension(height_idx)));
+ const TileOperand &src_height_tile = writer->declare_tile("SRC_HEIGHT", src_height);
+ const TileOperand &src_width_tile = writer->declare_tile("SRC_WIDTH", src_width);
TileOperand &idx_out_n = writer->declare_tile("idx_out_n", ckw::DataType::Int32);
TileOperand &idx_out_h = writer->declare_tile("idx_out_h", ckw::DataType::Int32);
@@ -145,7 +149,7 @@ void GpuCkwPool2d::write_component_code(const ComponentGroup &comp_group, GpuCkw
// Prepare dst tensor and tile
TileInfo dst_tile_info = TileInfo(to_ckw(src_data_type), m0, n0);
- if(!dst->has_tile())
+ if (!dst->has_tile())
{
TileOperand &dst_tile = writer->declare_tile("dst_tile", dst_tile_info);
dst->init_virtual_tensor(dst_tile, dst_sampler);
@@ -156,14 +160,15 @@ void GpuCkwPool2d::write_component_code(const ComponentGroup &comp_group, GpuCkw
const TileOperand &res_tile = writer->declare_tile("res_tile", TileInfo(to_ckw(acc_data_type), m0, n0));
// Initialise result tile with appropriate value
- if(_attributes.pool_type() == PoolingType::MAX)
+ if (_attributes.pool_type() == PoolingType::MAX)
{
- if(_settings.use_inf_as_limit())
+ if (_settings.use_inf_as_limit())
{
TileContainer minus_inf_tile_container;
std::vector<std::string> value = std::vector<std::string>(n0, "(-INFINITY)");
- minus_inf_tile_container.push_back({ value });
- const TileOperand &minus_inf = writer->declare_tile("minus_inf_const", minus_inf_tile_container, to_ckw(acc_data_type));
+ minus_inf_tile_container.push_back({value});
+ const TileOperand &minus_inf =
+ writer->declare_tile("minus_inf_const", minus_inf_tile_container, to_ckw(acc_data_type));
writer->op_assign(res_tile, minus_inf);
}
else
@@ -209,7 +214,7 @@ void GpuCkwPool2d::write_component_code(const ComponentGroup &comp_group, GpuCkw
writer->op_binary_elementwise_function(pool_y_e, BinaryFunction::Min, pool_size_y_tile, pool_y_e);
const TileOperand &filter_size = writer->declare_tile("filter_size", ckw::DataType::Int32);
- if(_attributes.exclude_padding())
+ if (_attributes.exclude_padding())
{
const TileOperand &y_diff = writer->declare_tile("y_diff", ckw::DataType::Int32);
const TileOperand &x_diff = writer->declare_tile("x_diff", ckw::DataType::Int32);
@@ -227,7 +232,7 @@ void GpuCkwPool2d::write_component_code(const ComponentGroup &comp_group, GpuCkw
const TileOperand &x = writer->declare_tile("x", ckw::DataType::Int32);
const TileOperand &y = writer->declare_tile("y", ckw::DataType::Int32);
- if(is_global_pooling)
+ if (is_global_pooling)
{
writer->op_assign(x, const_0);
writer->op_assign(y, const_0);
@@ -242,76 +247,80 @@ void GpuCkwPool2d::write_component_code(const ComponentGroup &comp_group, GpuCkw
}
// Y dim for-loop
- writer->op_for_loop(y, BinaryOp::Less, pool_y_e, y, AssignmentOp::Increment, const_1, [&]()
- {
- // Reset the iterator for the inner loop
- if(is_global_pooling)
- {
- writer->op_assign(x, const_0);
- }
- else
+ writer->op_for_loop(
+ y, BinaryOp::Less, pool_y_e, y, AssignmentOp::Increment, const_1,
+ [&]()
{
- writer->op_assign(x, pool_x_s);
- }
-
- TileOperand &a_y = writer->declare_tile("a_y", ckw::DataType::Int32);
- writer->op_binary_expression(a_y, idx_in_h, BinaryOp::Add, y);
-
- // X dim for-loop
- writer->op_for_loop(x, BinaryOp::Less, pool_x_e, x, AssignmentOp::Increment, const_1, [&]()
- {
- TileOperand &a_x = writer->declare_tile("a_x", ckw::DataType::Int32);
- writer->op_binary_expression(a_x, idx_in_w, BinaryOp::Add, x);
-
- TileOperand &src_tile = writer->declare_tile("src_tile", TileInfo(to_ckw(acc_data_type), m0, n0));
-
- src_sampler.y(a_x);
- src_sampler.z(a_y);
-
- // Load src tile
- if(use_fp_mixed_precision)
+ // Reset the iterator for the inner loop
+ if (is_global_pooling)
{
- TileOperand &src_uncasted_tile = writer->declare_tile("uncasted_src_tile", dst_tile_info);
- writer->op_load(src_uncasted_tile, src->tensor(), src_sampler);
- writer->op_cast_expression(src_tile, src_uncasted_tile, ckw::ConvertPolicy::None);
+ writer->op_assign(x, const_0);
}
else
{
- writer->op_load(src_tile, src->tensor(), src_sampler);
+ writer->op_assign(x, pool_x_s);
}
- // Take the square of the input, for L2 Pooling
- if(_attributes.pool_type() == PoolingType::L2)
- {
- writer->op_binary_expression(src_tile, src_tile, BinaryOp::Mul, src_tile);
- }
-
- // Perfom Pooling op
- if(_attributes.pool_type() == PoolingType::MAX)
- {
- writer->op_binary_elementwise_function(res_tile, BinaryFunction::Max, res_tile, src_tile);
- }
- else
- {
- writer->op_binary_expression(res_tile, res_tile, BinaryOp::Add, src_tile);
- }
+ TileOperand &a_y = writer->declare_tile("a_y", ckw::DataType::Int32);
+ writer->op_binary_expression(a_y, idx_in_h, BinaryOp::Add, y);
+
+ // X dim for-loop
+ writer->op_for_loop(
+ x, BinaryOp::Less, pool_x_e, x, AssignmentOp::Increment, const_1,
+ [&]()
+ {
+ TileOperand &a_x = writer->declare_tile("a_x", ckw::DataType::Int32);
+ writer->op_binary_expression(a_x, idx_in_w, BinaryOp::Add, x);
+
+ TileOperand &src_tile = writer->declare_tile("src_tile", TileInfo(to_ckw(acc_data_type), m0, n0));
+
+ src_sampler.y(a_x);
+ src_sampler.z(a_y);
+
+ // Load src tile
+ if (use_fp_mixed_precision)
+ {
+ TileOperand &src_uncasted_tile = writer->declare_tile("uncasted_src_tile", dst_tile_info);
+ writer->op_load(src_uncasted_tile, src->tensor(), src_sampler);
+ writer->op_cast_expression(src_tile, src_uncasted_tile, ckw::ConvertPolicy::None);
+ }
+ else
+ {
+ writer->op_load(src_tile, src->tensor(), src_sampler);
+ }
+
+ // Take the square of the input, for L2 Pooling
+ if (_attributes.pool_type() == PoolingType::L2)
+ {
+ writer->op_binary_expression(src_tile, src_tile, BinaryOp::Mul, src_tile);
+ }
+
+ // Perform Pooling op
+ if (_attributes.pool_type() == PoolingType::MAX)
+ {
+ writer->op_binary_elementwise_function(res_tile, BinaryFunction::Max, res_tile, src_tile);
+ }
+ else
+ {
+ writer->op_binary_expression(res_tile, res_tile, BinaryOp::Add, src_tile);
+ }
+ });
});
- });
- if((_attributes.pool_type() == PoolingType::AVG) || (_attributes.pool_type() == PoolingType::L2))
+ if ((_attributes.pool_type() == PoolingType::AVG) || (_attributes.pool_type() == PoolingType::L2))
{
// filter_size is automatically broadcasted in the operation
writer->op_binary_expression(res_tile, res_tile, BinaryOp::Div, filter_size);
}
// Take square root of the result in L2 pooling
- if(_attributes.pool_type() == PoolingType::L2)
+ if (_attributes.pool_type() == PoolingType::L2)
{
writer->op_unary_elementwise_function(res_tile, UnaryFunction::Sqrt, res_tile);
}
// Store the results and do casting if FP_MIXED_PRECISION
- if(use_fp_mixed_precision)
+ if (use_fp_mixed_precision)
{
writer->op_cast_expression(dst_tile, res_tile, ckw::ConvertPolicy::None);
}
@@ -326,7 +335,7 @@ Window GpuCkwPool2d::get_window() const
ARM_COMPUTE_ERROR_ON_MSG(_dst->tensor_shape().total_size() == 0U, "Destination tensor is not initialized");
TensorShape output_shape = _dst->tensor_shape();
- const unsigned int vec_size = adjust_vec_size(((_dst->data_type() == DataType::F32) ? 2 : 4), _dst->dimension(0));
+ const unsigned int vec_size = adjust_vec_size(((_dst->data_type() == DataType::F32) ? 2 : 4), _dst->dimension(0));
// Create and configure kernel window
auto win = calculate_max_window(output_shape, Steps(vec_size));
win = win.collapse_if_possible(win, Window::DimZ); // collapse window on batch size.
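Editor's note: a sketch of the windowing arithmetic above, assuming 2 lanes for F32 and 4 otherwise, with the iteration space rounded up to whole steps, which is effectively what calculate_max_window() does per dimension:

#include <cstdio>

// Round value up to a whole number of steps.
unsigned int ceil_to_multiple(unsigned int value, unsigned int step)
{
    return ((value + step - 1) / step) * step;
}

int main()
{
    const bool         is_f32   = true;
    const unsigned int vec_size = is_f32 ? 2 : 4;
    const unsigned int dim0     = 7;
    printf("padded iteration space: %u (step %u)\n", ceil_to_multiple(dim0, vec_size), vec_size);
    return 0;
}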
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwPool2d.h b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwPool2d.h
index 2ccf255236..822282a108 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwPool2d.h
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwPool2d.h
@@ -59,9 +59,11 @@ public:
/** Destructor */
~GpuCkwPool2d() override = default;
// Inherited methods overridden:
- virtual void write_component_code(const ComponentGroup &comp_group, GpuCkwVariableTable &vtable, GpuCkwScopedKernelWriter writer) const override;
- Window get_window() const override;
- std::string get_name(const ComponentGroup &comp_group) const override;
+ virtual void write_component_code(const ComponentGroup &comp_group,
+ GpuCkwVariableTable &vtable,
+ GpuCkwScopedKernelWriter writer) const override;
+ Window get_window() const override;
+ std::string get_name(const ComponentGroup &comp_group) const override;
private:
const ITensorInfo *_src;
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwResize.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwResize.cpp
index d997c82dae..f2a7d41afd 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwResize.cpp
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwResize.cpp
@@ -28,14 +28,13 @@
#include "src/core/helpers/WindowHelpers.h"
#include "src/core/utils/ScaleUtils.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/Common.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/WriterHelper.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h"
-#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/WriterHelper.h"
-#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/Common.h"
-
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
#include "support/StringSupport.h"
namespace arm_compute
@@ -49,20 +48,17 @@ namespace
constexpr unsigned int opencl_vector_size_in_bytes = 16;
} // namespace
-GpuCkwResize::GpuCkwResize(ComponentId id,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes)
- : IGpuCkwComponentDriver{ id, tensors },
- _src{},
- _dst{},
- _attributes{ attributes }
+GpuCkwResize::GpuCkwResize(ComponentId id, const ArgumentPack<ITensorInfo> &tensors, const Attributes &attributes)
+ : IGpuCkwComponentDriver{id, tensors}, _src{}, _dst{}, _attributes{attributes}
{
_src = this->tensors().get_const_tensor(TensorType::ACL_SRC);
_dst = this->tensors().get_const_tensor(TensorType::ACL_DST);
ARM_COMPUTE_ERROR_ON_NULLPTR(_src, _dst);
}
-void GpuCkwResize::do_nearest_neighbor_resize(const ComponentGroup &comp_group, GpuCkwVariableTable &vtable, GpuCkwScopedKernelWriter writer) const
+void GpuCkwResize::do_nearest_neighbor_resize(const ComponentGroup &comp_group,
+ GpuCkwVariableTable &vtable,
+ GpuCkwScopedKernelWriter writer) const
{
const size_t width_idx = get_data_layout_dimension_index(_dst->data_layout(), DataLayoutDimension::WIDTH);
const size_t height_idx = get_data_layout_dimension_index(_dst->data_layout(), DataLayoutDimension::HEIGHT);
@@ -72,12 +68,16 @@ void GpuCkwResize::do_nearest_neighbor_resize(const ComponentGroup &comp_group,
const int32_t m0 = root_window.y().step();
const int32_t partial_n0 = _dst->dimension(0) % n0;
- GpuCkwComponentArgument *src = vtable.declare_variable(comp_group, writer, _src, TensorStorageType::ClBufferUint8Ptr, "src");
- GpuCkwComponentArgument *dst = vtable.declare_variable(comp_group, writer, _dst, TensorStorageType::ClBufferUint8Ptr, "dst");
+ GpuCkwComponentArgument *src =
+ vtable.declare_variable(comp_group, writer, _src, TensorStorageType::ClBufferUint8Ptr, "src");
+ GpuCkwComponentArgument *dst =
+ vtable.declare_variable(comp_group, writer, _dst, TensorStorageType::ClBufferUint8Ptr, "dst");
// Constants
- const float scale_x = scale_utils::calculate_resize_ratio(_src->dimension(width_idx), _dst->dimension(width_idx), _attributes.align_corners());
- const float scale_y = scale_utils::calculate_resize_ratio(_src->dimension(height_idx), _dst->dimension(height_idx), _attributes.align_corners());
+ const float scale_x = scale_utils::calculate_resize_ratio(_src->dimension(width_idx), _dst->dimension(width_idx),
+ _attributes.align_corners());
+ const float scale_y = scale_utils::calculate_resize_ratio(_src->dimension(height_idx), _dst->dimension(height_idx),
+ _attributes.align_corners());
const auto &tile_scale_x = writer->declare_tile("scale_x", scale_x);
const auto &tile_scale_y = writer->declare_tile("scale_y", scale_y);
const auto &tile_0 = writer->declare_tile("0", 0);
@@ -112,7 +112,7 @@ void GpuCkwResize::do_nearest_neighbor_resize(const ComponentGroup &comp_group,
const auto &tile_xi_f = writer->declare_tile("xi_f", ckw::DataType::Fp32);
const auto &tile_yi_f = writer->declare_tile("yi_f", ckw::DataType::Fp32);
- switch(_attributes.sampling_policy())
+ switch (_attributes.sampling_policy())
{
case SamplingPolicy::TOP_LEFT:
// xi_f = (xo * scale_x)
@@ -138,7 +138,7 @@ void GpuCkwResize::do_nearest_neighbor_resize(const ComponentGroup &comp_group,
ARM_COMPUTE_ERROR("Unsupported sampling policy");
}
- if(_attributes.align_corners())
+ if (_attributes.align_corners())
{
writer->op_unary_elementwise_function(tile_xi_f, UnaryFunction::Round, tile_xi_f);
writer->op_unary_elementwise_function(tile_yi_f, UnaryFunction::Round, tile_yi_f);
@@ -161,8 +161,10 @@ void GpuCkwResize::do_nearest_neighbor_resize(const ComponentGroup &comp_group,
auto &tile_xi0 = writer->declare_tile("xi0", ckw::DataType::Int32);
auto &tile_yi0 = writer->declare_tile("yi0", ckw::DataType::Int32);
- writer->op_ternary_elementwise_function(tile_xi0, TernaryFunction::Clamp, tile_xi_f_int, tile_0, tile_src_w_minus_1);
- writer->op_ternary_elementwise_function(tile_yi0, TernaryFunction::Clamp, tile_yi_f_int, tile_0, tile_src_h_minus_1);
+ writer->op_ternary_elementwise_function(tile_xi0, TernaryFunction::Clamp, tile_xi_f_int, tile_0,
+ tile_src_w_minus_1);
+ writer->op_ternary_elementwise_function(tile_yi0, TernaryFunction::Clamp, tile_yi_f_int, tile_0,
+ tile_src_h_minus_1);
TensorTileSampler src_sampler;
src_sampler.x(tile_co);
@@ -199,7 +201,9 @@ void GpuCkwResize::do_nearest_neighbor_resize(const ComponentGroup &comp_group,
writer->op_assign(tile_dst, tile_src);
}
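Editor's note: the scale constants above come from scale_utils::calculate_resize_ratio(). A hedged restatement of the usual formula, assuming the align_corners convention of subtracting one sample from both extents so that corner samples map exactly:

#include <cstddef>
#include <cstdio>

// Assumed formula; treat as an approximation of the library helper.
float resize_ratio(std::size_t in_size, std::size_t out_size, bool align_corners)
{
    const std::size_t offset = align_corners ? 1 : 0;
    return static_cast<float>(in_size - offset) / static_cast<float>(out_size - offset);
}

int main()
{
    printf("scale_x (default)       = %f\n", resize_ratio(10, 20, false)); // 0.500000
    printf("scale_x (align_corners) = %f\n", resize_ratio(10, 20, true));  // 9/19
    return 0;
}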
-void GpuCkwResize::do_bilinear_resize(const ComponentGroup &comp_group, GpuCkwVariableTable &vtable, GpuCkwScopedKernelWriter writer) const
+void GpuCkwResize::do_bilinear_resize(const ComponentGroup &comp_group,
+ GpuCkwVariableTable &vtable,
+ GpuCkwScopedKernelWriter writer) const
{
const size_t width_idx = get_data_layout_dimension_index(_dst->data_layout(), DataLayoutDimension::WIDTH);
const size_t height_idx = get_data_layout_dimension_index(_dst->data_layout(), DataLayoutDimension::HEIGHT);
@@ -209,12 +213,16 @@ void GpuCkwResize::do_bilinear_resize(const ComponentGroup &comp_group, GpuCkwVa
const int32_t m0 = root_window.y().step();
const int32_t partial_n0 = _dst->dimension(0) % n0;
- GpuCkwComponentArgument *src = vtable.declare_variable(comp_group, writer, _src, TensorStorageType::ClBufferUint8Ptr, "src");
- GpuCkwComponentArgument *dst = vtable.declare_variable(comp_group, writer, _dst, TensorStorageType::ClBufferUint8Ptr, "dst");
+ GpuCkwComponentArgument *src =
+ vtable.declare_variable(comp_group, writer, _src, TensorStorageType::ClBufferUint8Ptr, "src");
+ GpuCkwComponentArgument *dst =
+ vtable.declare_variable(comp_group, writer, _dst, TensorStorageType::ClBufferUint8Ptr, "dst");
// Constants
- const float scale_x = scale_utils::calculate_resize_ratio(_src->dimension(width_idx), _dst->dimension(width_idx), _attributes.align_corners());
- const float scale_y = scale_utils::calculate_resize_ratio(_src->dimension(height_idx), _dst->dimension(height_idx), _attributes.align_corners());
+ const float scale_x = scale_utils::calculate_resize_ratio(_src->dimension(width_idx), _dst->dimension(width_idx),
+ _attributes.align_corners());
+ const float scale_y = scale_utils::calculate_resize_ratio(_src->dimension(height_idx), _dst->dimension(height_idx),
+ _attributes.align_corners());
const auto &tile_scale_x = writer->declare_tile("scale_x", scale_x);
const auto &tile_scale_y = writer->declare_tile("scale_y", scale_y);
const auto &tile_0 = writer->declare_tile("0", 0);
@@ -251,7 +259,7 @@ void GpuCkwResize::do_bilinear_resize(const ComponentGroup &comp_group, GpuCkwVa
const auto &tile_xi_f = writer->declare_tile("xi_f", ckw::DataType::Fp32);
const auto &tile_yi_f = writer->declare_tile("yi_f", ckw::DataType::Fp32);
- switch(_attributes.sampling_policy())
+ switch (_attributes.sampling_policy())
{
case SamplingPolicy::TOP_LEFT:
// xi_f = (xo * scale_x)
@@ -312,8 +320,10 @@ void GpuCkwResize::do_bilinear_resize(const ComponentGroup &comp_group, GpuCkwVa
writer->op_ternary_elementwise_function(tile_xi0, TernaryFunction::Clamp, tile_xi, tile_0, tile_src_w_minus_1);
writer->op_ternary_elementwise_function(tile_yi0, TernaryFunction::Clamp, tile_yi, tile_0, tile_src_h_minus_1);
- writer->op_ternary_elementwise_function(tile_xi1, TernaryFunction::Clamp, tile_xi_plus_1, tile_0, tile_src_w_minus_1);
- writer->op_ternary_elementwise_function(tile_yi1, TernaryFunction::Clamp, tile_yi_plus_1, tile_0, tile_src_h_minus_1);
+ writer->op_ternary_elementwise_function(tile_xi1, TernaryFunction::Clamp, tile_xi_plus_1, tile_0,
+ tile_src_w_minus_1);
+ writer->op_ternary_elementwise_function(tile_yi1, TernaryFunction::Clamp, tile_yi_plus_1, tile_0,
+ tile_src_h_minus_1);
TensorTileSampler in_sampler;
in_sampler.x(tile_co);
@@ -388,7 +398,7 @@ void GpuCkwResize::do_bilinear_resize(const ComponentGroup &comp_group, GpuCkwVa
writer->op_binary_expression(tile_a1, tile_yi_f, BinaryOp::Sub, tile_yi_float);
writer->op_binary_expression(tile_b1, tile_1, BinaryOp::Sub, tile_a1);
- if(is_data_type_float(_src->data_type()))
+ if (is_data_type_float(_src->data_type()))
{
// Cast weights to source type
const auto &tile_a_src_type = writer->declare_tile("a_src_t", to_ckw(_src->data_type()));
@@ -461,9 +471,11 @@ void GpuCkwResize::do_bilinear_resize(const ComponentGroup &comp_group, GpuCkwVa
}
}
-void GpuCkwResize::write_component_code(const ComponentGroup &comp_group, GpuCkwVariableTable &vtable, GpuCkwScopedKernelWriter writer) const
+void GpuCkwResize::write_component_code(const ComponentGroup &comp_group,
+ GpuCkwVariableTable &vtable,
+ GpuCkwScopedKernelWriter writer) const
{
- switch(_attributes.interpolation_policy())
+ switch (_attributes.interpolation_policy())
{
case InterpolationPolicy::NEAREST_NEIGHBOR:
do_nearest_neighbor_resize(comp_group, vtable, writer);
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.cpp
index 8917391537..889706b0c0 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.cpp
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.cpp
@@ -24,10 +24,12 @@
#include "GpuCkwStore.h"
#include "arm_compute/core/Error.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
+
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
+
#include <string>
namespace arm_compute
@@ -37,12 +39,14 @@ namespace experimental
namespace dynamic_fusion
{
GpuCkwStore::GpuCkwStore(ComponentId id, const ArgumentPack<ITensorInfo> &tensors)
- : IGpuCkwComponentDriver{ id, tensors }, _src{}, _dst{}
+ : IGpuCkwComponentDriver{id, tensors}, _src{}, _dst{}
{
_src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0);
_dst = this->tensors().get_const_tensor(TensorType::ACL_DST_0);
}
-void GpuCkwStore::write_component_code(const ComponentGroup &comp_group, GpuCkwVariableTable &vtable, GpuCkwScopedKernelWriter writer) const
+void GpuCkwStore::write_component_code(const ComponentGroup &comp_group,
+ GpuCkwVariableTable &vtable,
+ GpuCkwScopedKernelWriter writer) const
{
auto src = vtable.declare_variable(comp_group, writer, _src, TensorStorageType::ClBufferUint8Ptr, "src");
auto dst = vtable.declare_variable(comp_group, writer, _dst, TensorStorageType::ClBufferUint8Ptr, "dst");
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.h b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.h
index 8e35651caf..f1f0e6747b 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.h
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.h
@@ -48,8 +48,10 @@ public:
/** Destructor */
~GpuCkwStore() override = default;
// Inherited methods overridden:
- virtual void write_component_code(const ComponentGroup &comp_group, GpuCkwVariableTable &vtable, GpuCkwScopedKernelWriter writer) const override;
- std::string get_name(const ComponentGroup &comp_group) const override;
+ virtual void write_component_code(const ComponentGroup &comp_group,
+ GpuCkwVariableTable &vtable,
+ GpuCkwScopedKernelWriter writer) const override;
+ std::string get_name(const ComponentGroup &comp_group) const override;
private:
const ITensorInfo *_src;
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/WriterHelper.h b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/WriterHelper.h
index e2b8584b99..6ba2b2f651 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/WriterHelper.h
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/WriterHelper.h
@@ -26,6 +26,7 @@
#include "arm_compute/core/utils/misc/Utility.h"
#include "ckw/TensorTileSampler.h"
+
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwComponentArgument.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h"
@@ -44,9 +45,14 @@ using SamplerCreator = std::function<TensorTileSampler(GpuCkwScopedKernelWriter
/** Load src and dst tiles of dimension [m0, n0] only when not loaded and prepare the sampler
*/
-inline void load_src_dst_tiles_and_prepare_sampler(GpuCkwScopedKernelWriter &writer, GpuCkwComponentArgument *src, GpuCkwComponentArgument *dst, int32_t m0, int32_t n0, SamplerCreator create_sampler)
+inline void load_src_dst_tiles_and_prepare_sampler(GpuCkwScopedKernelWriter &writer,
+ GpuCkwComponentArgument *src,
+ GpuCkwComponentArgument *dst,
+ int32_t m0,
+ int32_t n0,
+ SamplerCreator create_sampler)
{
- if(!src->has_tile())
+ if (!src->has_tile())
{
const auto sampler = create_sampler(writer, m0, n0);
writer->op_load_once(src, sampler);
@@ -61,7 +67,7 @@ inline void load_src_dst_tiles_and_prepare_sampler(GpuCkwScopedKernelWriter &wri
const auto &sampler = src->tile_sampler();
// Prepare the output tile.
- if(!dst->has_tile())
+ if (!dst->has_tile())
{
auto &tile = writer->declare_tile("dst_tile", src_tile.tile_info());
dst->init_virtual_tensor(tile, sampler);
@@ -78,7 +84,13 @@ inline void load_src_dst_tiles_and_prepare_sampler(GpuCkwScopedKernelWriter &wri
* @param[in] prefix Prefix to all the tiles declared within this function
* @param[in] const_0 Constant tile of value 0
*/
-inline void get_coord(GpuCkwScopedKernelWriter writer, TileOperand &coord, const TileOperand &gid, int32_t step_v, int32_t leftover_step_v, const std::string &prefix, const TileOperand &const_0)
+inline void get_coord(GpuCkwScopedKernelWriter writer,
+ TileOperand &coord,
+ const TileOperand &gid,
+ int32_t step_v,
+ int32_t leftover_step_v,
+ const std::string &prefix,
+ const TileOperand &const_0)
{
auto &step = writer->declare_tile(prefix + "step", step_v);
auto &leftover_step = writer->declare_tile(prefix + "leftover_step", leftover_step_v);
@@ -122,8 +134,15 @@ inline void get_coord(GpuCkwScopedKernelWriter writer, TileOperand &coord, const
*
* @return TensorTileSampler
*/
-inline TensorTileSampler create_boundary_aware_2d_sampler(GpuCkwScopedKernelWriter writer, TileOperand &gid_0, TileOperand &gid_1, int32_t dim0_v, int32_t dim1_v, int32_t n0_v, int32_t m0_v,
- const std::string prefix, TileOperand &const_0)
+inline TensorTileSampler create_boundary_aware_2d_sampler(GpuCkwScopedKernelWriter writer,
+ TileOperand &gid_0,
+ TileOperand &gid_1,
+ int32_t dim0_v,
+ int32_t dim1_v,
+ int32_t n0_v,
+ int32_t m0_v,
+ const std::string prefix,
+ TileOperand &const_0)
{
// Clamp tile size [n0, m0] against dimension [dim0, dim1]
// This is needed to:
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/Common.h b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/Common.h
index 34b1283add..5da317bf38 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/Common.h
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/Common.h
@@ -28,6 +28,7 @@
#include "arm_compute/core/TensorShape.h"
#include "arm_compute/core/Types.h"
#include "ckw/TensorInfo.h"
+
#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
namespace arm_compute
@@ -38,7 +39,7 @@ namespace dynamic_fusion
{
inline ckw::DataType to_ckw(DataType dt)
{
- switch(dt)
+ switch (dt)
{
case DataType::F32:
return ckw::DataType::Fp32;
@@ -65,21 +66,16 @@ inline ckw::DataType to_ckw(DataType dt)
inline ckw::TensorShape to_ckw(const TensorShape &shape)
{
- ARM_COMPUTE_ERROR_ON(shape.num_max_dimensions < std::tuple_size<ckw::TensorShape> {});
- ARM_COMPUTE_ERROR_ON(std::tuple_size<ckw::TensorShape> {} != 5);
+ ARM_COMPUTE_ERROR_ON(shape.num_max_dimensions < std::tuple_size<ckw::TensorShape>{});
+ ARM_COMPUTE_ERROR_ON(std::tuple_size<ckw::TensorShape>{} != 5);
/// NOTE: Overflow danger. Use size_t?
- return ckw::TensorShape
- {
- static_cast<int32_t>(shape[0]),
- static_cast<int32_t>(shape[1]),
- static_cast<int32_t>(shape[2]),
- static_cast<int32_t>(shape[3]),
- static_cast<int32_t>(shape[4])
- };
+ return ckw::TensorShape{static_cast<int32_t>(shape[0]), static_cast<int32_t>(shape[1]),
+ static_cast<int32_t>(shape[2]), static_cast<int32_t>(shape[3]),
+ static_cast<int32_t>(shape[4])};
}
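Editor's note: the NOTE above flags a narrowing hazard in the size_t-to-int32_t casts. A defensive variant (hypothetical, not library code) would reject dimensions that cannot be represented instead of silently wrapping:

#include <cstddef>
#include <cstdint>
#include <limits>
#include <stdexcept>

// Hypothetical checked narrowing; not part of the library.
int32_t checked_narrow(std::size_t dim)
{
    if (dim > static_cast<std::size_t>(std::numeric_limits<int32_t>::max()))
        throw std::overflow_error("tensor dimension does not fit in int32_t");
    return static_cast<int32_t>(dim);
}

int main()
{
    return checked_narrow(1024) == 1024 ? 0 : 1;
}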
inline ckw::TensorDataLayout to_ckw(DataLayout dl)
{
- switch(dl)
+ switch (dl)
{
case DataLayout::NHWC:
return ckw::TensorDataLayout::Nhwc;
@@ -91,18 +87,13 @@ inline ckw::TensorDataLayout to_ckw(DataLayout dl)
}
inline ckw::TensorInfo to_ckw(const ITensorInfo &tensor_info)
{
- return ckw::TensorInfo
- {
- to_ckw(tensor_info.data_type()),
- to_ckw(tensor_info.tensor_shape()),
- to_ckw(tensor_info.data_layout()),
- tensor_info.id()
- };
+ return ckw::TensorInfo{to_ckw(tensor_info.data_type()), to_ckw(tensor_info.tensor_shape()),
+ to_ckw(tensor_info.data_layout()), tensor_info.id()};
}
inline TensorComponentType from_ckw(const ckw::TensorComponentType &component)
{
- switch(component)
+ switch (component)
{
case ckw::TensorComponentType::OffsetFirstElement:
return TensorComponentType::OffsetFirstElement;
@@ -142,7 +133,7 @@ inline TensorComponentType from_ckw(const ckw::TensorComponentType &component)
inline ckw::TensorStorageType to_ckw(const TensorStorageType &storage)
{
- switch(storage)
+ switch (storage)
{
case TensorStorageType::ClBufferUint8Ptr:
return ckw::TensorStorageType::BufferUint8Ptr;
@@ -159,7 +150,7 @@ inline ckw::TensorStorageType to_ckw(const TensorStorageType &storage)
}
inline TensorStorageType from_ckw(const ckw::TensorStorageType &storage)
{
- switch(storage)
+ switch (storage)
{
case ckw::TensorStorageType::BufferUint8Ptr:
return TensorStorageType::ClBufferUint8Ptr;
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/ElementwiseBinary.h b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/ElementwiseBinary.h
index 9cb022fc10..0cba258940 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/ElementwiseBinary.h
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/ElementwiseBinary.h
@@ -25,6 +25,7 @@
#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_COMPONENTS_UTILS_TYPE_CONVERTER_ELEMENTWISEBINARY
#include "ckw/types/Operators.h"
+
#include "src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.h"
namespace arm_compute
@@ -35,7 +36,7 @@ namespace dynamic_fusion
{
inline ckw::BinaryOp to_ckw(const ElementwiseBinaryCommonAttributes &attributes)
{
- switch(attributes.operation())
+ switch (attributes.operation())
{
case ElementwiseBinaryCommonAttributes::ElementwiseOp::Add:
return ckw::BinaryOp::Add;
diff --git a/src/dynamic_fusion/sketch/gpu/components/GpuKernelComponentFactory.h b/src/dynamic_fusion/sketch/gpu/components/GpuKernelComponentFactory.h
index f7f0029618..ee109a7e2b 100644
--- a/src/dynamic_fusion/sketch/gpu/components/GpuKernelComponentFactory.h
+++ b/src/dynamic_fusion/sketch/gpu/components/GpuKernelComponentFactory.h
@@ -24,8 +24,9 @@
#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_GPUKERNELCOMPONENTFACTORY
#define SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_GPUKERNELCOMPONENTFACTORY
-#include "Types.h"
#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h"
+
+#include "Types.h"
#include <memory>
namespace arm_compute
@@ -49,13 +50,13 @@ public:
* @return std::unique_ptr<IGpuKernelComponent>
*/
template <typename T, typename... Args>
- std::unique_ptr<IGpuKernelComponent> create(Args &&... args)
+ std::unique_ptr<IGpuKernelComponent> create(Args &&...args)
{
return std::make_unique<T>(_count++, std::forward<Args>(args)...);
}
private:
- ComponentId _count{ 0 };
+ ComponentId _count{0};
};
} // namespace dynamic_fusion
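Editor's note: the factory's create() is a textbook variadic perfect-forwarding pattern: a monotonically increasing ComponentId is prepended and the remaining arguments are forwarded to the component constructor. A minimal model (placeholder types):

#include <iostream>
#include <memory>
#include <utility>

struct Component
{
    Component(int id, const char *name) : id(id), name(name) {}
    int         id;
    const char *name;
};

// Minimal model: prepend a running id, forward everything else.
class Factory
{
public:
    template <typename T, typename... Args>
    std::unique_ptr<T> create(Args &&...args)
    {
        return std::make_unique<T>(_count++, std::forward<Args>(args)...);
    }
private:
    int _count{0};
};

int main()
{
    Factory f;
    auto store  = f.create<Component>("store");
    auto resize = f.create<Component>("resize");
    std::cout << store->id << " " << resize->id << "\n"; // 0 1
    return 0;
}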
diff --git a/src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h b/src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h
index af766a7ece..4b8eea2f57 100644
--- a/src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h
+++ b/src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h
@@ -24,11 +24,11 @@
#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_IGPUKERNELCOMPONENT
#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_IGPUKERNELCOMPONENT
-#include "Types.h"
-
#include "src/dynamic_fusion/sketch/ArgumentPack.h"
#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSourceCode.h"
+#include "Types.h"
+
namespace arm_compute
{
namespace experimental
@@ -76,13 +76,8 @@ public:
* @param[in] properties Kernel component properties
* @param[in] tensors Tensor arguments to the components
*/
- IGpuKernelComponent(
- ComponentId id,
- const Properties &properties,
- const ArgumentPack<ITensorInfo> &tensors)
- : _id{ id },
- _properties{ properties },
- _tensors{ tensors }
+ IGpuKernelComponent(ComponentId id, const Properties &properties, const ArgumentPack<ITensorInfo> &tensors)
+ : _id{id}, _properties{properties}, _tensors{tensors}
{
}
/** Destructor */
@@ -117,7 +112,7 @@ public:
virtual GpuComponentType type() const = 0;
private:
- ComponentId _id{ -1 };
+ ComponentId _id{-1};
Properties _properties{};
ArgumentPack<ITensorInfo> _tensors{};
};
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.cpp
index c41257d18c..fdf528a65d 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.cpp
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.cpp
@@ -68,17 +68,11 @@ ClComponentActivation::ClComponentActivation(ComponentId
const IGpuKernelComponent::Properties &properties,
const ArgumentPack<ITensorInfo> &tensors,
const Attributes &attributes)
- : IGpuKernelComponent{ id, properties, tensors },
+ : IGpuKernelComponent{id, properties, tensors},
#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
- _component_writer
-{
- std::make_unique<ClTemplateActivation>(id, tensors, attributes)
-}
+ _component_writer{std::make_unique<ClTemplateActivation>(id, tensors, attributes)}
#else //ACL_INTERNAL_TEST_CKW_IN_DF
- _component_writer
-{
- std::make_unique<GpuCkwActivation>(id, tensors, attributes)
-}
+ _component_writer{std::make_unique<GpuCkwActivation>(id, tensors, attributes)}
#endif //ACL_INTERNAL_TEST_CKW_IN_DF
{
}
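Editor's note: the constructor above selects one of two writer implementations at compile time inside the mem-initializer list. The same pattern in isolation (placeholder names, not the library's classes or macro):

#include <memory>

struct IWriter { virtual ~IWriter() = default; };
struct TemplateWriter : IWriter {};
struct CkwWriter : IWriter {};

struct ComponentSketch
{
    ComponentSketch()
#ifndef USE_CKW_WRITER // stand-in for ACL_INTERNAL_TEST_CKW_IN_DF
        : _writer{std::make_unique<TemplateWriter>()}
#else
        : _writer{std::make_unique<CkwWriter>()}
#endif
    {
    }
    std::unique_ptr<IWriter> _writer;
};

int main()
{
    ComponentSketch c;
    return c._writer != nullptr ? 0 : 1;
}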
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.h b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.h
index 9b090af988..02c854356a 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.h
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.h
@@ -25,9 +25,8 @@
#define SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTACTIVATION
#include "arm_compute/function_info/ActivationLayerInfo.h"
-#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h"
-#include "arm_compute/function_info/ActivationLayerInfo.h"
+#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h"
namespace arm_compute
{
@@ -79,20 +78,17 @@ public:
* |F16 |F16 |
* |F32 |F32 |
*/
- static Status validate(
- const Properties &properties,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes);
+ static Status
+ validate(const Properties &properties, const ArgumentPack<ITensorInfo> &tensors, const Attributes &attributes);
/** Constructor
*
* Similar to @ref ClComponentActivation::validate()
*/
- ClComponentActivation(
- ComponentId id,
- const Properties &properties,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes);
+ ClComponentActivation(ComponentId id,
+ const Properties &properties,
+ const ArgumentPack<ITensorInfo> &tensors,
+ const Attributes &attributes);
/** Destructor */
~ClComponentActivation() override;
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.cpp
index 635869f817..b1636795a3 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.cpp
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.cpp
@@ -24,6 +24,7 @@
#include "ClComponentCast.h"
#include "arm_compute/core/Error.h"
+
#include "src/core/CL/CLValidate.h"
#include "src/dynamic_fusion/sketch/ArgumentPack.h"
#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
@@ -38,11 +39,10 @@ namespace experimental
{
namespace dynamic_fusion
{
-Status ClComponentCast::validate(
- const Properties &properties,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes,
- const Settings &settings)
+Status ClComponentCast::validate(const Properties &properties,
+ const ArgumentPack<ITensorInfo> &tensors,
+ const Attributes &attributes,
+ const Settings &settings)
{
ARM_COMPUTE_UNUSED(properties, attributes, settings);
@@ -53,13 +53,15 @@ Status ClComponentCast::validate(
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(dst);
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst);
ARM_COMPUTE_RETURN_ERROR_ON(src == dst);
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(src->data_type() == attributes.data_type(), "input and target data types should be different");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(src->data_type() == attributes.data_type(),
+ "input and target data types should be different");
// Validate in case of configured dst
- if(dst->total_size() > 0)
+ if (dst->total_size() > 0)
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(src, dst);
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(dst->data_type() != attributes.data_type(), "dst and target data types should be same");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(dst->data_type() != attributes.data_type(),
+ "dst and target data types should be same");
}
return Status{};
@@ -69,17 +71,11 @@ ClComponentCast::ClComponentCast(ComponentId id,
const ArgumentPack<ITensorInfo> &tensors,
const Attributes &attributes,
const Settings &settings)
- : IGpuKernelComponent{ id, properties, tensors },
+ : IGpuKernelComponent{id, properties, tensors},
#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
- _component_writer
-{
- std::make_unique<ClTemplateCast>(id, tensors, attributes)
-}
+ _component_writer{std::make_unique<ClTemplateCast>(id, tensors, attributes)}
#else //ACL_INTERNAL_TEST_CKW_IN_DF
- _component_writer
-{
- std::make_unique<GpuCkwCast>(id, tensors, attributes)
-}
+ _component_writer{std::make_unique<GpuCkwCast>(id, tensors, attributes)}
#endif //ACL_INTERNAL_TEST_CKW_IN_DF
{
ARM_COMPUTE_UNUSED(attributes, settings);
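Editor's note: ClComponentCast::validate() enforces two type rules above: the source and target data types must differ, and an already-configured dst must match the target type. A hedged restatement with a plain enum standing in for DataType:

#include <cassert>

enum class DT { F16, F32, S32 };

// Returns true when a cast request passes both checks above.
bool cast_types_ok(DT src, DT target, bool dst_configured, DT dst)
{
    if (src == target) // "input and target data types should be different"
        return false;
    if (dst_configured && dst != target) // "dst and target data types should be same"
        return false;
    return true;
}

int main()
{
    assert(cast_types_ok(DT::F32, DT::F16, true, DT::F16));
    assert(!cast_types_ok(DT::F32, DT::F32, false, DT::F32));
    return 0;
}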
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.h b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.h
index 37b8cbb6c9..ed77b1203b 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.h
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.h
@@ -25,6 +25,7 @@
#define SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTCAST
#include "arm_compute/dynamic_fusion/sketch/attributes/CastAttributes.h"
+
#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h"
namespace arm_compute
@@ -93,11 +94,10 @@ public:
* |F16 | U8, S8, U16, S16, U32, S32, F32 |
* |F32 | U8, S8, U16, S16, U32, S32, F16 |
*/
- static Status validate(
- const Properties &properties,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes,
- const Settings &settings);
+ static Status validate(const Properties &properties,
+ const ArgumentPack<ITensorInfo> &tensors,
+ const Attributes &attributes,
+ const Settings &settings);
/** Constructor
*
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDepthwiseConv2d.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDepthwiseConv2d.cpp
index 5626093079..d95e0be1f2 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDepthwiseConv2d.cpp
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDepthwiseConv2d.cpp
@@ -26,6 +26,7 @@
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/dynamic_fusion/sketch/attributes/DepthwiseConv2dAttributes.h"
+
#include "src/core/CL/CLValidate.h"
#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDepthwiseConv2d.h"
@@ -103,11 +104,10 @@ unsigned int Settings::m0() const
return _m0;
}
-Status ClComponentDepthwiseConv2d::validate(
- const Properties &properties,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes,
- const Settings &settings)
+Status ClComponentDepthwiseConv2d::validate(const Properties &properties,
+ const ArgumentPack<ITensorInfo> &tensors,
+ const Attributes &attributes,
+ const Settings &settings)
{
ARM_COMPUTE_UNUSED(properties, settings);
const auto src = tensors.get_const_tensor(TensorType::ACL_SRC_0);
@@ -121,7 +121,7 @@ Status ClComponentDepthwiseConv2d::validate(
// Matching data type
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, wei);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, dst);
- if(bia != nullptr)
+ if (bia != nullptr)
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, bia);
}
@@ -129,7 +129,7 @@ Status ClComponentDepthwiseConv2d::validate(
// Matching data layout
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(src, wei);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(src, dst);
- if(bia != nullptr)
+ if (bia != nullptr)
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(src, bia);
}
@@ -138,7 +138,7 @@ Status ClComponentDepthwiseConv2d::validate(
ARM_COMPUTE_RETURN_ERROR_ON(src->tensor_shape().total_size() == 0);
ARM_COMPUTE_RETURN_ERROR_ON(wei->tensor_shape().total_size() == 0);
ARM_COMPUTE_RETURN_ERROR_ON(dst->tensor_shape().total_size() == 0);
- if(bia != nullptr)
+ if (bia != nullptr)
{
ARM_COMPUTE_RETURN_ERROR_ON(bia->tensor_shape().total_size() == 0);
}
@@ -148,16 +148,17 @@ Status ClComponentDepthwiseConv2d::validate(
const DataLayout data_layout = src->data_layout();
const size_t channel_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);
- ARM_COMPUTE_RETURN_ERROR_ON(wei->dimension(channel_idx) != (src->dimension(channel_idx) * attributes.depth_multiplier()));
+ ARM_COMPUTE_RETURN_ERROR_ON(wei->dimension(channel_idx) !=
+ (src->dimension(channel_idx) * attributes.depth_multiplier()));
ARM_COMPUTE_RETURN_ERROR_ON_MSG(wei->num_dimensions() > 3, "Weights can be at most 3 dimensional");
// dst shape is correct
- const PadStrideInfo pad_stride_info = PadStrideInfo(attributes.stride().x(), attributes.stride().y(),
- attributes.pad().left, attributes.pad().right,
- attributes.pad().top, attributes.pad().bottom,
- attributes.dimension_rounding_type());
- const ConvolutionInfo conv_info{ pad_stride_info, attributes.depth_multiplier(), ActivationLayerInfo(), attributes.dilation() };
- const TensorShape output_shape = misc::shape_calculator::compute_depthwise_convolution_shape(*src, *wei, conv_info);
+ const PadStrideInfo pad_stride_info =
+ PadStrideInfo(attributes.stride().x(), attributes.stride().y(), attributes.pad().left, attributes.pad().right,
+ attributes.pad().top, attributes.pad().bottom, attributes.dimension_rounding_type());
+ const ConvolutionInfo conv_info{pad_stride_info, attributes.depth_multiplier(), ActivationLayerInfo(),
+ attributes.dilation()};
+ const TensorShape output_shape = misc::shape_calculator::compute_depthwise_convolution_shape(*src, *wei, conv_info);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(dst->tensor_shape(), output_shape);
@@ -168,19 +169,22 @@ Status ClComponentDepthwiseConv2d::validate(
ARM_COMPUTE_RETURN_ERROR_ON(conv_info.pad_stride_info.stride().first > 1 && settings.m0() != 1);
ARM_COMPUTE_RETURN_ERROR_ON(conv_info.dilation.x() > 1 && settings.m0() != 1);
- if(conv_info.depth_multiplier > 1 && settings.n0() > 1)
+ if (conv_info.depth_multiplier > 1 && settings.n0() > 1)
{
ARM_COMPUTE_RETURN_ERROR_ON((conv_info.depth_multiplier % settings.n0()) != 0);
}
// Check export weights to cl image
- ARM_COMPUTE_RETURN_ERROR_ON_MSG((settings.export_weights_to_cl_image() == true) && (export_to_cl_image(wei) == false), "Weights cannot be exported to cl_image!");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG((settings.export_weights_to_cl_image() == true) &&
+ (export_to_cl_image(wei) == false),
+ "Weights cannot be exported to cl_image!");
ARM_COMPUTE_RETURN_ERROR_ON((settings.export_weights_to_cl_image() == true) && ((settings.n0() % 4) != 0));
- ARM_COMPUTE_RETURN_ERROR_ON(wei->dimension(channel_idx) != (src->dimension(channel_idx) * conv_info.depth_multiplier));
+ ARM_COMPUTE_RETURN_ERROR_ON(wei->dimension(channel_idx) !=
+ (src->dimension(channel_idx) * conv_info.depth_multiplier));
// bia shape is correct
- if(bia != nullptr)
+ if (bia != nullptr)
{
ARM_COMPUTE_RETURN_ERROR_ON_MSG(bia->dimension(0) != output_shape[channel_idx],
"Biases size and number of dst feature maps should match");
@@ -198,14 +202,13 @@ Status ClComponentDepthwiseConv2d::validate(
return Status{};
}
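Editor's note: the dst-shape check above relies on the standard convolution output-extent formula, with the channel count scaled by the depth multiplier. A back-of-envelope version (floor rounding only; the real shape calculator also honours the configured rounding type):

#include <cstdio>

// Output extent for one spatial dimension (floor rounding).
unsigned int conv_out_dim(unsigned int in, unsigned int kernel, unsigned int pad_total,
                          unsigned int stride, unsigned int dilation)
{
    const unsigned int dilated_kernel = (kernel - 1) * dilation + 1;
    return (in + pad_total - dilated_kernel) / stride + 1;
}

int main()
{
    // 32x32 input, 3x3 kernel, pad 1+1, stride 1, dilation 1 -> 32x32 spatially;
    // the channel count scales by the depth multiplier instead.
    printf("out_w = %u\n", conv_out_dim(32, 3, 2, 1, 1));
    printf("out_c = %u\n", 16u * 2u); // in_c * depth_multiplier
    return 0;
}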
-ClComponentDepthwiseConv2d::ClComponentDepthwiseConv2d(
- ComponentId id,
- const Properties &properties,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes,
- const Settings &settings)
- : IGpuKernelComponent{ id, properties, tensors },
- _component_writer{ std::make_unique<ClTemplateDepthwiseConv2d>(id, tensors, attributes, settings) }
+ClComponentDepthwiseConv2d::ClComponentDepthwiseConv2d(ComponentId id,
+ const Properties &properties,
+ const ArgumentPack<ITensorInfo> &tensors,
+ const Attributes &attributes,
+ const Settings &settings)
+ : IGpuKernelComponent{id, properties, tensors},
+ _component_writer{std::make_unique<ClTemplateDepthwiseConv2d>(id, tensors, attributes, settings)}
{
}
ClComponentDepthwiseConv2d::~ClComponentDepthwiseConv2d()
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDepthwiseConv2d.h b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDepthwiseConv2d.h
index 0e2b5f14cb..b3e1bd222d 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDepthwiseConv2d.h
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDepthwiseConv2d.h
@@ -25,7 +25,9 @@
#define SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTDEPTHWISECONV2D
#include "arm_compute/core/Error.h"
+
#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h"
+
#include <memory>
namespace arm_compute
@@ -77,12 +79,12 @@ public:
unsigned int m0() const;
private:
- bool _export_input_to_cl_image{ false }; /**< Export input to cl_image */
- bool _export_weights_to_cl_image{ false }; /**< Export the weights to cl_image */
- bool _fast_relaxed_math{ true }; /**< Enable/disable -cl-fast-relaxed-math flag */
- bool _is_fma_available{ false }; /**< Is fma instruction available */
- unsigned int _n0{ 0 }; /**< Number of columns processed by each thread */
- unsigned int _m0{ 0 }; /**< Number of rows processed by each thread */
+ bool _export_input_to_cl_image{false}; /**< Export input to cl_image */
+ bool _export_weights_to_cl_image{false}; /**< Export the weights to cl_image */
+ bool _fast_relaxed_math{true}; /**< Enable/disable -cl-fast-relaxed-math flag */
+ bool _is_fma_available{false}; /**< Is fma instruction available */
+ unsigned int _n0{0}; /**< Number of columns processed by each thread */
+ unsigned int _m0{0}; /**< Number of rows processed by each thread */
};
/** Forward declaration */
@@ -127,22 +129,20 @@ public:
* |F16 |F16 |F16 |F16 |
* |F32 |F32 |F32 |F32 |
*/
- static Status validate(
- const Properties &properties,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes,
- const Settings &settings);
+ static Status validate(const Properties &properties,
+ const ArgumentPack<ITensorInfo> &tensors,
+ const Attributes &attributes,
+ const Settings &settings);
/** Constructor
*
* Similar to @ref ClComponentDepthwiseConv2d::validate()
*/
- ClComponentDepthwiseConv2d(
- ComponentId id,
- const Properties &properties,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes,
- const Settings &settings);
+ ClComponentDepthwiseConv2d(ComponentId id,
+ const Properties &properties,
+ const ArgumentPack<ITensorInfo> &tensors,
+ const Attributes &attributes,
+ const Settings &settings);
/** Destructor */
~ClComponentDepthwiseConv2d() override;
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.cpp
index a713c82003..98f3d6a882 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.cpp
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.cpp
@@ -23,8 +23,8 @@
*/
#include "ClComponentDirectConv2d.h"
-#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "arm_compute/core/Validate.h"
#include "arm_compute/dynamic_fusion/sketch/attributes/Conv2dAttributes.h"
#include "src/core/CL/CLValidate.h"
@@ -57,7 +57,8 @@ bool ClComponentDirectConv2dSettings::fast_relaxed_math() const
return _fast_relaxed_math;
}
-ClComponentDirectConv2dSettings &ClComponentDirectConv2dSettings::direct_conv_descriptor(const DirectConvComputeKernelInfo &desc)
+ClComponentDirectConv2dSettings &
+ClComponentDirectConv2dSettings::direct_conv_descriptor(const DirectConvComputeKernelInfo &desc)
{
_desc = desc;
return *this;
@@ -68,11 +69,10 @@ DirectConvComputeKernelInfo ClComponentDirectConv2dSettings::direct_conv_descrip
return _desc;
}
-Status ClComponentDirectConv2d::validate(
- const Properties &properties,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes,
- const Settings &settings)
+Status ClComponentDirectConv2d::validate(const Properties &properties,
+ const ArgumentPack<ITensorInfo> &tensors,
+ const Attributes &attributes,
+ const Settings &settings)
{
ARM_COMPUTE_UNUSED(properties);
const auto src = tensors.get_const_tensor(TensorType::ACL_SRC_0);
@@ -86,7 +86,7 @@ Status ClComponentDirectConv2d::validate(
// Matching data type
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, wei);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, dst);
- if(bia != nullptr)
+ if (bia != nullptr)
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, bia);
}
@@ -94,7 +94,7 @@ Status ClComponentDirectConv2d::validate(
// Matching data layout
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(src, wei);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(src, dst);
- if(bia != nullptr)
+ if (bia != nullptr)
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(src, bia);
}
@@ -103,7 +103,7 @@ Status ClComponentDirectConv2d::validate(
ARM_COMPUTE_RETURN_ERROR_ON(src->tensor_shape().total_size() == 0);
ARM_COMPUTE_RETURN_ERROR_ON(wei->tensor_shape().total_size() == 0);
ARM_COMPUTE_RETURN_ERROR_ON(dst->tensor_shape().total_size() == 0);
- if(bia != nullptr)
+ if (bia != nullptr)
{
ARM_COMPUTE_RETURN_ERROR_ON(bia->tensor_shape().total_size() == 0);
}
@@ -112,22 +112,23 @@ Status ClComponentDirectConv2d::validate(
// wei shape is correct
const DataLayout data_layout = src->data_layout();
const int channel_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(wei->dimension(channel_idx) != src->dimension(channel_idx), "Weights feature map dimension should match the respective src's one");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(wei->dimension(channel_idx) != src->dimension(channel_idx),
+ "Weights feature map dimension should match the respective src's one");
ARM_COMPUTE_RETURN_ERROR_ON_MSG(wei->num_dimensions() > 4, "Weights can be at most 4 dimensional");
// dst shape is correct
- PadStrideInfo legacy_pad_stride(attributes.stride().x(), attributes.stride().y(), attributes.pad().left, attributes.pad().right, attributes.pad().top,
- attributes.pad().bottom, DimensionRoundingType{});
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(dst->tensor_shape(),
- misc::shape_calculator::compute_deep_convolution_shape(*src, *wei, legacy_pad_stride));
+ PadStrideInfo legacy_pad_stride(attributes.stride().x(), attributes.stride().y(), attributes.pad().left,
+ attributes.pad().right, attributes.pad().top, attributes.pad().bottom,
+ DimensionRoundingType{});
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(
+ dst->tensor_shape(), misc::shape_calculator::compute_deep_convolution_shape(*src, *wei, legacy_pad_stride));
// bia shape is correct
- if(bia != nullptr)
+ if (bia != nullptr)
{
ARM_COMPUTE_RETURN_ERROR_ON_MSG(bia->dimension(0) != wei->dimension(3),
"Biases size and number of dst feature maps should match");
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(bia->num_dimensions() > 1,
- "Biases should be one dimensional");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(bia->num_dimensions() > 1, "Biases should be one dimensional");
}
// 2. Check support level
@@ -137,24 +138,25 @@ Status ClComponentDirectConv2d::validate(
ARM_COMPUTE_RETURN_ERROR_ON_DATA_LAYOUT_NOT_IN(src, DataLayout::NHWC);
const auto desc = settings.direct_conv_descriptor();
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(desc.n0 != 1 && desc.n0 != 2 && desc.n0 != 3 && desc.n0 != 4 && desc.n0 != 8 && desc.n0 != 16,
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(desc.n0 != 1 && desc.n0 != 2 && desc.n0 != 3 && desc.n0 != 4 && desc.n0 != 8 &&
+ desc.n0 != 16,
"N0 can only be: 1, 2, 3, 4, 8, and 16");
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(desc.k0 != 1 && desc.k0 != 2 && desc.k0 != 3 && desc.k0 != 4 && desc.k0 != 8 && desc.k0 != 16,
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(desc.k0 != 1 && desc.k0 != 2 && desc.k0 != 3 && desc.k0 != 4 && desc.k0 != 8 &&
+ desc.k0 != 16,
"K0 can only be: 1, 2, 3, 4, 8, and 16");
return Status{};
}
-ClComponentDirectConv2d::ClComponentDirectConv2d(
- ComponentId id,
- const Properties &properties,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes,
- const Settings &settings)
- : IGpuKernelComponent{ id, properties, tensors },
+ClComponentDirectConv2d::ClComponentDirectConv2d(ComponentId id,
+ const Properties &properties,
+ const ArgumentPack<ITensorInfo> &tensors,
+ const Attributes &attributes,
+ const Settings &settings)
+ : IGpuKernelComponent{id, properties, tensors},
#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
- _component_writer{ std::make_unique<ClTemplateDirectConv2d>(id, tensors, attributes, settings) }
-#else // ACL_INTERNAL_TEST_CKW_IN_DF
- _component_writer{ std::make_unique<GpuCkwDirectConv2d>(id, tensors, attributes, settings) }
+ _component_writer{std::make_unique<ClTemplateDirectConv2d>(id, tensors, attributes, settings)}
+#else // ACL_INTERNAL_TEST_CKW_IN_DF
+ _component_writer{std::make_unique<GpuCkwDirectConv2d>(id, tensors, attributes, settings)}
#endif // ACL_INTERNAL_TEST_CKW_IN_DF
{
}
@@ -165,7 +167,7 @@ ClComponentDirectConv2d::~ClComponentDirectConv2d()
#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
const IGpuTemplateComponentWriter *ClComponentDirectConv2d::template_writer() const
-#else // ACL_INTERNAL_TEST_CKW_IN_DF
+#else // ACL_INTERNAL_TEST_CKW_IN_DF
const IGpuCkwComponentDriver *ClComponentDirectConv2d::ckw_component_driver() const
#endif // ACL_INTERNAL_TEST_CKW_IN_DF
{
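
The reflowed validate() above keeps the original constraint that N0 and K0 must each be one of 1, 2, 3, 4, 8 or 16. The same membership test, written as a small self-contained helper instead of a chained comparison (a sketch, not ACL code):

#include <cassert>
#include <initializer_list>

// True when v is one of the block sizes accepted by the checks above.
static bool is_valid_block_size(int v)
{
    for (int allowed : {1, 2, 3, 4, 8, 16})
    {
        if (v == allowed)
        {
            return true;
        }
    }
    return false;
}

int main()
{
    assert(is_valid_block_size(8));
    assert(!is_valid_block_size(5));
}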
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.h b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.h
index 24acb1b2c1..d6d9705d3c 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.h
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.h
@@ -26,7 +26,9 @@
#include "arm_compute/core/Error.h"
#include "arm_compute/core/KernelDescriptors.h"
+
#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h"
+
#include <memory>
namespace arm_compute
@@ -61,7 +63,7 @@ public:
DirectConvComputeKernelInfo direct_conv_descriptor() const;
private:
- bool _fast_relaxed_math{ true };
+ bool _fast_relaxed_math{true};
DirectConvComputeKernelInfo _desc{}; // Direct convolution descriptor
};
@@ -111,22 +113,20 @@ public:
* |F16 |F16 |F16 |F16 |
* |F32 |F32 |F32 |F32 |
*/
- static Status validate(
- const Properties &properties,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes,
- const Settings &settings);
+ static Status validate(const Properties &properties,
+ const ArgumentPack<ITensorInfo> &tensors,
+ const Attributes &attributes,
+ const Settings &settings);
/** Constructor
*
* Similar to @ref ClComponentDirectConv2d::validate()
*/
- ClComponentDirectConv2d(
- ComponentId id,
- const Properties &properties,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes,
- const Settings &settings);
+ ClComponentDirectConv2d(ComponentId id,
+ const Properties &properties,
+ const ArgumentPack<ITensorInfo> &tensors,
+ const Attributes &attributes,
+ const Settings &settings);
/** Destructor */
~ClComponentDirectConv2d() override;
@@ -142,7 +142,7 @@ public:
#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
const IGpuTemplateComponentWriter *template_writer() const override;
#else // ACL_INTERNAL_TEST_CKW_IN_DF
- const IGpuCkwComponentDriver *ckw_component_driver() const override;
+ const IGpuCkwComponentDriver *ckw_component_driver() const override;
#endif // ACL_INTERNAL_TEST_CKW_IN_DF
/** Get component type */
GpuComponentType type() const override
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp
index 88d729170c..5b136427e4 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp
@@ -24,6 +24,7 @@
#include "ClComponentElementwiseBinary.h"
#include "arm_compute/core/Validate.h"
+
#include "src/core/CL/CLValidate.h"
#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.h"
@@ -39,56 +40,55 @@ namespace dynamic_fusion
{
namespace
{
-std::set<ElementwiseBinaryCommonAttributes::ElementwiseOp> supported_ops
-{
- ElementwiseBinaryCommonAttributes::ElementwiseOp::Add,
- ElementwiseBinaryCommonAttributes::ElementwiseOp::Sub,
- ElementwiseBinaryCommonAttributes::ElementwiseOp::Mul
-};
+std::set<ElementwiseBinaryCommonAttributes::ElementwiseOp> supported_ops{
+ ElementwiseBinaryCommonAttributes::ElementwiseOp::Add, ElementwiseBinaryCommonAttributes::ElementwiseOp::Sub,
+ ElementwiseBinaryCommonAttributes::ElementwiseOp::Mul};
}
-Status ClComponentElementwiseBinary::validate(const ArgumentPack<ITensorInfo> &tensors, const ElementwiseBinaryCommonAttributes &attributes)
+Status ClComponentElementwiseBinary::validate(const ArgumentPack<ITensorInfo> &tensors,
+ const ElementwiseBinaryCommonAttributes &attributes)
{
const auto lhs = tensors.get_const_tensor(TensorType::ACL_SRC_0);
const auto rhs = tensors.get_const_tensor(TensorType::ACL_SRC_1);
const auto dst = tensors.get_const_tensor(TensorType::ACL_DST_0);
// Check operator type
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(supported_ops.find(attributes.operation()) == supported_ops.end(), "Provided Elementwise operation not supported.");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(supported_ops.find(attributes.operation()) == supported_ops.end(),
+ "Provided Elementwise operation not supported.");
// Check validity
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(lhs, rhs, dst);
    // Check data type for different elementwise operators
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lhs, 1, DataType::F32, DataType::F16, DataType::S32, DataType::S16, DataType::U8);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lhs, 1, DataType::F32, DataType::F16, DataType::S32,
+ DataType::S16, DataType::U8);
// dst shape is correct
const TensorShape out_shape = TensorShape::broadcast_shape(lhs->tensor_shape(), rhs->tensor_shape());
ARM_COMPUTE_RETURN_ERROR_ON_MSG(out_shape.total_size() == 0, "Inputs are not broadcast compatible");
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(detail::have_different_dimensions(out_shape, dst->tensor_shape(), 0), "Wrong shape for dst.");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(detail::have_different_dimensions(out_shape, dst->tensor_shape(), 0),
+ "Wrong shape for dst.");
const auto &lhs_shape = lhs->tensor_shape();
const auto &rhs_shape = rhs->tensor_shape();
const auto &dst_shape = dst->tensor_shape();
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(
- detail::have_different_dimensions(lhs_shape, dst_shape, 0) && detail::have_different_dimensions(rhs_shape, dst_shape, 0),
- "Only LHS or RHS can be broadcasting, not both.");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(detail::have_different_dimensions(lhs_shape, dst_shape, 0) &&
+ detail::have_different_dimensions(rhs_shape, dst_shape, 0),
+ "Only LHS or RHS can be broadcasting, not both.");
// Dimension Y and Z are collapsed together in the current kernel implementation,
// hence they cannot be independently broadcast or non-broadcast.
// See: ClTemplateElementwiseBinary::get_window
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(
- (lhs_shape[1] != dst_shape[1] || rhs_shape[1] != dst_shape[1]) != (lhs_shape[2] != dst_shape[2] || rhs_shape[2] != dst_shape[2]),
- "Dimension Y and Z must both be either broadcast or non-broadcast.");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG((lhs_shape[1] != dst_shape[1] || rhs_shape[1] != dst_shape[1]) !=
+ (lhs_shape[2] != dst_shape[2] || rhs_shape[2] != dst_shape[2]),
+ "Dimension Y and Z must both be either broadcast or non-broadcast.");
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(
- detail::have_different_dimensions(lhs_shape, dst_shape, 3),
- "LHS broadcast in dimension 3 or higher is not supported.");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(detail::have_different_dimensions(lhs_shape, dst_shape, 3),
+ "LHS broadcast in dimension 3 or higher is not supported.");
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(
- detail::have_different_dimensions(rhs_shape, dst_shape, 3),
- "RHS broadcast in dimension 3 or higher is not supported.");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(detail::have_different_dimensions(rhs_shape, dst_shape, 3),
+ "RHS broadcast in dimension 3 or higher is not supported.");
// Matching data type
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(lhs, rhs);
@@ -112,22 +112,15 @@ Status ClComponentElementwiseBinary::validate(const ArgumentPack<ITensorInfo> &t
ClComponentElementwiseBinary::~ClComponentElementwiseBinary()
{
}
-ClComponentElementwiseBinary::ClComponentElementwiseBinary(
- ComponentId id,
- const Properties &properties,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes)
- : IGpuKernelComponent{ id, properties, tensors },
+ClComponentElementwiseBinary::ClComponentElementwiseBinary(ComponentId id,
+ const Properties &properties,
+ const ArgumentPack<ITensorInfo> &tensors,
+ const Attributes &attributes)
+ : IGpuKernelComponent{id, properties, tensors},
#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
- _component_writer
-{
- std::make_unique<ClTemplateElementwiseBinary>(id, tensors, attributes)
-}
+ _component_writer{std::make_unique<ClTemplateElementwiseBinary>(id, tensors, attributes)}
#else //ACL_INTERNAL_TEST_CKW_IN_DF
- _component_writer
-{
- std::make_unique<GpuCkwElementwiseBinary>(id, tensors, attributes)
-}
+ _component_writer{std::make_unique<GpuCkwElementwiseBinary>(id, tensors, attributes)}
#endif //ACL_INTERNAL_TEST_CKW_IN_DF
{
}
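
The reflowed checks in ClComponentElementwiseBinary::validate() encode three broadcast rules: only one operand may broadcast, dimensions Y and Z must broadcast together (they are collapsed in the kernel window), and no broadcast is allowed in dimension 3 or higher. A self-contained restatement with 4-element shapes standing in for TensorShape (illustrative names, not ACL code):

#include <array>
#include <cassert>
#include <cstddef>

using Shape4 = std::array<std::size_t, 4>; // stand-in for TensorShape

// True if a and b differ in any dimension >= first_dim (the role played by
// detail::have_different_dimensions above).
static bool differs_from(const Shape4 &a, const Shape4 &b, std::size_t first_dim)
{
    for (std::size_t d = first_dim; d < a.size(); ++d)
    {
        if (a[d] != b[d])
        {
            return true;
        }
    }
    return false;
}

static bool broadcast_rules_ok(const Shape4 &lhs, const Shape4 &rhs, const Shape4 &dst)
{
    // Only LHS or RHS may broadcast, not both.
    if (differs_from(lhs, dst, 0) && differs_from(rhs, dst, 0))
    {
        return false;
    }
    // Y and Z are collapsed in the kernel, so they must broadcast together.
    const bool y_broadcast = (lhs[1] != dst[1]) || (rhs[1] != dst[1]);
    const bool z_broadcast = (lhs[2] != dst[2]) || (rhs[2] != dst[2]);
    if (y_broadcast != z_broadcast)
    {
        return false;
    }
    // No broadcast in dimension 3 or higher on either side.
    return !differs_from(lhs, dst, 3) && !differs_from(rhs, dst, 3);
}

int main()
{
    assert(broadcast_rules_ok({8, 4, 4, 2}, {8, 1, 1, 2}, {8, 4, 4, 2}));  // RHS broadcasts Y and Z together
    assert(!broadcast_rules_ok({8, 1, 4, 2}, {8, 4, 4, 2}, {8, 4, 4, 2})); // Y broadcast without Z: rejected
}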
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.h b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.h
index f7175903d0..7589b9732c 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.h
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.h
@@ -82,17 +82,17 @@ public:
* |S16 |S16 |S16 |
* |U8 |U8 |U8 |
*/
- static Status validate(const ArgumentPack<ITensorInfo> &tensors, const ElementwiseBinaryCommonAttributes &attributes);
+ static Status validate(const ArgumentPack<ITensorInfo> &tensors,
+ const ElementwiseBinaryCommonAttributes &attributes);
/** Constructor
*
* Similar to @ref ClComponentElementwiseBinary::validate()
*/
- ClComponentElementwiseBinary(
- ComponentId id,
- const Properties &properties,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes);
+ ClComponentElementwiseBinary(ComponentId id,
+ const Properties &properties,
+ const ArgumentPack<ITensorInfo> &tensors,
+ const Attributes &attributes);
/** Destructor */
~ClComponentElementwiseBinary() override;
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DMaxShiftExpSum.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DMaxShiftExpSum.cpp
index 279c77e227..27c13bd654 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DMaxShiftExpSum.cpp
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DMaxShiftExpSum.cpp
@@ -25,9 +25,10 @@
#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DMaxShiftExpSum.h"
#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "arm_compute/core/Validate.h"
#include "arm_compute/dynamic_fusion/sketch/attributes/SoftmaxAttributes.h"
+
#include "src/core/CL/CLValidate.h"
#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DMaxShiftExpSum.h"
@@ -37,10 +38,9 @@ namespace experimental
{
namespace dynamic_fusion
{
-Status ClComponentLogits1DMaxShiftExpSum::validate(
- const Properties &properties,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes)
+Status ClComponentLogits1DMaxShiftExpSum::validate(const Properties &properties,
+ const ArgumentPack<ITensorInfo> &tensors,
+ const Attributes &attributes)
{
ARM_COMPUTE_UNUSED(properties, attributes);
@@ -75,8 +75,8 @@ ClComponentLogits1DMaxShiftExpSum::ClComponentLogits1DMaxShiftExpSum(ComponentId
const Properties &properties,
const ArgumentPack<ITensorInfo> &tensors,
const Attributes &attributes)
- : IGpuKernelComponent{ id, properties, tensors },
- _component_writer{ std::make_unique<ClTemplateLogits1DMaxShiftExpSum>(id, tensors, attributes) }
+ : IGpuKernelComponent{id, properties, tensors},
+ _component_writer{std::make_unique<ClTemplateLogits1DMaxShiftExpSum>(id, tensors, attributes)}
{
}
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DMaxShiftExpSum.h b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DMaxShiftExpSum.h
index b5db458248..91ab5de3b5 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DMaxShiftExpSum.h
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DMaxShiftExpSum.h
@@ -25,6 +25,7 @@
#define SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTLOGITS1DMAXSHIFTEXPSUM
#include "arm_compute/dynamic_fusion/sketch/attributes/SoftmaxAttributes.h"
+
#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h"
namespace arm_compute
@@ -89,10 +90,8 @@ public:
* |F16 | F16 | F16 |
* |F32 | F32 | F32 |
*/
- static Status validate(
- const Properties &properties,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes);
+ static Status
+ validate(const Properties &properties, const ArgumentPack<ITensorInfo> &tensors, const Attributes &attributes);
/** Constructor
*
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DNorm.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DNorm.cpp
index 7864d56d29..fb2544385c 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DNorm.cpp
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DNorm.cpp
@@ -25,9 +25,10 @@
#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DNorm.h"
#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "arm_compute/core/Validate.h"
#include "arm_compute/dynamic_fusion/sketch/attributes/SoftmaxAttributes.h"
+
#include "src/core/CL/CLValidate.h"
#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DNorm.h"
@@ -37,10 +38,9 @@ namespace experimental
{
namespace dynamic_fusion
{
-Status ClComponentLogits1DNorm::validate(
- const Properties &properties,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes)
+Status ClComponentLogits1DNorm::validate(const Properties &properties,
+ const ArgumentPack<ITensorInfo> &tensors,
+ const Attributes &attributes)
{
ARM_COMPUTE_UNUSED(properties, attributes);
@@ -77,8 +77,8 @@ ClComponentLogits1DNorm::ClComponentLogits1DNorm(ComponentId
const Properties &properties,
const ArgumentPack<ITensorInfo> &tensors,
const Attributes &attributes)
- : IGpuKernelComponent{ id, properties, tensors },
- _component_writer{ std::make_unique<ClTemplateLogits1DNorm>(id, tensors, attributes) }
+ : IGpuKernelComponent{id, properties, tensors},
+ _component_writer{std::make_unique<ClTemplateLogits1DNorm>(id, tensors, attributes)}
{
}
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DNorm.h b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DNorm.h
index 5bd350b9bd..74c0273604 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DNorm.h
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DNorm.h
@@ -25,6 +25,7 @@
#define SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTLOGITS1DNORM
#include "arm_compute/dynamic_fusion/sketch/attributes/SoftmaxAttributes.h"
+
#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h"
namespace arm_compute
@@ -86,10 +87,8 @@ public:
* |F16 | F16 | F16 |
* |F32 | F32 | F32 |
*/
- static Status validate(
- const Properties &properties,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes);
+ static Status
+ validate(const Properties &properties, const ArgumentPack<ITensorInfo> &tensors, const Attributes &attributes);
/** Constructor
*
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.cpp
index d415769094..409b191df5 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.cpp
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.cpp
@@ -24,13 +24,15 @@
#include "ClComponentPool2d.h"
#include "arm_compute/core/Error.h"
-#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "arm_compute/core/Validate.h"
#include "arm_compute/dynamic_fusion/sketch/attributes/Pool2dAttributes.h"
+
#include "src/core/CL/CLValidate.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwPool2d.h"
#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.h"
#include "src/dynamic_fusion/utils/Utils.h"
+
#include <memory>
namespace arm_compute
@@ -39,23 +41,24 @@ namespace experimental
{
namespace dynamic_fusion
{
-Status ClComponentPool2d::validate(
- const Properties &properties,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes,
- const Settings &settings)
+Status ClComponentPool2d::validate(const Properties &properties,
+ const ArgumentPack<ITensorInfo> &tensors,
+ const Attributes &attributes,
+ const Settings &settings)
{
ARM_COMPUTE_UNUSED(properties);
const auto src = tensors.get_const_tensor(TensorType::ACL_SRC_0);
const auto dst = tensors.get_const_tensor(TensorType::ACL_DST_0);
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst);
- ARM_COMPUTE_ERROR_ON_MSG((attributes.pool_type() != PoolingType::AVG && attributes.pool_type() != PoolingType::MAX), "Unsupported Pooling type");
+ ARM_COMPUTE_ERROR_ON_MSG((attributes.pool_type() != PoolingType::AVG && attributes.pool_type() != PoolingType::MAX),
+ "Unsupported Pooling type");
// 1. Check validity
// Check if pooling is valid
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(is_pool_region_entirely_outside_input(convert_pool_attr_to_pool_info(attributes, settings.mixed_precision())),
- "Pooling region that is entirely outside input tensor is unsupported");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(
+ is_pool_region_entirely_outside_input(convert_pool_attr_to_pool_info(attributes, settings.mixed_precision())),
+ "Pooling region that is entirely outside input tensor is unsupported");
// Matching data type
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, dst);
@@ -70,8 +73,9 @@ Status ClComponentPool2d::validate(
// Device requirements are met
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(src);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(dst->tensor_shape(),
- misc::shape_calculator::compute_pool_shape(*src, convert_pool_attr_to_pool_info(attributes, settings.mixed_precision())));
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(
+ dst->tensor_shape(), misc::shape_calculator::compute_pool_shape(
+ *src, convert_pool_attr_to_pool_info(attributes, settings.mixed_precision())));
// 2. Check support level
// Data type
@@ -83,23 +87,16 @@ Status ClComponentPool2d::validate(
return Status{};
}
-ClComponentPool2d::ClComponentPool2d(
- ComponentId id,
- const Properties &properties,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes,
- const Settings &settings)
- : IGpuKernelComponent{ id, properties, tensors },
+ClComponentPool2d::ClComponentPool2d(ComponentId id,
+ const Properties &properties,
+ const ArgumentPack<ITensorInfo> &tensors,
+ const Attributes &attributes,
+ const Settings &settings)
+ : IGpuKernelComponent{id, properties, tensors},
#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
- _component_writer
-{
- std::make_unique<ClTemplatePool2d>(id, tensors, attributes, settings)
-}
+ _component_writer{std::make_unique<ClTemplatePool2d>(id, tensors, attributes, settings)}
#else //ACL_INTERNAL_TEST_CKW_IN_DF
- _component_writer
-{
- std::make_unique<GpuCkwPool2d>(id, tensors, attributes, settings)
-}
+ _component_writer{std::make_unique<GpuCkwPool2d>(id, tensors, attributes, settings)}
#endif //ACL_INTERNAL_TEST_CKW_IN_DF
{
}
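
ClComponentPool2d::validate() above rejects pooling regions that fall entirely outside the input. A sketch of one way that situation arises, under the assumption (for illustration only; the real helper inspects a full PoolingLayerInfo) that a window is outside the input when it fits inside the padding on some axis:

#include <algorithm>
#include <cassert>

// Hypothetical predicate: the first pooling window covers padding only when
// the window does not exceed the padding on either side of an axis.
static bool pool_region_entirely_outside_input(unsigned int pool_w, unsigned int pool_h,
                                               unsigned int pad_left, unsigned int pad_right,
                                               unsigned int pad_top, unsigned int pad_bottom)
{
    const bool outside_x = pool_w <= std::max(pad_left, pad_right);
    const bool outside_y = pool_h <= std::max(pad_top, pad_bottom);
    return outside_x || outside_y;
}

int main()
{
    assert(pool_region_entirely_outside_input(2, 2, 3, 0, 0, 0));  // 2x2 window inside 3-wide padding
    assert(!pool_region_entirely_outside_input(3, 3, 1, 1, 1, 1)); // always overlaps the input
}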
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.h b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.h
index 6814bf9243..98fed65004 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.h
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.h
@@ -25,6 +25,7 @@
#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTPOOL2D_H
#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuPool2d.h"
+
#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h"
namespace arm_compute
@@ -82,11 +83,10 @@ public:
* |F16 |F16 |
* |F32 |F32 |
*/
- static Status validate(
- const Properties &properties,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes,
- const Settings &settings);
+ static Status validate(const Properties &properties,
+ const ArgumentPack<ITensorInfo> &tensors,
+ const Attributes &attributes,
+ const Settings &settings);
/** Constructor
*
@@ -96,12 +96,11 @@ public:
* @param[in] attributes Component attributes
* @param[in] settings Component settings
*/
- ClComponentPool2d(
- ComponentId id,
- const Properties &properties,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes,
- const Settings &settings);
+ ClComponentPool2d(ComponentId id,
+ const Properties &properties,
+ const ArgumentPack<ITensorInfo> &tensors,
+ const Attributes &attributes,
+ const Settings &settings);
/** Destructor */
~ClComponentPool2d() override;
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentReshape.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentReshape.cpp
index 66e2ee6956..0ece9de970 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentReshape.cpp
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentReshape.cpp
@@ -22,8 +22,10 @@
* SOFTWARE.
*/
#include "ClComponentReshape.h"
+
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Validate.h"
+
#include "src/core/CL/CLValidate.h"
#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateReshape.h"
@@ -49,12 +51,10 @@ Status ClComponentReshape::validate(const ArgumentPack<ITensorInfo> &tensors)
return Status{};
}
-ClComponentReshape::ClComponentReshape(
- ComponentId id,
- const Properties &properties,
- const ArgumentPack<ITensorInfo> &tensors)
- : IGpuKernelComponent{ id, properties, tensors },
- _component_writer{ std::make_unique<ClTemplateReshape>(id, tensors) }
+ClComponentReshape::ClComponentReshape(ComponentId id,
+ const Properties &properties,
+ const ArgumentPack<ITensorInfo> &tensors)
+ : IGpuKernelComponent{id, properties, tensors}, _component_writer{std::make_unique<ClTemplateReshape>(id, tensors)}
{
}
ClComponentReshape::~ClComponentReshape()
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentReshape.h b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentReshape.h
index f8d165b4c8..78163d6603 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentReshape.h
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentReshape.h
@@ -73,10 +73,7 @@ public:
* @param[in] properties Component properties @ref Properties
* @param[in] tensors Tensor arguments to the component
*/
- ClComponentReshape(
- ComponentId id,
- const Properties &properties,
- const ArgumentPack<ITensorInfo> &tensors);
+ ClComponentReshape(ComponentId id, const Properties &properties, const ArgumentPack<ITensorInfo> &tensors);
/** Destructor */
~ClComponentReshape() override;
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentResize.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentResize.cpp
index 6df1d9b3db..b05eb04698 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentResize.cpp
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentResize.cpp
@@ -66,7 +66,9 @@ Status ClComponentResize::validate(const IGpuKernelComponent::Properties &proper
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(src);
// Align corners and sampling policy conformance
- ARM_COMPUTE_RETURN_ERROR_ON(attributes.align_corners() && !arm_compute::scale_utils::is_align_corners_allowed_sampling_policy(attributes.sampling_policy()));
+ ARM_COMPUTE_RETURN_ERROR_ON(
+ attributes.align_corners() &&
+ !arm_compute::scale_utils::is_align_corners_allowed_sampling_policy(attributes.sampling_policy()));
// All tensor infos are initialized
ARM_COMPUTE_RETURN_ERROR_ON(src->tensor_shape().total_size() == 0);
@@ -79,11 +81,11 @@ ClComponentResize::ClComponentResize(ComponentId id,
const IGpuKernelComponent::Properties &properties,
const ArgumentPack<ITensorInfo> &tensors,
const ClComponentResize::Attributes &attributes)
- : IGpuKernelComponent{ id, properties, tensors },
+ : IGpuKernelComponent{id, properties, tensors},
#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
- _component_writer{ std::make_unique<ClTemplateResize>(id, tensors, attributes) }
-#else // ACL_INTERNAL_TEST_CKW_IN_DF
- _component_writer{ std::make_unique<GpuCkwResize>(id, tensors, attributes) }
+ _component_writer{std::make_unique<ClTemplateResize>(id, tensors, attributes)}
+#else // ACL_INTERNAL_TEST_CKW_IN_DF
+ _component_writer{std::make_unique<GpuCkwResize>(id, tensors, attributes)}
#endif // ACL_INTERNAL_TEST_CKW_IN_DF
{
}
@@ -94,7 +96,7 @@ ClComponentResize::~ClComponentResize()
#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
const IGpuTemplateComponentWriter *ClComponentResize::template_writer() const
-#else // ACL_INTERNAL_TEST_CKW_IN_DF
+#else // ACL_INTERNAL_TEST_CKW_IN_DF
const IGpuCkwComponentDriver *ClComponentResize::ckw_component_driver() const
#endif // ACL_INTERNAL_TEST_CKW_IN_DF
{
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentResize.h b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentResize.h
index 474524f8fc..29276c3257 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentResize.h
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentResize.h
@@ -26,6 +26,7 @@
#define SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTRESIZE
#include "arm_compute/dynamic_fusion/sketch/attributes/ResizeAttributes.h"
+
#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h"
namespace arm_compute
@@ -43,7 +44,7 @@ class ArgumentPack;
/** Forward declaration */
#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
class ClTemplateResize;
-#else // ACL_INTERNAL_TEST_CKW_IN_DF
+#else // ACL_INTERNAL_TEST_CKW_IN_DF
class GpuCkwResize;
#endif // ACL_INTERNAL_TEST_CKW_IN_DF
@@ -82,10 +83,8 @@ public:
* |U8 |U8 |
* |S16 |S16 |
*/
- static Status validate(
- const Properties &properties,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes);
+ static Status
+ validate(const Properties &properties, const ArgumentPack<ITensorInfo> &tensors, const Attributes &attributes);
/** Constructor
*
@@ -114,7 +113,7 @@ public:
/** Get writer for the component */
#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
const IGpuTemplateComponentWriter *template_writer() const override;
-#else // ACL_INTERNAL_TEST_CKW_IN_DF
+#else // ACL_INTERNAL_TEST_CKW_IN_DF
const IGpuCkwComponentDriver *ckw_component_driver() const override;
#endif // ACL_INTERNAL_TEST_CKW_IN_DF
@@ -127,7 +126,7 @@ public:
private:
#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
std::unique_ptr<ClTemplateResize> _component_writer;
-#else // ACL_INTERNAL_TEST_CKW_IN_DF
+#else // ACL_INTERNAL_TEST_CKW_IN_DF
std::unique_ptr<GpuCkwResize> _component_writer;
#endif // ACL_INTERNAL_TEST_CKW_IN_DF
};
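
Every component touched by this patch selects its writer backend at compile time via ACL_INTERNAL_TEST_CKW_IN_DF, as in the _component_writer members above. The pattern in isolation, with stand-in types for the template writer and the CKW driver:

#include <cassert>
#include <memory>

struct TemplateWriter {}; // stand-in for e.g. ClTemplateResize
struct CkwDriver {};      // stand-in for e.g. GpuCkwResize

class Component
{
public:
    bool has_writer() const { return _component_writer != nullptr; }

private:
#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
    std::unique_ptr<TemplateWriter> _component_writer{std::make_unique<TemplateWriter>()};
#else  // ACL_INTERNAL_TEST_CKW_IN_DF
    std::unique_ptr<CkwDriver> _component_writer{std::make_unique<CkwDriver>()};
#endif // ACL_INTERNAL_TEST_CKW_IN_DF
};

int main()
{
    Component c; // which writer type exists is decided entirely at compile time
    assert(c.has_writer());
}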
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.cpp
index 12b81c3d56..dcbecaff35 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.cpp
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.cpp
@@ -38,25 +38,19 @@ namespace experimental
{
namespace dynamic_fusion
{
-Status ClComponentStore::validate(
- const Properties &properties,
- const ArgumentPack<ITensorInfo> &tensors)
+Status ClComponentStore::validate(const Properties &properties, const ArgumentPack<ITensorInfo> &tensors)
{
ARM_COMPUTE_UNUSED(properties, tensors);
return Status{};
}
-ClComponentStore::ClComponentStore(ComponentId id, const Properties &properties, const ArgumentPack<ITensorInfo> &tensors)
- : IGpuKernelComponent{ id, properties, tensors },
+ClComponentStore::ClComponentStore(ComponentId id,
+ const Properties &properties,
+ const ArgumentPack<ITensorInfo> &tensors)
+ : IGpuKernelComponent{id, properties, tensors},
#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
- _component_writer
-{
- std::make_unique<ClTemplateStore>(id, tensors)
-}
+ _component_writer{std::make_unique<ClTemplateStore>(id, tensors)}
#else //ACL_INTERNAL_TEST_CKW_IN_DF
- _component_writer
-{
- std::make_unique<GpuCkwStore>(id, tensors)
-}
+ _component_writer{std::make_unique<GpuCkwStore>(id, tensors)}
#endif //ACL_INTERNAL_TEST_CKW_IN_DF
{
}
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.h b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.h
index 853ee39012..948785c480 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.h
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.h
@@ -25,6 +25,7 @@
#define SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTSTORE
#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h"
+
#include <memory>
namespace arm_compute
@@ -70,9 +71,7 @@ public:
* |:--------------|:--------------|
* |All |All |
*/
- static Status validate(
- const Properties &properties,
- const ArgumentPack<ITensorInfo> &tensors);
+ static Status validate(const Properties &properties, const ArgumentPack<ITensorInfo> &tensors);
/** Constructor
*
* Similar to @ref ClComponentStore::validate()
diff --git a/src/dynamic_fusion/sketch/gpu/components/utils/type_printer/ElementwiseBinary.h b/src/dynamic_fusion/sketch/gpu/components/utils/type_printer/ElementwiseBinary.h
index bc7133f4df..4c3e84e59d 100644
--- a/src/dynamic_fusion/sketch/gpu/components/utils/type_printer/ElementwiseBinary.h
+++ b/src/dynamic_fusion/sketch/gpu/components/utils/type_printer/ElementwiseBinary.h
@@ -46,18 +46,16 @@ using namespace experimental::dynamic_fusion;
*/
inline ::std::ostream &operator<<(::std::ostream &os, const ClComponentElementwiseBinary::Attributes::ElementwiseOp &op)
{
- const std::map<ClComponentElementwiseBinary::Attributes::ElementwiseOp, std::string> op_name =
- {
- { ClComponentElementwiseBinary::Attributes::ElementwiseOp::Add, "add" },
- { ClComponentElementwiseBinary::Attributes::ElementwiseOp::Div, "div" },
- { ClComponentElementwiseBinary::Attributes::ElementwiseOp::Max, "max" },
- { ClComponentElementwiseBinary::Attributes::ElementwiseOp::Min, "min" },
- { ClComponentElementwiseBinary::Attributes::ElementwiseOp::Mul, "mul" },
- { ClComponentElementwiseBinary::Attributes::ElementwiseOp::Power, "power" },
- { ClComponentElementwiseBinary::Attributes::ElementwiseOp::Prelu, "prelu" },
- { ClComponentElementwiseBinary::Attributes::ElementwiseOp::SquaredDiff, "squareddiff" },
- { ClComponentElementwiseBinary::Attributes::ElementwiseOp::Sub, "sub" }
- };
+ const std::map<ClComponentElementwiseBinary::Attributes::ElementwiseOp, std::string> op_name = {
+ {ClComponentElementwiseBinary::Attributes::ElementwiseOp::Add, "add"},
+ {ClComponentElementwiseBinary::Attributes::ElementwiseOp::Div, "div"},
+ {ClComponentElementwiseBinary::Attributes::ElementwiseOp::Max, "max"},
+ {ClComponentElementwiseBinary::Attributes::ElementwiseOp::Min, "min"},
+ {ClComponentElementwiseBinary::Attributes::ElementwiseOp::Mul, "mul"},
+ {ClComponentElementwiseBinary::Attributes::ElementwiseOp::Power, "power"},
+ {ClComponentElementwiseBinary::Attributes::ElementwiseOp::Prelu, "prelu"},
+ {ClComponentElementwiseBinary::Attributes::ElementwiseOp::SquaredDiff, "squareddiff"},
+ {ClComponentElementwiseBinary::Attributes::ElementwiseOp::Sub, "sub"}};
os << op_name.at(op);
return os;
}
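
The operator<< above resolves each enum value through std::map::at, which throws for an unmapped operator rather than printing garbage. A minimal usage sketch with a stand-in enum (not the ACL types; a real caller would include this type_printer header instead):

#include <iostream>
#include <map>
#include <sstream>
#include <string>

enum class ElementwiseOp { Add, Sub, Mul }; // illustrative subset

std::ostream &operator<<(std::ostream &os, ElementwiseOp op)
{
    static const std::map<ElementwiseOp, std::string> op_name = {
        {ElementwiseOp::Add, "add"}, {ElementwiseOp::Sub, "sub"}, {ElementwiseOp::Mul, "mul"}};
    return os << op_name.at(op); // throws std::out_of_range for an unmapped value
}

int main()
{
    std::ostringstream ss;
    ss << ElementwiseOp::Mul;
    std::cout << ss.str() << '\n'; // prints: mul
}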
diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuAdd.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuAdd.cpp
index e7ee1c10df..2cec67dc65 100644
--- a/src/dynamic_fusion/sketch/gpu/operators/GpuAdd.cpp
+++ b/src/dynamic_fusion/sketch/gpu/operators/GpuAdd.cpp
@@ -22,6 +22,7 @@
* SOFTWARE.
*/
#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuAdd.h"
+
#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h"
#include "src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.h"
@@ -32,12 +33,11 @@ namespace experimental
{
namespace dynamic_fusion
{
-Status GpuAdd::validate_op(const GpuWorkloadSketch &sketch,
- const ITensorInfo *lhs,
- const ITensorInfo *rhs)
+Status GpuAdd::validate_op(const GpuWorkloadSketch &sketch, const ITensorInfo *lhs, const ITensorInfo *rhs)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(lhs, rhs);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lhs, 1, DataType::F16, DataType::F32, DataType::U8, DataType::S16, DataType::S32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lhs, 1, DataType::F16, DataType::F32, DataType::U8,
+ DataType::S16, DataType::S32);
ARM_COMPUTE_RETURN_ERROR_ON_MSG(lhs->data_type() != rhs->data_type(), "Input tensors must be the same data type");
// Set the elementwise operation to Add then call the elementwise common validate_op
@@ -46,12 +46,11 @@ Status GpuAdd::validate_op(const GpuWorkloadSketch &sketch,
return GpuElementwiseBinaryCommon::validate_op(sketch, lhs, rhs, common_attributes);
}
-Status GpuAdd::is_supported_op(const GpuWorkloadContext &context,
- const ITensorInfo *lhs,
- const ITensorInfo *rhs)
+Status GpuAdd::is_supported_op(const GpuWorkloadContext &context, const ITensorInfo *lhs, const ITensorInfo *rhs)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(lhs, rhs);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lhs, 1, DataType::F16, DataType::F32, DataType::U8, DataType::S16, DataType::S32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lhs, 1, DataType::F16, DataType::F32, DataType::U8,
+ DataType::S16, DataType::S32);
ARM_COMPUTE_RETURN_ERROR_ON_MSG(lhs->data_type() != rhs->data_type(), "Input tensors must be the same data type");
// Set the elementwise operation to Add then call the elementwise common is_supported_op
@@ -60,9 +59,7 @@ Status GpuAdd::is_supported_op(const GpuWorkloadContext &context,
return GpuElementwiseBinaryCommon::is_supported_op(context, lhs, rhs, common_attributes);
}
-ITensorInfo *GpuAdd::create_op(GpuWorkloadSketch &sketch,
- ITensorInfo *lhs,
- ITensorInfo *rhs)
+ITensorInfo *GpuAdd::create_op(GpuWorkloadSketch &sketch, ITensorInfo *lhs, ITensorInfo *rhs)
{
// No need to log or validate as they'll be handled inside GpuElementwiseBinaryCommon::create_op()
// Set the elementwise operation to Add then call the elementwise common create_op
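
GpuAdd above never implements the elementwise semantics itself: each entry point pins the operation to Add on a common attributes object and forwards to GpuElementwiseBinaryCommon. The delegation pattern in miniature (stand-in types; the trivial common_validate body is an assumption for illustration):

#include <iostream>

enum class ElementwiseOp { Add, Sub, Mul };

struct CommonAttributes
{
    CommonAttributes &operation(ElementwiseOp op)
    {
        _op = op;
        return *this;
    }
    ElementwiseOp _op{ElementwiseOp::Add};
};

// Shared validation path; trivially accepts everything in this sketch.
static bool common_validate(const CommonAttributes &)
{
    return true;
}

// Operator-specific wrapper: pin the operation, then delegate.
static bool add_validate()
{
    CommonAttributes common_attributes{};
    common_attributes.operation(ElementwiseOp::Add);
    return common_validate(common_attributes);
}

int main()
{
    std::cout << std::boolalpha << add_validate() << '\n'; // true
}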
diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuCast.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuCast.cpp
index 33c2d43e07..6f35e66ea8 100644
--- a/src/dynamic_fusion/sketch/gpu/operators/GpuCast.cpp
+++ b/src/dynamic_fusion/sketch/gpu/operators/GpuCast.cpp
@@ -23,12 +23,11 @@
*/
#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuCast.h"
+#include "src/common/utils/Log.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/dynamic_fusion/sketch/ArgumentPack.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h"
#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.h"
-
-#include "src/common/utils/Log.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h"
namespace arm_compute
{
@@ -49,7 +48,7 @@ Status is_supported_op_helper(const GpuWorkloadContext &context,
TensorInfo dst_info_to_validate;
const ITensorInfo *dst_info_to_validate_ptr = &dst_info_to_validate;
- if(dst != nullptr)
+ if (dst != nullptr)
{
dst_info_to_validate_ptr = dst;
}
@@ -58,25 +57,22 @@ Status is_supported_op_helper(const GpuWorkloadContext &context,
// Check support level
// Data Type
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src,
- 1,
- DataType::U8, DataType::S8, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::QSYMM8_PER_CHANNEL, DataType::S16,
- DataType::U16, DataType::U32, DataType::S32, DataType::F16,
- DataType::F32);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(dst_info_to_validate_ptr,
- 1,
- DataType::U8, DataType::S8, DataType::QASYMM8, DataType::S16,
- DataType::U16, DataType::U32, DataType::S32, DataType::F16,
- DataType::F32);
-
- if(context.gpu_language() == GpuLanguage::OpenCL)
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(
+ src, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::QSYMM8_PER_CHANNEL,
+ DataType::S16, DataType::U16, DataType::U32, DataType::S32, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(dst_info_to_validate_ptr, 1, DataType::U8, DataType::S8,
+ DataType::QASYMM8, DataType::S16, DataType::U16, DataType::U32,
+ DataType::S32, DataType::F16, DataType::F32);
+
+ if (context.gpu_language() == GpuLanguage::OpenCL)
{
const auto cl_compile_ctx = context.cl_compile_context();
ARM_COMPUTE_RETURN_ERROR_ON(cl_compile_ctx == nullptr);
// Validate Cast Component
{
- const auto properties = IGpuKernelComponent::Properties().stage(UnitWorkloadStage{ UnitWorkloadStage::Stage::Run });
- auto settings = ClComponentCast::Settings();
+ const auto properties =
+ IGpuKernelComponent::Properties().stage(UnitWorkloadStage{UnitWorkloadStage::Stage::Run});
+ auto settings = ClComponentCast::Settings();
ArgumentPack<ITensorInfo> arguments;
arguments.add_const_tensor(ACL_SRC_0, src);
@@ -94,16 +90,13 @@ Status is_supported_op_helper(const GpuWorkloadContext &context,
constexpr GpuOperatorType operator_type = GpuOperatorType::Simple;
} // namespace
-Status GpuCast::is_supported_op(const GpuWorkloadContext &context,
- const ITensorInfo *src,
- const CastAttributes &attributes)
+Status
+GpuCast::is_supported_op(const GpuWorkloadContext &context, const ITensorInfo *src, const CastAttributes &attributes)
{
return is_supported_op_helper(context, src, nullptr, attributes);
}
-Status GpuCast::validate_op(const GpuWorkloadSketch &sketch,
- const ITensorInfo *src,
- const CastAttributes &attributes)
+Status GpuCast::validate_op(const GpuWorkloadSketch &sketch, const ITensorInfo *src, const CastAttributes &attributes)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src);
ARM_COMPUTE_RETURN_ERROR_ON(!src->has_valid_id());
@@ -127,9 +120,7 @@ Status GpuCast::validate_op(const GpuWorkloadSketch &sketch,
return is_supported_op_helper(*sketch.gpu_context(), src, &dst_info_to_validate, attributes);
}
-ITensorInfo *GpuCast::create_op(GpuWorkloadSketch &sketch,
- ITensorInfo *src,
- const CastAttributes &attributes)
+ITensorInfo *GpuCast::create_op(GpuWorkloadSketch &sketch, ITensorInfo *src, const CastAttributes &attributes)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(src);
ARM_COMPUTE_LOG_PARAMS(src, attributes);
@@ -145,14 +136,15 @@ ITensorInfo *GpuCast::create_op(GpuWorkloadSketch &sketch,
GpuKernelComponentGraph &comp_graph = sketch.implementation().component_graph();
const auto *sketch_ctx = sketch.implementation().context();
- if(sketch_ctx->gpu_language() == GpuLanguage::OpenCL)
+ if (sketch_ctx->gpu_language() == GpuLanguage::OpenCL)
{
ARM_COMPUTE_ERROR_ON(sketch_ctx->cl_compile_context() == nullptr);
        // Add Cast Component
{
- const auto properties = IGpuKernelComponent::Properties().stage(UnitWorkloadStage{ UnitWorkloadStage::Stage::Run });
- auto settings = ClComponentCast::Settings();
+ const auto properties =
+ IGpuKernelComponent::Properties().stage(UnitWorkloadStage{UnitWorkloadStage::Stage::Run});
+ auto settings = ClComponentCast::Settings();
ArgumentPack<ITensorInfo> arguments;
arguments.add_const_tensor(ACL_SRC_0, src);
diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuClamp.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuClamp.cpp
index 89b533c9b8..697b7d4e1f 100644
--- a/src/dynamic_fusion/sketch/gpu/operators/GpuClamp.cpp
+++ b/src/dynamic_fusion/sketch/gpu/operators/GpuClamp.cpp
@@ -25,14 +25,13 @@
#include "arm_compute/core/experimental/Types.h"
+#include "src/common/utils/Log.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/dynamic_fusion/sketch/ArgumentPack.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h"
#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h"
#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.h"
-#include "src/common/utils/Log.h"
-
namespace arm_compute
{
namespace experimental
@@ -48,12 +47,13 @@ Status is_supported_op_helper(const GpuWorkloadContext &context,
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst);
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::F16, DataType::F32);
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(attributes.max_val() < attributes.min_val(), "Maximum clamp value cannot be lower than minimum value");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(attributes.max_val() < attributes.min_val(),
+ "Maximum clamp value cannot be lower than minimum value");
TensorInfo dst_info_to_validate;
const ITensorInfo *dst_info_to_validate_ptr = &dst_info_to_validate;
- if(dst != nullptr)
+ if (dst != nullptr)
{
dst_info_to_validate_ptr = dst;
}
@@ -61,16 +61,15 @@ Status is_supported_op_helper(const GpuWorkloadContext &context,
auto_init_if_empty(dst_info_to_validate, *src->clone());
// CLAMP operator is implemented as LU_BOUNDED_RELU with the alpha and beta variables swapped
- const ClComponentActivation::Attributes act_info
- {
- ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, attributes.max_val(), attributes.min_val()
- };
+ const ClComponentActivation::Attributes act_info{ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU,
+ attributes.max_val(), attributes.min_val()};
// Check components
- if(context.gpu_language() == GpuLanguage::OpenCL)
+ if (context.gpu_language() == GpuLanguage::OpenCL)
{
// Validate Activation Component
- const auto properties = IGpuKernelComponent::Properties().stage(UnitWorkloadStage{ UnitWorkloadStage::Stage::Run });
+ const auto properties =
+ IGpuKernelComponent::Properties().stage(UnitWorkloadStage{UnitWorkloadStage::Stage::Run});
ArgumentPack<ITensorInfo> arguments;
arguments.add_const_tensor(ACL_SRC, src);
@@ -87,16 +86,13 @@ Status is_supported_op_helper(const GpuWorkloadContext &context,
constexpr GpuOperatorType operator_type = GpuOperatorType::Simple;
} // namespace
-Status GpuClamp::is_supported_op(const GpuWorkloadContext &context,
- const ITensorInfo *src,
- const ClampAttributes &attributes)
+Status
+GpuClamp::is_supported_op(const GpuWorkloadContext &context, const ITensorInfo *src, const ClampAttributes &attributes)
{
return is_supported_op_helper(context, src, nullptr, attributes);
}
-Status GpuClamp::validate_op(const GpuWorkloadSketch &sketch,
- const ITensorInfo *src,
- const ClampAttributes &attributes)
+Status GpuClamp::validate_op(const GpuWorkloadSketch &sketch, const ITensorInfo *src, const ClampAttributes &attributes)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src);
@@ -121,9 +117,7 @@ Status GpuClamp::validate_op(const GpuWorkloadSketch &sketch,
return is_supported_op_helper(*sketch.gpu_context(), src, &dst_info_to_validate, attributes);
}
-ITensorInfo *GpuClamp::create_op(GpuWorkloadSketch &sketch,
- ITensorInfo *src,
- const ClampAttributes &attributes)
+ITensorInfo *GpuClamp::create_op(GpuWorkloadSketch &sketch, ITensorInfo *src, const ClampAttributes &attributes)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(src);
ARM_COMPUTE_LOG_PARAMS(src, attributes);
@@ -139,18 +133,16 @@ ITensorInfo *GpuClamp::create_op(GpuWorkloadSketch &sketch,
GpuKernelComponentGraph &comp_graph = sketch.implementation().component_graph();
// CLAMP operator is implemented as LU_BOUNDED_RELU with the alpha and beta variables swapped
- const ClComponentActivation::Attributes act_info
- {
- ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, attributes.max_val(), attributes.min_val()
- };
+ const ClComponentActivation::Attributes act_info{ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU,
+ attributes.max_val(), attributes.min_val()};
const auto *const sketch_ctx = sketch.implementation().context();
- if(sketch_ctx->gpu_language() == GpuLanguage::OpenCL)
+ if (sketch_ctx->gpu_language() == GpuLanguage::OpenCL)
{
// Add Activation Component
auto properties = IGpuKernelComponent::Properties();
- properties.stage(UnitWorkloadStage{ UnitWorkloadStage::Stage::Run });
+ properties.stage(UnitWorkloadStage{UnitWorkloadStage::Stage::Run});
ArgumentPack<ITensorInfo> arguments;
arguments.add_const_tensor(ACL_SRC, src);
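
The comment in GpuClamp above says CLAMP lowers to LU_BOUNDED_RELU with the alpha and beta variables swapped. Since LU_BOUNDED_RELU(x; alpha, beta) = min(alpha, max(beta, x)), passing alpha = max_val and beta = min_val clamps x into [min_val, max_val]. A standalone check of that identity (plain C++, not the ACL activation kernel):

#include <algorithm>
#include <cassert>

// LU_BOUNDED_RELU(x; alpha, beta) = min(alpha, max(beta, x)).
static float lu_bounded_relu(float x, float alpha, float beta)
{
    return std::min(alpha, std::max(beta, x));
}

// Clamp expressed through it, with alpha = max_val and beta = min_val:
// exactly the swap described in the comment above.
static float clamp_via_activation(float x, float min_val, float max_val)
{
    return lu_bounded_relu(x, /*alpha=*/max_val, /*beta=*/min_val);
}

int main()
{
    assert(clamp_via_activation(-3.0f, -1.0f, 1.0f) == -1.0f); // clipped to min
    assert(clamp_via_activation(0.5f, -1.0f, 1.0f) == 0.5f);   // passed through
    assert(clamp_via_activation(7.0f, -1.0f, 1.0f) == 1.0f);   // clipped to max
}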
diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuConv2d.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuConv2d.cpp
index cb270ed4b0..aaeec543f8 100644
--- a/src/dynamic_fusion/sketch/gpu/operators/GpuConv2d.cpp
+++ b/src/dynamic_fusion/sketch/gpu/operators/GpuConv2d.cpp
@@ -24,15 +24,15 @@
#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuConv2d.h"
#include "arm_compute/core/KernelDescriptors.h"
-#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "src/common/utils/Log.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/dynamic_fusion/sketch/ArgumentPack.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h"
#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h"
#include "src/gpu/cl/kernels/gemm/ClGemmHelpers.h"
#include "src/runtime/heuristics/direct_conv/ClDirectConvKernelConfig.h"
#include "src/runtime/heuristics/direct_conv/IClDirectConvKernelConfig.h"
@@ -45,24 +45,30 @@ namespace dynamic_fusion
{
namespace
{
-DirectConvComputeKernelInfo config_direct_convolution_nhwc(const ITensorInfo *src, const ITensorInfo *weights, const PadStrideInfo &conv_info)
+DirectConvComputeKernelInfo
+config_direct_convolution_nhwc(const ITensorInfo *src, const ITensorInfo *weights, const PadStrideInfo &conv_info)
{
// Get GPU target
GPUTarget gpu_target = CLScheduler::get().target();
- std::unique_ptr<arm_compute::cl_direct_conv::IClDirectConvKernelConfig> t = arm_compute::cl_direct_conv::ClDirectConvKernelConfigurationFactory::create(gpu_target);
+ std::unique_ptr<arm_compute::cl_direct_conv::IClDirectConvKernelConfig> t =
+ arm_compute::cl_direct_conv::ClDirectConvKernelConfigurationFactory::create(gpu_target);
return t->configure(src, weights, conv_info);
}
-void calculate_and_init_dst_if_empty(ITensorInfo *dst, const ITensorInfo *src, const ITensorInfo *wei, const Conv2dAttributes &attributes)
+void calculate_and_init_dst_if_empty(ITensorInfo *dst,
+ const ITensorInfo *src,
+ const ITensorInfo *wei,
+ const Conv2dAttributes &attributes)
{
- if(dst->total_size() == 0U)
+ if (dst->total_size() == 0U)
{
- const auto shape = misc::shape_calculator::compute_deep_convolution_shape(src->tensor_shape(), src->data_layout(), wei->tensor_shape(),
- PadStrideInfo(attributes.stride().x(), attributes.stride().y(), attributes.pad().left,
- attributes.pad().right,
- attributes.pad().top, attributes.pad().bottom, DimensionRoundingType::FLOOR)); // use the default DimensionRoundingType
+ const auto shape = misc::shape_calculator::compute_deep_convolution_shape(
+ src->tensor_shape(), src->data_layout(), wei->tensor_shape(),
+ PadStrideInfo(attributes.stride().x(), attributes.stride().y(), attributes.pad().left,
+ attributes.pad().right, attributes.pad().top, attributes.pad().bottom,
+ DimensionRoundingType::FLOOR)); // use the default DimensionRoundingType
auto_init_if_empty(*dst, src->clone()->set_tensor_shape(shape));
}
@@ -83,7 +89,7 @@ Status is_supported_op_helper(const GpuWorkloadContext &context,
TensorInfo dst_info_to_validate;
const ITensorInfo *dst_info_to_validate_ptr = &dst_info_to_validate;
- if(dst != nullptr)
+ if (dst != nullptr)
{
dst_info_to_validate_ptr = dst;
}
@@ -98,18 +104,20 @@ Status is_supported_op_helper(const GpuWorkloadContext &context,
// Check components
const auto gpu_target = context.gpu_target();
- if(context.gpu_language() == GpuLanguage::OpenCL)
+ if (context.gpu_language() == GpuLanguage::OpenCL)
{
const auto cl_compile_ctx = context.cl_compile_context();
ARM_COMPUTE_RETURN_ERROR_ON(cl_compile_ctx == nullptr);
// Validate Direct Conv2d Component
{
- const auto properties = IGpuKernelComponent::Properties().stage(UnitWorkloadStage{ UnitWorkloadStage::Stage::Run });
- auto settings = ClComponentDirectConv2d::Settings();
+ const auto properties =
+ IGpuKernelComponent::Properties().stage(UnitWorkloadStage{UnitWorkloadStage::Stage::Run});
+ auto settings = ClComponentDirectConv2d::Settings();
settings.fast_relaxed_math(
- (gpu_target != GPUTarget::G71 && (gpu_target & GPUTarget::GPU_ARCH_MASK) == GPUTarget::BIFROST)
- && (dst_info_to_validate_ptr->data_type() == DataType::F32 || dst_info_to_validate_ptr->data_type() == DataType::F16));
+ (gpu_target != GPUTarget::G71 && (gpu_target & GPUTarget::GPU_ARCH_MASK) == GPUTarget::BIFROST) &&
+ (dst_info_to_validate_ptr->data_type() == DataType::F32 ||
+ dst_info_to_validate_ptr->data_type() == DataType::F16));
ArgumentPack<ITensorInfo> arguments;
arguments.add_const_tensor(ACL_SRC_0, src);
@@ -142,14 +150,14 @@ Status GpuConv2d::validate_op(const GpuWorkloadSketch &sketch,
const ITensorInfo *src,
const ITensorInfo *wei,
const ITensorInfo *bia,
- const Conv2dAttributes &attributes)
+ const Conv2dAttributes &attributes)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, wei);
ARM_COMPUTE_RETURN_ERROR_ON_MSG(!wei->are_values_constant(), "Dynamic weights are not supported");
// Check if tensors have valid id. I.e. they are created from a sketch
ARM_COMPUTE_RETURN_ERROR_ON(!src->has_valid_id() || !wei->has_valid_id());
- if(bia != nullptr)
+ if (bia != nullptr)
{
ARM_COMPUTE_RETURN_ERROR_ON(!bia->has_valid_id());
}
@@ -178,16 +186,13 @@ Status GpuConv2d::validate_op(const GpuWorkloadSketch &sketch,
return is_supported_op_helper(*sketch.gpu_context(), src, wei, bia, &dst_info_to_validate, attributes);
}
-ITensorInfo *GpuConv2d::create_op(GpuWorkloadSketch &sketch,
- ITensorInfo *src,
- ITensorInfo *wei,
- ITensorInfo *bia,
- const Conv2dAttributes &attributes)
+ITensorInfo *GpuConv2d::create_op(
+ GpuWorkloadSketch &sketch, ITensorInfo *src, ITensorInfo *wei, ITensorInfo *bia, const Conv2dAttributes &attributes)
{
ARM_COMPUTE_LOG_PARAMS(src, wei, bia, attributes);
PadStrideInfo conv_info(attributes.stride().x(), attributes.stride().y(), attributes.pad().left,
- attributes.pad().right,
- attributes.pad().top, attributes.pad().bottom, DimensionRoundingType::FLOOR);
+ attributes.pad().right, attributes.pad().top, attributes.pad().bottom,
+ DimensionRoundingType::FLOOR);
// Initialize the direct convolution descriptor
const DirectConvComputeKernelInfo desc = config_direct_convolution_nhwc(src, wei, conv_info);
@@ -207,7 +212,7 @@ ITensorInfo *GpuConv2d::create_op(GpuWorkloadSketch &sketch,
const auto gpu_target = sketch_ctx->gpu_target();
- if(sketch_ctx->gpu_language() == GpuLanguage::OpenCL)
+ if (sketch_ctx->gpu_language() == GpuLanguage::OpenCL)
{
const auto cl_compile_ctx = sketch_ctx->cl_compile_context();
ARM_COMPUTE_ERROR_ON(cl_compile_ctx == nullptr);
@@ -216,17 +221,17 @@ ITensorInfo *GpuConv2d::create_op(GpuWorkloadSketch &sketch,
// Add Direct Conv2d Component
{
auto properties = IGpuKernelComponent::Properties();
- properties.stage(UnitWorkloadStage{ UnitWorkloadStage::Stage::Run });
+ properties.stage(UnitWorkloadStage{UnitWorkloadStage::Stage::Run});
auto settings = ClComponentDirectConv2d::Settings();
settings.fast_relaxed_math(
- (gpu_target != GPUTarget::G71 && (gpu_target & GPUTarget::GPU_ARCH_MASK) == GPUTarget::BIFROST)
- && (dst->data_type() == DataType::F32 || dst->data_type() == DataType::F16));
+ (gpu_target != GPUTarget::G71 && (gpu_target & GPUTarget::GPU_ARCH_MASK) == GPUTarget::BIFROST) &&
+ (dst->data_type() == DataType::F32 || dst->data_type() == DataType::F16));
settings.direct_conv_descriptor(desc);
- if(settings.export_to_cl_image())
+ if (settings.export_to_cl_image())
{
arm_compute::opencl::kernels::gemm::update_padding_for_cl_image(wei);
}
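
create_op above builds its PadStrideInfo with DimensionRoundingType::FLOOR before computing the deep-convolution output shape. Per spatial dimension that is the standard formula out = floor((in + pad_before + pad_after - kernel) / stride) + 1, sketched here (illustrative helper, not the ACL shape calculator):

#include <cassert>

// Output extent of one spatial dimension with FLOOR rounding, matching the
// default DimensionRoundingType used above; compute_deep_convolution_shape
// applies the equivalent computation per spatial dimension.
static unsigned int conv_out_dim_floor(
    unsigned int in, unsigned int kernel, unsigned int stride, unsigned int pad_before, unsigned int pad_after)
{
    return (in + pad_before + pad_after - kernel) / stride + 1; // integer division == floor
}

int main()
{
    // e.g. a 7-wide input, 3-wide kernel, stride 2, no padding -> 3-wide output.
    assert(conv_out_dim_floor(7, 3, 2, 0, 0) == 3);
}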
diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuDepthwiseConv2d.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuDepthwiseConv2d.cpp
index c72098e943..e2b673bd43 100644
--- a/src/dynamic_fusion/sketch/gpu/operators/GpuDepthwiseConv2d.cpp
+++ b/src/dynamic_fusion/sketch/gpu/operators/GpuDepthwiseConv2d.cpp
@@ -28,8 +28,8 @@
#include "src/common/utils/Log.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/dynamic_fusion/sketch/ArgumentPack.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h"
#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDepthwiseConv2d.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h"
#include "src/gpu/cl/kernels/gemm/ClGemmHelpers.h"
#include "src/runtime/heuristics/dwc_native/ClDWCNativeKernelConfig.h"
#include "src/runtime/heuristics/dwc_native/IClDWCNativeKernelConfig.h"
@@ -42,20 +42,20 @@ namespace dynamic_fusion
{
namespace
{
-void calculate_and_init_dst_if_empty(ITensorInfo *dst, const ITensorInfo *src, const ITensorInfo *wei, const DepthwiseConv2dAttributes &attributes)
+void calculate_and_init_dst_if_empty(ITensorInfo *dst,
+ const ITensorInfo *src,
+ const ITensorInfo *wei,
+ const DepthwiseConv2dAttributes &attributes)
{
- if(dst->total_size() == 0U)
+ if (dst->total_size() == 0U)
{
- const PadStrideInfo pad_stride_info(attributes.stride().x(),
- attributes.stride().y(),
- attributes.pad().left,
- attributes.pad().right,
- attributes.pad().top,
- attributes.pad().bottom,
+ const PadStrideInfo pad_stride_info(attributes.stride().x(), attributes.stride().y(), attributes.pad().left,
+ attributes.pad().right, attributes.pad().top, attributes.pad().bottom,
attributes.dimension_rounding_type());
- const ConvolutionInfo conv_info{ pad_stride_info, attributes.depth_multiplier(), ActivationLayerInfo(), attributes.dilation() };
- const TensorShape shape = misc::shape_calculator::compute_depthwise_convolution_shape(*src, *wei, conv_info);
+ const ConvolutionInfo conv_info{pad_stride_info, attributes.depth_multiplier(), ActivationLayerInfo(),
+ attributes.dilation()};
+ const TensorShape shape = misc::shape_calculator::compute_depthwise_convolution_shape(*src, *wei, conv_info);
auto_init_if_empty(*dst, src->clone()->set_tensor_shape(shape));
}
@@ -76,7 +76,7 @@ Status is_supported_op_helper(const GpuWorkloadContext &context,
TensorInfo dst_info_to_validate;
const ITensorInfo *dst_info_to_validate_ptr = &dst_info_to_validate;
- if(dst != nullptr)
+ if (dst != nullptr)
{
dst_info_to_validate_ptr = dst;
}
@@ -91,40 +91,44 @@ Status is_supported_op_helper(const GpuWorkloadContext &context,
const GpuTarget gpu_target = context.gpu_target();
- if(context.gpu_language() == GpuLanguage::OpenCL)
+ if (context.gpu_language() == GpuLanguage::OpenCL)
{
const CLCompileContext *cl_compile_ctx = context.cl_compile_context();
ARM_COMPUTE_RETURN_ERROR_ON(cl_compile_ctx == nullptr);
// Validate Depthwise Conv2d Component
{
- const auto properties = IGpuKernelComponent::Properties().stage(UnitWorkloadStage{ UnitWorkloadStage::Stage::Run });
- auto settings = ClComponentDepthwiseConv2d::Settings();
+ const auto properties =
+ IGpuKernelComponent::Properties().stage(UnitWorkloadStage{UnitWorkloadStage::Stage::Run});
+ auto settings = ClComponentDepthwiseConv2d::Settings();
- const PadStrideInfo legacy_conv_info(attributes.stride().x(), attributes.stride().y(), attributes.pad().left,
- attributes.pad().right,
- attributes.pad().top, attributes.pad().bottom, DimensionRoundingType::FLOOR);
+ const PadStrideInfo legacy_conv_info(attributes.stride().x(), attributes.stride().y(),
+ attributes.pad().left, attributes.pad().right, attributes.pad().top,
+ attributes.pad().bottom, DimensionRoundingType::FLOOR);
// Get the depthwise convolution compute parameters
- auto t = arm_compute::cl_dwc::ClDWCNativeKernelConfigurationFactory::create(gpu_target);
- const DWCComputeKernelInfo dwc_info = t->configure(src, wei, legacy_conv_info, attributes.dilation(), attributes.depth_multiplier());
+ auto t = arm_compute::cl_dwc::ClDWCNativeKernelConfigurationFactory::create(gpu_target);
+ const DWCComputeKernelInfo dwc_info =
+ t->configure(src, wei, legacy_conv_info, attributes.dilation(), attributes.depth_multiplier());
settings.fast_relaxed_math(
- (gpu_target != GPUTarget::G71 && (gpu_target & GPUTarget::GPU_ARCH_MASK) == GPUTarget::BIFROST)
- && (dst_info_to_validate_ptr->data_type() == DataType::F32 || dst_info_to_validate_ptr->data_type() == DataType::F16));
+ (gpu_target != GPUTarget::G71 && (gpu_target & GPUTarget::GPU_ARCH_MASK) == GPUTarget::BIFROST) &&
+ (dst_info_to_validate_ptr->data_type() == DataType::F32 ||
+ dst_info_to_validate_ptr->data_type() == DataType::F16));
settings.is_fma_available(get_arch_from_target(gpu_target) == GPUTarget::MIDGARD)
- .m0(dwc_info.m0)
- .n0(dwc_info.n0)
- .export_input_to_cl_image(dwc_info.export_input_to_cl_image)
- .export_weights_to_cl_image(dwc_info.export_weights_to_cl_image);
+ .m0(dwc_info.m0)
+ .n0(dwc_info.n0)
+ .export_input_to_cl_image(dwc_info.export_input_to_cl_image)
+ .export_weights_to_cl_image(dwc_info.export_weights_to_cl_image);
ArgumentPack<ITensorInfo> arguments;
arguments.add_const_tensor(ACL_SRC_0, src);
arguments.add_const_tensor(ACL_SRC_1, wei);
arguments.add_const_tensor(ACL_SRC_2, bia);
arguments.add_const_tensor(ACL_DST_0, dst_info_to_validate_ptr);
- ARM_COMPUTE_RETURN_ON_ERROR(ClComponentDepthwiseConv2d::validate(properties, arguments, attributes, settings));
+ ARM_COMPUTE_RETURN_ON_ERROR(
+ ClComponentDepthwiseConv2d::validate(properties, arguments, attributes, settings));
}
}
else
@@ -158,7 +162,7 @@ Status GpuDepthwiseConv2d::validate_op(const GpuWorkloadSketch &sketch,
ARM_COMPUTE_RETURN_ERROR_ON(!src->has_valid_id() || !wei->has_valid_id());
- if(bia != nullptr)
+ if (bia != nullptr)
{
ARM_COMPUTE_RETURN_ERROR_ON(!bia->has_valid_id());
}
@@ -205,35 +209,37 @@ ITensorInfo *GpuDepthwiseConv2d::create_op(GpuWorkloadSketch &sket
const auto *sketch_ctx = sketch.implementation().context();
const GpuTarget gpu_target = sketch_ctx->gpu_target();
- if(sketch_ctx->gpu_language() == GpuLanguage::OpenCL)
+ if (sketch_ctx->gpu_language() == GpuLanguage::OpenCL)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(sketch_ctx->cl_compile_context());
// Add Depthwise Conv2d Component
{
- const auto properties = IGpuKernelComponent::Properties().stage(UnitWorkloadStage{ UnitWorkloadStage::Stage::Run });
- auto settings = ClComponentDepthwiseConv2d::Settings();
+ const auto properties =
+ IGpuKernelComponent::Properties().stage(UnitWorkloadStage{UnitWorkloadStage::Stage::Run});
+ auto settings = ClComponentDepthwiseConv2d::Settings();
- const PadStrideInfo legacy_conv_info(attributes.stride().x(), attributes.stride().y(), attributes.pad().left,
- attributes.pad().right,
- attributes.pad().top, attributes.pad().bottom, DimensionRoundingType::FLOOR);
+ const PadStrideInfo legacy_conv_info(attributes.stride().x(), attributes.stride().y(),
+ attributes.pad().left, attributes.pad().right, attributes.pad().top,
+ attributes.pad().bottom, DimensionRoundingType::FLOOR);
// Get the depthwise convolution compute parameters
- auto t = arm_compute::cl_dwc::ClDWCNativeKernelConfigurationFactory::create(gpu_target);
- const DWCComputeKernelInfo dwc_info = t->configure(src, wei, legacy_conv_info, attributes.dilation(), attributes.depth_multiplier());
+ auto t = arm_compute::cl_dwc::ClDWCNativeKernelConfigurationFactory::create(gpu_target);
+ const DWCComputeKernelInfo dwc_info =
+ t->configure(src, wei, legacy_conv_info, attributes.dilation(), attributes.depth_multiplier());
settings.is_fma_available(get_arch_from_target(gpu_target) != GPUTarget::MIDGARD)
- .m0(dwc_info.m0)
- .n0(dwc_info.n0)
- .export_input_to_cl_image(dwc_info.export_input_to_cl_image)
- .export_weights_to_cl_image(dwc_info.export_weights_to_cl_image);
+ .m0(dwc_info.m0)
+ .n0(dwc_info.n0)
+ .export_input_to_cl_image(dwc_info.export_input_to_cl_image)
+ .export_weights_to_cl_image(dwc_info.export_weights_to_cl_image);
- if(settings.export_input_to_cl_image())
+ if (settings.export_input_to_cl_image())
{
arm_compute::opencl::kernels::gemm::update_padding_for_cl_image(src);
}
- if(settings.export_weights_to_cl_image())
+ if (settings.export_weights_to_cl_image())
{
arm_compute::opencl::kernels::gemm::update_padding_for_cl_image(wei);
}
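
The settings chain above (is_fma_available(...).m0(...).n0(...)...) works because every setter returns *this. A small self-contained sketch of the same fluent style, using a hypothetical struct rather than ACL's ClComponentDepthwiseConv2d::Settings:

struct DwcSettings
{
    DwcSettings &m0(int v)                          { m0_ = v; return *this; }
    DwcSettings &n0(int v)                          { n0_ = v; return *this; }
    DwcSettings &export_weights_to_cl_image(bool b) { export_weights_ = b; return *this; }

    int  m0_{0};
    int  n0_{0};
    bool export_weights_{false};
};

int main()
{
    DwcSettings s;
    s.m0(4).n0(8).export_weights_to_cl_image(true); // each setter returns *this, so calls chain
    return (s.m0_ == 4 && s.n0_ == 8 && s.export_weights_) ? 0 : 1;
}
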
diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuMul.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuMul.cpp
index 464a32cbad..b871171e8d 100644
--- a/src/dynamic_fusion/sketch/gpu/operators/GpuMul.cpp
+++ b/src/dynamic_fusion/sketch/gpu/operators/GpuMul.cpp
@@ -22,6 +22,7 @@
* SOFTWARE.
*/
#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuMul.h"
+
#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h"
#include "src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.h"
@@ -32,9 +33,7 @@ namespace experimental
{
namespace dynamic_fusion
{
-Status GpuMul::validate_op(const GpuWorkloadSketch &sketch,
- const ITensorInfo *lhs,
- const ITensorInfo *rhs)
+Status GpuMul::validate_op(const GpuWorkloadSketch &sketch, const ITensorInfo *lhs, const ITensorInfo *rhs)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(lhs, rhs);
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lhs, 1, DataType::F16, DataType::F32);
@@ -46,9 +45,7 @@ Status GpuMul::validate_op(const GpuWorkloadSketch &sketch,
return GpuElementwiseBinaryCommon::validate_op(sketch, lhs, rhs, common_attributes);
}
-Status GpuMul::is_supported_op(const GpuWorkloadContext &context,
- const ITensorInfo *lhs,
- const ITensorInfo *rhs)
+Status GpuMul::is_supported_op(const GpuWorkloadContext &context, const ITensorInfo *lhs, const ITensorInfo *rhs)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(lhs, rhs);
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lhs, 1, DataType::F16, DataType::F32);
@@ -60,9 +57,7 @@ Status GpuMul::is_supported_op(const GpuWorkloadContext &context,
return GpuElementwiseBinaryCommon::is_supported_op(context, lhs, rhs, common_attributes);
}
-ITensorInfo *GpuMul::create_op(GpuWorkloadSketch &sketch,
- ITensorInfo *lhs,
- ITensorInfo *rhs)
+ITensorInfo *GpuMul::create_op(GpuWorkloadSketch &sketch, ITensorInfo *lhs, ITensorInfo *rhs)
{
// Set the elementwise operation to Mul then call the elementwise common create_op
ElementwiseBinaryCommonAttributes common_attributes{};
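
GpuMul, like GpuSub further down in this patch, is a thin front-end: it only selects the operation on the shared attributes object and forwards to GpuElementwiseBinaryCommon. A standalone sketch of that delegation pattern with illustrative stand-in types:

enum class ElementwiseOp { Add, Sub, Mul };

struct CommonAttributes
{
    CommonAttributes &operation(ElementwiseOp op) { op_ = op; return *this; }
    ElementwiseOp op_{ElementwiseOp::Add};
};

// Stand-in for GpuElementwiseBinaryCommon::create_op, which does the real work.
int create_common(const CommonAttributes &attrs) { return static_cast<int>(attrs.op_); }

// Thin front-end mirroring GpuMul::create_op: pick the op, then delegate.
int create_mul()
{
    CommonAttributes common_attributes{};
    common_attributes.operation(ElementwiseOp::Mul);
    return create_common(common_attributes);
}

int main() { return create_mul() == 2 ? 0 : 1; }
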
diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuOutput.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuOutput.cpp
index 107a5e5fa7..f0d368d757 100644
--- a/src/dynamic_fusion/sketch/gpu/operators/GpuOutput.cpp
+++ b/src/dynamic_fusion/sketch/gpu/operators/GpuOutput.cpp
@@ -26,10 +26,9 @@
#include "src/common/utils/Log.h"
#include "src/core/helpers/AutoConfiguration.h"
-
#include "src/dynamic_fusion/sketch/ArgumentPack.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h"
#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h"
#include "src/dynamic_fusion/utils/Utils.h"
namespace arm_compute
@@ -43,9 +42,7 @@ namespace
constexpr GpuOperatorType operator_type = GpuOperatorType::Simple;
} // namespace
-Status GpuOutput::is_supported_op(const GpuWorkloadContext &context,
- const ITensorInfo *src,
- const ITensorInfo *dst)
+Status GpuOutput::is_supported_op(const GpuWorkloadContext &context, const ITensorInfo *src, const ITensorInfo *dst)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst);
@@ -60,9 +57,7 @@ Status GpuOutput::is_supported_op(const GpuWorkloadContext &context,
return Status{};
}
-Status GpuOutput::validate_op(const GpuWorkloadSketch &sketch,
- const ITensorInfo *src,
- const ITensorInfo *dst)
+Status GpuOutput::validate_op(const GpuWorkloadSketch &sketch, const ITensorInfo *src, const ITensorInfo *dst)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst);
ARM_COMPUTE_RETURN_ERROR_ON(!src->has_valid_id());
@@ -90,9 +85,7 @@ Status GpuOutput::validate_op(const GpuWorkloadSketch &sketch,
return status;
}
-void GpuOutput::create_op(GpuWorkloadSketch &sketch,
- ITensorInfo *src,
- ITensorInfo *dst)
+void GpuOutput::create_op(GpuWorkloadSketch &sketch, ITensorInfo *src, ITensorInfo *dst)
{
ARM_COMPUTE_LOG_PARAMS(src, dst);
ARM_COMPUTE_ERROR_THROW_ON(GpuOutput::validate_op(sketch, src, dst));
@@ -104,14 +97,14 @@ void GpuOutput::create_op(GpuWorkloadSketch &sketch,
auto &comp_graph = sketch.implementation().component_graph();
const auto sketch_ctx = sketch.implementation().context();
- if(sketch_ctx->gpu_language() == GpuLanguage::OpenCL)
+ if (sketch_ctx->gpu_language() == GpuLanguage::OpenCL)
{
ARM_COMPUTE_ERROR_ON(sketch_ctx->cl_compile_context() == nullptr);
// Add store component
{
IGpuKernelComponent::Properties properties;
- properties.stage(UnitWorkloadStage{ UnitWorkloadStage::Stage::Run });
+ properties.stage(UnitWorkloadStage{UnitWorkloadStage::Stage::Run});
ArgumentPack<ITensorInfo> arguments;
arguments.add_const_tensor(ACL_SRC_0, src);
diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuPool2d.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuPool2d.cpp
index 7ecfa0158b..55c604aacc 100644
--- a/src/dynamic_fusion/sketch/gpu/operators/GpuPool2d.cpp
+++ b/src/dynamic_fusion/sketch/gpu/operators/GpuPool2d.cpp
@@ -22,20 +22,21 @@
* SOFTWARE.
*/
+#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuPool2d.h"
+
#include "arm_compute/core/CL/CLCompileContext.h"
-#include "arm_compute/core/Validate.h"
#include "arm_compute/core/experimental/Types.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "arm_compute/core/Validate.h"
#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadContext.h"
-
#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h"
-#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuPool2d.h"
+
#include "src/common/utils/Log.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/dynamic_fusion/sketch/ArgumentPack.h"
+#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.h"
#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h"
#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSourceCode.h"
-#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.h"
#include "src/dynamic_fusion/utils/Utils.h"
namespace arm_compute
@@ -46,11 +47,15 @@ namespace dynamic_fusion
{
namespace
{
-void calculate_and_init_dst_if_empty(ITensorInfo *dst, const ITensorInfo *src, const Pool2dAttributes &attributes, const GpuPool2dSettings &settings)
+void calculate_and_init_dst_if_empty(ITensorInfo *dst,
+ const ITensorInfo *src,
+ const Pool2dAttributes &attributes,
+ const GpuPool2dSettings &settings)
{
- if(dst->total_size() == 0U)
+ if (dst->total_size() == 0U)
{
- auto shape = misc::shape_calculator::compute_pool_shape(*src, convert_pool_attr_to_pool_info(attributes, settings.mixed_precision()));
+ auto shape = misc::shape_calculator::compute_pool_shape(
+ *src, convert_pool_attr_to_pool_info(attributes, settings.mixed_precision()));
auto_init_if_empty(*dst, src->clone()->set_tensor_shape(shape));
}
}
@@ -82,7 +87,7 @@ bool GpuPool2dSettings::use_inf_as_limit() const
Status GpuPool2d::validate_op(const GpuWorkloadSketch &sketch,
const ITensorInfo *src,
- const Pool2dAttributes &attributes,
+ const Pool2dAttributes &attributes,
const GpuPool2dSettings &settings)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src);
@@ -110,7 +115,7 @@ Status GpuPool2d::validate_op(const GpuWorkloadSketch &sketch,
Status GpuPool2d::is_supported_op(const GpuWorkloadContext &context,
const ITensorInfo *src,
const Pool2dAttributes &attributes,
- const GpuPool2dSettings &settings)
+ const GpuPool2dSettings &settings)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src);
// Data type
@@ -118,7 +123,8 @@ Status GpuPool2d::is_supported_op(const GpuWorkloadContext &context,
// Data layout
ARM_COMPUTE_RETURN_ERROR_ON_DATA_LAYOUT_NOT_IN(src, DataLayout::NHWC);
// Check exclude padding is not false
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(!attributes.exclude_padding(), "Exclude padding must be set to true in Attributes!");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(!attributes.exclude_padding(),
+ "Exclude padding must be set to true in Attributes!");
// Auto initialize dst tensor info
TensorInfo dst_info_to_validate;
@@ -126,14 +132,15 @@ Status GpuPool2d::is_supported_op(const GpuWorkloadContext &context,
calculate_and_init_dst_if_empty(&dst_info_to_validate, src, attributes, settings);
// Check components
- if(context.gpu_language() == GpuLanguage::OpenCL)
+ if (context.gpu_language() == GpuLanguage::OpenCL)
{
const auto cl_compile_ctx = context.cl_compile_context();
ARM_COMPUTE_RETURN_ERROR_ON(cl_compile_ctx == nullptr);
// Validate Component
{
- const KernelProperties properties = IGpuKernelComponent::Properties().stage(UnitWorkloadStage{ UnitWorkloadStage::Stage::Run });
+ const KernelProperties properties =
+ IGpuKernelComponent::Properties().stage(UnitWorkloadStage{UnitWorkloadStage::Stage::Run});
ArgumentPack<ITensorInfo> arguments;
arguments.add_const_tensor(ACL_SRC_0, src);
@@ -148,10 +155,10 @@ Status GpuPool2d::is_supported_op(const GpuWorkloadContext &context,
return Status{};
}
-ITensorInfo *GpuPool2d::create_op(GpuWorkloadSketch &sketch,
- ITensorInfo *src,
- const Pool2dAttributes &attributes,
- const GpuPool2dSettings &settings)
+ITensorInfo *GpuPool2d::create_op(GpuWorkloadSketch &sketch,
+ ITensorInfo *src,
+ const Pool2dAttributes &attributes,
+ const GpuPool2dSettings &settings)
{
// Assert validation
ARM_COMPUTE_ERROR_THROW_ON(GpuPool2d::validate_op(sketch, src, attributes, settings));
@@ -168,7 +175,7 @@ ITensorInfo *GpuPool2d::create_op(GpuWorkloadSketch &sketch,
const auto sketch_ctx = sketch.implementation().context();
- if(sketch_ctx->gpu_language() == GpuLanguage::OpenCL)
+ if (sketch_ctx->gpu_language() == GpuLanguage::OpenCL)
{
const auto cl_compile_ctx = sketch_ctx->cl_compile_context();
ARM_COMPUTE_UNUSED(cl_compile_ctx);
@@ -177,7 +184,7 @@ ITensorInfo *GpuPool2d::create_op(GpuWorkloadSketch &sketch,
// Add Component
{
auto properties = IGpuKernelComponent::Properties();
- properties.stage(UnitWorkloadStage{ UnitWorkloadStage::Stage::Run });
+ properties.stage(UnitWorkloadStage{UnitWorkloadStage::Stage::Run});
ArgumentPack<ITensorInfo> arguments;
arguments.add_const_tensor(ACL_SRC_0, src);
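
calculate_and_init_dst_if_empty above shows the auto-initialisation idiom used by every operator in this patch: the destination info is only derived when the caller left it empty (total_size() == 0). A stand-in sketch, with a plain struct in place of ITensorInfo and of the shape calculator:

#include <cstddef>
#include <vector>

struct TensorStub // minimal stand-in for ITensorInfo
{
    std::vector<std::size_t> shape; // empty => not configured yet

    std::size_t total_size() const
    {
        std::size_t n = shape.empty() ? 0 : 1;
        for (std::size_t d : shape)
            n *= d;
        return n;
    }
};

void init_dst_if_empty(TensorStub &dst, const std::vector<std::size_t> &computed_shape)
{
    if (dst.total_size() == 0U) // leave dst untouched if the caller already configured it
    {
        dst.shape = computed_shape;
    }
}

int main()
{
    TensorStub dst{};
    init_dst_if_empty(dst, {8, 8, 3}); // in the real code the shape comes from compute_pool_shape()
    return dst.total_size() == 192 ? 0 : 1;
}
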
diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuReshape.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuReshape.cpp
index 0f43a578df..3def7a1a81 100644
--- a/src/dynamic_fusion/sketch/gpu/operators/GpuReshape.cpp
+++ b/src/dynamic_fusion/sketch/gpu/operators/GpuReshape.cpp
@@ -22,12 +22,14 @@
* SOFTWARE.
*/
#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuReshape.h"
+
#include "arm_compute/core/Error.h"
+
#include "src/common/utils/Log.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/dynamic_fusion/sketch/ArgumentPack.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h"
#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentReshape.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h"
namespace arm_compute
{
@@ -40,14 +42,14 @@ namespace
Status is_supported_op_helper(const GpuWorkloadContext &context,
const ITensorInfo *src,
const ITensorInfo *dst,
- const ReshapeAttributes &attributes)
+ const ReshapeAttributes &attributes)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src);
TensorInfo dst_info_to_validate;
const ITensorInfo *dst_info_to_validate_ptr = &dst_info_to_validate;
- if(dst != nullptr)
+ if (dst != nullptr)
{
dst_info_to_validate_ptr = dst;
}
@@ -55,7 +57,7 @@ Status is_supported_op_helper(const GpuWorkloadContext &context,
auto_init_if_empty(dst_info_to_validate, src->clone()->set_tensor_shape(attributes.shape()));
// Check components
- if(context.gpu_language() == GpuLanguage::OpenCL)
+ if (context.gpu_language() == GpuLanguage::OpenCL)
{
const auto cl_compile_ctx = context.cl_compile_context();
ARM_COMPUTE_RETURN_ERROR_ON(cl_compile_ctx == nullptr);
@@ -78,16 +80,13 @@ Status is_supported_op_helper(const GpuWorkloadContext &context,
GpuOperatorType operator_type = GpuOperatorType::Complex;
} // namespace
-Status GpuReshape::is_supported_op(const GpuWorkloadContext &context,
- const ITensorInfo *src,
- const Attributes &attributes)
+Status
+GpuReshape::is_supported_op(const GpuWorkloadContext &context, const ITensorInfo *src, const Attributes &attributes)
{
return is_supported_op_helper(context, src, nullptr, attributes);
}
-Status GpuReshape::validate_op(const GpuWorkloadSketch &sketch,
- const ITensorInfo *src,
- const Attributes &attributes)
+Status GpuReshape::validate_op(const GpuWorkloadSketch &sketch, const ITensorInfo *src, const Attributes &attributes)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src);
ARM_COMPUTE_RETURN_ERROR_ON(!src->has_valid_id());
@@ -111,9 +110,7 @@ Status GpuReshape::validate_op(const GpuWorkloadSketch &sketch,
return is_supported_op_helper(*sketch.gpu_context(), src, &dst_info_to_validate, attributes);
}
-ITensorInfo *GpuReshape::create_op(GpuWorkloadSketch &sketch,
- ITensorInfo *src,
- const Attributes &attributes)
+ITensorInfo *GpuReshape::create_op(GpuWorkloadSketch &sketch, ITensorInfo *src, const Attributes &attributes)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(src);
ARM_COMPUTE_LOG_PARAMS(src, attributes.shape());
@@ -127,7 +124,7 @@ ITensorInfo *GpuReshape::create_op(GpuWorkloadSketch &sketch,
// Translate into components and add to component graph
auto &comp_graph = sketch.implementation().component_graph();
const auto sketch_ctx = sketch.implementation().context();
- if(sketch_ctx->gpu_language() == GpuLanguage::OpenCL)
+ if (sketch_ctx->gpu_language() == GpuLanguage::OpenCL)
{
const auto cl_compile_ctx = sketch_ctx->cl_compile_context();
ARM_COMPUTE_UNUSED(cl_compile_ctx);
@@ -136,7 +133,7 @@ ITensorInfo *GpuReshape::create_op(GpuWorkloadSketch &sketch,
// Add ElementwiseBinary Component
{
auto properties = IGpuKernelComponent::Properties();
- properties.stage(UnitWorkloadStage{ UnitWorkloadStage::Stage::Run });
+ properties.stage(UnitWorkloadStage{UnitWorkloadStage::Stage::Run});
ArgumentPack<ITensorInfo> arguments;
arguments.add_const_tensor(ACL_SRC_0, src);
diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuResize.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuResize.cpp
index 5f52eea7d0..fb09875b33 100644
--- a/src/dynamic_fusion/sketch/gpu/operators/GpuResize.cpp
+++ b/src/dynamic_fusion/sketch/gpu/operators/GpuResize.cpp
@@ -26,12 +26,12 @@
#include "arm_compute/core/Error.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+
+#include "src/common/utils/Log.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/dynamic_fusion/sketch/ArgumentPack.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h"
#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentResize.h"
-
-#include "src/common/utils/Log.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h"
namespace arm_compute
{
@@ -43,7 +43,7 @@ namespace
{
void calculate_and_init_dst_if_empty(ITensorInfo *dst, const ITensorInfo *src, const ResizeAttributes &attributes)
{
- if(dst->total_size() == 0U)
+ if (dst->total_size() == 0U)
{
TensorShape out_shape = src->tensor_shape();
@@ -64,7 +64,7 @@ Status is_supported_op_helper(const GpuWorkloadContext &context,
TensorInfo dst_info_to_validate;
const ITensorInfo *dst_info_to_validate_ptr = &dst_info_to_validate;
- if(dst != nullptr)
+ if (dst != nullptr)
{
dst_info_to_validate_ptr = dst;
}
@@ -73,22 +73,25 @@ Status is_supported_op_helper(const GpuWorkloadContext &context,
// Check support level
// Data type
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::U8, DataType::S16, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED,
+ DataType::U8, DataType::S16, DataType::F16, DataType::F32);
// Data layout
ARM_COMPUTE_RETURN_ERROR_ON_DATA_LAYOUT_NOT_IN(src, DataLayout::NHWC);
// Interpolation policy
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(attributes.interpolation_policy() != InterpolationPolicy::NEAREST_NEIGHBOR && attributes.interpolation_policy() != InterpolationPolicy::BILINEAR,
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(attributes.interpolation_policy() != InterpolationPolicy::NEAREST_NEIGHBOR &&
+ attributes.interpolation_policy() != InterpolationPolicy::BILINEAR,
"Interpolation policy must be NEAREST_NEIGHBOR or BILINEAR");
// Check components
- if(context.gpu_language() == GpuLanguage::OpenCL)
+ if (context.gpu_language() == GpuLanguage::OpenCL)
{
const auto cl_compile_ctx = context.cl_compile_context();
ARM_COMPUTE_RETURN_ERROR_ON(cl_compile_ctx == nullptr);
// Validate Activation Component
{
- const KernelProperties properties = IGpuKernelComponent::Properties().stage(UnitWorkloadStage{ UnitWorkloadStage::Stage::Run });
+ const KernelProperties properties =
+ IGpuKernelComponent::Properties().stage(UnitWorkloadStage{UnitWorkloadStage::Stage::Run});
ArgumentPack<ITensorInfo> arguments;
arguments.add_const_tensor(ACL_SRC_0, src);
@@ -107,16 +110,14 @@ Status is_supported_op_helper(const GpuWorkloadContext &context,
constexpr GpuOperatorType operator_type = GpuOperatorType::Complex;
} // namespace
-Status GpuResize::is_supported_op(const GpuWorkloadContext &context,
- const ITensorInfo *src,
- const Attributes &attributes)
+Status
+GpuResize::is_supported_op(const GpuWorkloadContext &context, const ITensorInfo *src, const Attributes &attributes)
{
return is_supported_op_helper(context, src, nullptr, attributes);
}
-Status GpuResize::validate_op(const GpuWorkloadSketch &sketch,
- const ITensorInfo *src,
- const GpuResize::Attributes &attributes)
+Status
+GpuResize::validate_op(const GpuWorkloadSketch &sketch, const ITensorInfo *src, const GpuResize::Attributes &attributes)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src);
ARM_COMPUTE_RETURN_ERROR_ON(!src->has_valid_id());
@@ -141,9 +142,7 @@ Status GpuResize::validate_op(const GpuWorkloadSketch &sketch,
return is_supported_op_helper(*sketch.gpu_context(), src, &dst_info_to_validate, attributes);
}
-ITensorInfo *GpuResize::create_op(GpuWorkloadSketch &sketch,
- ITensorInfo *src,
- const GpuResize::Attributes &attributes)
+ITensorInfo *GpuResize::create_op(GpuWorkloadSketch &sketch, ITensorInfo *src, const GpuResize::Attributes &attributes)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(src);
ARM_COMPUTE_LOG_PARAMS(src, attributes);
@@ -159,13 +158,14 @@ ITensorInfo *GpuResize::create_op(GpuWorkloadSketch &sketch,
GpuKernelComponentGraph &comp_graph = sketch.implementation().component_graph();
const auto *sketch_ctx = sketch.implementation().context();
- if(sketch_ctx->gpu_language() == GpuLanguage::OpenCL)
+ if (sketch_ctx->gpu_language() == GpuLanguage::OpenCL)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(sketch_ctx->cl_compile_context());
// Add Resize Component
{
- const auto properties = IGpuKernelComponent::Properties().stage(UnitWorkloadStage{ UnitWorkloadStage::Stage::Run });
+ const auto properties =
+ IGpuKernelComponent::Properties().stage(UnitWorkloadStage{UnitWorkloadStage::Stage::Run});
ArgumentPack<ITensorInfo> arguments;
arguments.add_const_tensor(ACL_SRC_0, src);
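
GpuResize, like GpuReshape, GpuSigmoid and GpuTanh in this patch, funnels both is_supported_op and validate_op through one helper; passing dst == nullptr makes the helper validate against a locally auto-initialised tensor info instead. A stand-in sketch of that dispatch (types and the final check are illustrative only):

struct InfoStub // stand-in for ITensorInfo
{
    int shape{0};
};

bool helper(const InfoStub *src, const InfoStub *dst) // is_supported_op_helper stand-in
{
    InfoStub        local{};              // dst_info_to_validate
    const InfoStub *to_validate = &local;
    if (dst != nullptr)                   // caller supplied a dst: validate against it
    {
        to_validate = dst;
    }
    local.shape = src->shape;             // auto_init_if_empty stand-in
    return to_validate->shape >= 0;       // the real helper validates the component here
}

bool is_supported(const InfoStub *src)                  { return helper(src, nullptr); }
bool validate(const InfoStub *src, const InfoStub *dst) { return helper(src, dst); }

int main()
{
    InfoStub src{4};
    return (is_supported(&src) && validate(&src, &src)) ? 0 : 1;
}
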
diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuSigmoid.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuSigmoid.cpp
index 09debad969..a2260c8c36 100644
--- a/src/dynamic_fusion/sketch/gpu/operators/GpuSigmoid.cpp
+++ b/src/dynamic_fusion/sketch/gpu/operators/GpuSigmoid.cpp
@@ -23,14 +23,15 @@
*/
#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuSigmoid.h"
+
#include "arm_compute/core/experimental/Types.h"
#include "arm_compute/function_info/ActivationLayerInfo.h"
#include "src/common/utils/Log.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/dynamic_fusion/sketch/ArgumentPack.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h"
#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h"
namespace arm_compute
{
@@ -40,9 +41,7 @@ namespace dynamic_fusion
{
namespace
{
-Status is_supported_op_helper(const GpuWorkloadContext &context,
- const ITensorInfo *src,
- const ITensorInfo *dst)
+Status is_supported_op_helper(const GpuWorkloadContext &context, const ITensorInfo *src, const ITensorInfo *dst)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst);
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::F16, DataType::F32);
@@ -50,20 +49,21 @@ Status is_supported_op_helper(const GpuWorkloadContext &context,
TensorInfo dst_info_to_validate;
const ITensorInfo *dst_info_to_validate_ptr = &dst_info_to_validate;
- if(dst != nullptr)
+ if (dst != nullptr)
{
dst_info_to_validate_ptr = dst;
}
auto_init_if_empty(dst_info_to_validate, *src->clone());
- const ClComponentActivation::Attributes act_info{ ActivationLayerInfo::ActivationFunction::LOGISTIC };
+ const ClComponentActivation::Attributes act_info{ActivationLayerInfo::ActivationFunction::LOGISTIC};
// Check components
- if(context.gpu_language() == GpuLanguage::OpenCL)
+ if (context.gpu_language() == GpuLanguage::OpenCL)
{
// Validate Activation Component
- const auto properties = IGpuKernelComponent::Properties().stage(UnitWorkloadStage{ UnitWorkloadStage::Stage::Run });
+ const auto properties =
+ IGpuKernelComponent::Properties().stage(UnitWorkloadStage{UnitWorkloadStage::Stage::Run});
ArgumentPack<ITensorInfo> arguments;
arguments.add_const_tensor(ACL_SRC, src);
@@ -80,14 +80,12 @@ Status is_supported_op_helper(const GpuWorkloadContext &context,
constexpr GpuOperatorType operator_type = GpuOperatorType::Simple;
} // namespace
-Status GpuSigmoid::is_supported_op(const GpuWorkloadContext &context,
- const ITensorInfo *src)
+Status GpuSigmoid::is_supported_op(const GpuWorkloadContext &context, const ITensorInfo *src)
{
return is_supported_op_helper(context, src, nullptr);
}
-Status GpuSigmoid::validate_op(const GpuWorkloadSketch &sketch,
- const ITensorInfo *src)
+Status GpuSigmoid::validate_op(const GpuWorkloadSketch &sketch, const ITensorInfo *src)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src);
@@ -112,8 +110,7 @@ Status GpuSigmoid::validate_op(const GpuWorkloadSketch &sketch,
return is_supported_op_helper(*sketch.gpu_context(), src, &dst_info_to_validate);
}
-ITensorInfo *GpuSigmoid::create_op(GpuWorkloadSketch &sketch,
- ITensorInfo *src)
+ITensorInfo *GpuSigmoid::create_op(GpuWorkloadSketch &sketch, ITensorInfo *src)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(src);
ARM_COMPUTE_LOG_PARAMS(src);
@@ -128,15 +125,15 @@ ITensorInfo *GpuSigmoid::create_op(GpuWorkloadSketch &sketch,
// Translate into components and add to component graph
GpuKernelComponentGraph &comp_graph = sketch.implementation().component_graph();
- const ClComponentActivation::Attributes act_info{ ActivationLayerInfo::ActivationFunction::LOGISTIC };
+ const ClComponentActivation::Attributes act_info{ActivationLayerInfo::ActivationFunction::LOGISTIC};
const auto *const sketch_ctx = sketch.implementation().context();
- if(sketch_ctx->gpu_language() == GpuLanguage::OpenCL)
+ if (sketch_ctx->gpu_language() == GpuLanguage::OpenCL)
{
// Add Activation Component
auto properties = IGpuKernelComponent::Properties();
- properties.stage(UnitWorkloadStage{ UnitWorkloadStage::Stage::Run });
+ properties.stage(UnitWorkloadStage{UnitWorkloadStage::Stage::Run});
ArgumentPack<ITensorInfo> arguments;
arguments.add_const_tensor(ACL_SRC, src);
diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuSoftmax.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuSoftmax.cpp
index ffc4553a7d..c87b282aec 100644
--- a/src/dynamic_fusion/sketch/gpu/operators/GpuSoftmax.cpp
+++ b/src/dynamic_fusion/sketch/gpu/operators/GpuSoftmax.cpp
@@ -22,13 +22,14 @@
* SOFTWARE.
*/
#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuSoftmax.h"
+
#include "arm_compute/core/Error.h"
-#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DMaxShiftExpSum.h"
-#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DNorm.h"
#include "src/common/utils/Log.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/dynamic_fusion/sketch/ArgumentPack.h"
+#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DMaxShiftExpSum.h"
+#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DNorm.h"
#include "src/dynamic_fusion/sketch/gpu/GpuOperatorProperties.h"
#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h"
@@ -52,7 +53,7 @@ Status GpuSoftmax::is_supported_op(const GpuWorkloadContext &context,
TensorInfo dst_info_to_validate;
// Auto initialize dst tensor info
- if(dst != nullptr)
+ if (dst != nullptr)
{
dst_info_to_validate = *dst;
}
@@ -61,11 +62,12 @@ Status GpuSoftmax::is_supported_op(const GpuWorkloadContext &context,
auto_init_if_empty(dst_info_to_validate, *src->clone());
}
// Check components
- if(context.gpu_language() == GpuLanguage::OpenCL)
+ if (context.gpu_language() == GpuLanguage::OpenCL)
{
const auto cl_compile_ctx = context.cl_compile_context();
ARM_COMPUTE_RETURN_ERROR_ON(cl_compile_ctx == nullptr);
- const KernelProperties properties = IGpuKernelComponent::Properties().stage(UnitWorkloadStage{ UnitWorkloadStage::Stage::Run });
+ const KernelProperties properties =
+ IGpuKernelComponent::Properties().stage(UnitWorkloadStage{UnitWorkloadStage::Stage::Run});
TensorShape logits_sum_shape = src->tensor_shape();
TensorInfo logits(src->clone()->set_tensor_shape(logits_sum_shape));
@@ -86,7 +88,8 @@ Status GpuSoftmax::is_supported_op(const GpuWorkloadContext &context,
arguments_norm.add_const_tensor(ACL_SRC_1, &sum);
arguments_norm.add_const_tensor(ACL_DST_0, &dst_info_to_validate);
- ARM_COMPUTE_RETURN_ON_ERROR(ClComponentLogits1DMaxShiftExpSum::validate(properties, arguments_exp_sum, attributes));
+ ARM_COMPUTE_RETURN_ON_ERROR(
+ ClComponentLogits1DMaxShiftExpSum::validate(properties, arguments_exp_sum, attributes));
ARM_COMPUTE_RETURN_ON_ERROR(ClComponentLogits1DNorm::validate(properties, arguments_norm, attributes));
}
else
@@ -105,14 +108,16 @@ Status GpuSoftmax::validate_op(const GpuWorkloadSketch &sketch,
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst);
ARM_COMPUTE_RETURN_ERROR_ON(!src->has_valid_id() || !dst->has_valid_id());
ARM_COMPUTE_RETURN_ERROR_ON_MSG(src->num_dimensions() > 4, "Only up to 4 dimensions are supported");
- ARM_COMPUTE_RETURN_ERROR_ON(attributes.axis() < static_cast<int32_t>(-src->num_dimensions()) || static_cast<int32_t>(src->num_dimensions()) <= attributes.axis());
+ ARM_COMPUTE_RETURN_ERROR_ON(attributes.axis() < static_cast<int32_t>(-src->num_dimensions()) ||
+ static_cast<int32_t>(src->num_dimensions()) <= attributes.axis());
// Auto initialize dst tensor info
TensorInfo dst_info_to_validate = *dst;
auto_init_if_empty(dst_info_to_validate, *src->clone());
- const size_t actual_axis = static_cast<size_t>(wrap_around(attributes.axis(), static_cast<int32_t>(src->num_dimensions())));
- const bool needs_permute = actual_axis != 0;
+ const size_t actual_axis =
+ static_cast<size_t>(wrap_around(attributes.axis(), static_cast<int32_t>(src->num_dimensions())));
+ const bool needs_permute = actual_axis != 0;
ARM_COMPUTE_RETURN_ERROR_ON_MSG(needs_permute, "Dynamic fusion softmax on axis!=0 not supported yet.");
// Perform fusion test and check if the operator meets the fusion constraints
@@ -128,17 +133,16 @@ Status GpuSoftmax::validate_op(const GpuWorkloadSketch &sketch,
return is_supported_op(*sketch.gpu_context(), src, &dst_info_to_validate, attributes);
}
-void GpuSoftmax::create_op(GpuWorkloadSketch &sketch,
- ITensorInfo *src,
- ITensorInfo *dst,
- const Attributes &attributes)
+void GpuSoftmax::create_op(GpuWorkloadSketch &sketch, ITensorInfo *src, ITensorInfo *dst, const Attributes &attributes)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst);
ARM_COMPUTE_LOG_PARAMS(src, dst, attributes);
TensorShape logits_sum_shape = src->tensor_shape();
- ITensorInfo *logits = sketch.implementation().create_auxiliary_tensor(src->clone()->set_id(ITensorInfo::invalid_tensor_id).set_tensor_shape(logits_sum_shape));
+ ITensorInfo *logits = sketch.implementation().create_auxiliary_tensor(
+ src->clone()->set_id(ITensorInfo::invalid_tensor_id).set_tensor_shape(logits_sum_shape));
logits_sum_shape.set(0, 1);
- ITensorInfo *sum = sketch.implementation().create_auxiliary_tensor(src->clone()->set_id(ITensorInfo::invalid_tensor_id).set_tensor_shape(logits_sum_shape));
+ ITensorInfo *sum = sketch.implementation().create_auxiliary_tensor(
+ src->clone()->set_id(ITensorInfo::invalid_tensor_id).set_tensor_shape(logits_sum_shape));
// Auto initialize dst tensor info and the auxiliary tensor infos as well
auto_init_if_empty(*dst, *src->clone());
@@ -151,7 +155,7 @@ void GpuSoftmax::create_op(GpuWorkloadSketch &sketch,
auto &comp_graph = sketch.implementation().component_graph();
const auto sketch_ctx = sketch.implementation().context();
- if(sketch_ctx->gpu_language() == GpuLanguage::OpenCL)
+ if (sketch_ctx->gpu_language() == GpuLanguage::OpenCL)
{
const auto cl_compile_ctx = sketch_ctx->cl_compile_context();
ARM_COMPUTE_UNUSED(cl_compile_ctx);
@@ -160,7 +164,7 @@ void GpuSoftmax::create_op(GpuWorkloadSketch &sketch,
// Add Direct Conv2d Component
{
auto properties = IGpuKernelComponent::Properties();
- properties.stage(UnitWorkloadStage{ UnitWorkloadStage::Stage::Run });
+ properties.stage(UnitWorkloadStage{UnitWorkloadStage::Stage::Run});
ArgumentPack<ITensorInfo> arguments_exp_sum;
ArgumentPack<ITensorInfo> arguments_norm;
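
Two details in the GpuSoftmax hunks above are easy to miss: the auxiliary tensors (logits keeps the source shape, while sum collapses dimension 0 to 1) and the axis normalisation through wrap_around, which maps negative axes into [0, num_dimensions). A self-contained sketch of both, with a hypothetical wrap_axis helper standing in for ACL's wrap_around:

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <vector>

// Hypothetical stand-in for ACL's wrap_around: map axis in [-n, n) onto [0, n).
int32_t wrap_axis(int32_t axis, int32_t num_dims)
{
    return (axis % num_dims + num_dims) % num_dims;
}

int main()
{
    const std::vector<std::size_t> src_shape{10, 4, 2};

    std::vector<std::size_t> logits_shape = src_shape; // logits: same shape as src
    std::vector<std::size_t> sum_shape    = src_shape;
    sum_shape.at(0) = 1;                               // sum: dimension 0 collapsed to 1

    assert(logits_shape == src_shape && sum_shape.at(0) == 1);
    assert(wrap_axis(-1, 3) == 2); // a negative axis wraps to the last dimension
    assert(wrap_axis(0, 3) == 0);  // axis 0 is the only case fused here (needs_permute == false)
    return 0;
}
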
diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuSub.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuSub.cpp
index 8240008f2a..e5d62c9930 100644
--- a/src/dynamic_fusion/sketch/gpu/operators/GpuSub.cpp
+++ b/src/dynamic_fusion/sketch/gpu/operators/GpuSub.cpp
@@ -22,6 +22,7 @@
* SOFTWARE.
*/
#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuSub.h"
+
#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h"
#include "src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.h"
@@ -32,12 +33,11 @@ namespace experimental
{
namespace dynamic_fusion
{
-Status GpuSub::validate_op(const GpuWorkloadSketch &sketch,
- const ITensorInfo *lhs,
- const ITensorInfo *rhs)
+Status GpuSub::validate_op(const GpuWorkloadSketch &sketch, const ITensorInfo *lhs, const ITensorInfo *rhs)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(lhs, rhs);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lhs, 1, DataType::F16, DataType::F32, DataType::U8, DataType::S16, DataType::S32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lhs, 1, DataType::F16, DataType::F32, DataType::U8,
+ DataType::S16, DataType::S32);
ARM_COMPUTE_RETURN_ERROR_ON_MSG(lhs->data_type() != rhs->data_type(), "Input tensors must be the same data type");
// Set the elementwise operation to Sub then call the elementwise common validate_op
@@ -46,12 +46,11 @@ Status GpuSub::validate_op(const GpuWorkloadSketch &sketch,
return GpuElementwiseBinaryCommon::validate_op(sketch, lhs, rhs, common_attributes);
}
-Status GpuSub::is_supported_op(const GpuWorkloadContext &context,
- const ITensorInfo *lhs,
- const ITensorInfo *rhs)
+Status GpuSub::is_supported_op(const GpuWorkloadContext &context, const ITensorInfo *lhs, const ITensorInfo *rhs)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(lhs, rhs);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lhs, 1, DataType::F16, DataType::F32, DataType::U8, DataType::S16, DataType::S32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lhs, 1, DataType::F16, DataType::F32, DataType::U8,
+ DataType::S16, DataType::S32);
ARM_COMPUTE_RETURN_ERROR_ON_MSG(lhs->data_type() != rhs->data_type(), "Input tensors must be the same data type");
// Set the elementwise operation to Sub then call the elementwise common is_supported_op
@@ -60,9 +59,7 @@ Status GpuSub::is_supported_op(const GpuWorkloadContext &context,
return GpuElementwiseBinaryCommon::is_supported_op(context, lhs, rhs, common_attributes);
}
-ITensorInfo *GpuSub::create_op(GpuWorkloadSketch &sketch,
- ITensorInfo *lhs,
- ITensorInfo *rhs)
+ITensorInfo *GpuSub::create_op(GpuWorkloadSketch &sketch, ITensorInfo *lhs, ITensorInfo *rhs)
{
// No need to log or validate as they'll be handled inside GpuElementwiseBinaryCommon::create_op()
// Set the elementwise operation to Sub then call the elementwise common create_op
diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuTanh.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuTanh.cpp
index c00716c76e..bf0f274c5c 100644
--- a/src/dynamic_fusion/sketch/gpu/operators/GpuTanh.cpp
+++ b/src/dynamic_fusion/sketch/gpu/operators/GpuTanh.cpp
@@ -23,14 +23,15 @@
*/
#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuTanh.h"
+
#include "arm_compute/core/experimental/Types.h"
+#include "src/common/utils/Log.h"
+#include "src/core/helpers/AutoConfiguration.h"
#include "src/dynamic_fusion/sketch/ArgumentPack.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h"
#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h"
#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.h"
-#include "src/core/helpers/AutoConfiguration.h"
-#include "src/common/utils/Log.h"
namespace arm_compute
{
@@ -40,9 +41,7 @@ namespace dynamic_fusion
{
namespace
{
-Status is_supported_op_helper(const GpuWorkloadContext &context,
- const ITensorInfo *src,
- const ITensorInfo *dst)
+Status is_supported_op_helper(const GpuWorkloadContext &context, const ITensorInfo *src, const ITensorInfo *dst)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst);
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::F16, DataType::F32);
@@ -50,20 +49,21 @@ Status is_supported_op_helper(const GpuWorkloadContext &context,
TensorInfo dst_info_to_validate;
const ITensorInfo *dst_info_to_validate_ptr = &dst_info_to_validate;
- if(dst != nullptr)
+ if (dst != nullptr)
{
dst_info_to_validate_ptr = dst;
}
auto_init_if_empty(dst_info_to_validate, *src->clone());
- const ClComponentActivation::Attributes act_info{ ActivationLayerInfo::ActivationFunction::TANH };
+ const ClComponentActivation::Attributes act_info{ActivationLayerInfo::ActivationFunction::TANH};
// Check components
- if(context.gpu_language() == GpuLanguage::OpenCL)
+ if (context.gpu_language() == GpuLanguage::OpenCL)
{
// Validate Activation Component
- const auto properties = IGpuKernelComponent::Properties().stage(UnitWorkloadStage{ UnitWorkloadStage::Stage::Run });
+ const auto properties =
+ IGpuKernelComponent::Properties().stage(UnitWorkloadStage{UnitWorkloadStage::Stage::Run});
ArgumentPack<ITensorInfo> arguments;
arguments.add_const_tensor(ACL_SRC, src);
@@ -80,14 +80,12 @@ Status is_supported_op_helper(const GpuWorkloadContext &context,
constexpr GpuOperatorType operator_type = GpuOperatorType::Simple;
} // namespace
-Status GpuTanh::is_supported_op(const GpuWorkloadContext &context,
- const ITensorInfo *src)
+Status GpuTanh::is_supported_op(const GpuWorkloadContext &context, const ITensorInfo *src)
{
return is_supported_op_helper(context, src, nullptr);
}
-Status GpuTanh::validate_op(const GpuWorkloadSketch &sketch,
- const ITensorInfo *src)
+Status GpuTanh::validate_op(const GpuWorkloadSketch &sketch, const ITensorInfo *src)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src);
@@ -112,8 +110,7 @@ Status GpuTanh::validate_op(const GpuWorkloadSketch &sketch,
return is_supported_op_helper(*sketch.gpu_context(), src, &dst_info_to_validate);
}
-ITensorInfo *GpuTanh::create_op(GpuWorkloadSketch &sketch,
- ITensorInfo *src)
+ITensorInfo *GpuTanh::create_op(GpuWorkloadSketch &sketch, ITensorInfo *src)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(src);
ARM_COMPUTE_LOG_PARAMS(src);
@@ -128,15 +125,15 @@ ITensorInfo *GpuTanh::create_op(GpuWorkloadSketch &sketch,
// Translate into components and add to component graph
GpuKernelComponentGraph &comp_graph = sketch.implementation().component_graph();
- const ClComponentActivation::Attributes act_info{ ActivationLayerInfo::ActivationFunction::TANH };
+ const ClComponentActivation::Attributes act_info{ActivationLayerInfo::ActivationFunction::TANH};
const auto *const sketch_ctx = sketch.implementation().context();
- if(sketch_ctx->gpu_language() == GpuLanguage::OpenCL)
+ if (sketch_ctx->gpu_language() == GpuLanguage::OpenCL)
{
// Add Activation Component
auto properties = IGpuKernelComponent::Properties();
- properties.stage(UnitWorkloadStage{ UnitWorkloadStage::Stage::Run });
+ properties.stage(UnitWorkloadStage{UnitWorkloadStage::Stage::Run});
ArgumentPack<ITensorInfo> arguments;
arguments.add_const_tensor(ACL_SRC, src);
diff --git a/src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.cpp b/src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.cpp
index 7c087c9a7b..d79a4c42c9 100644
--- a/src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.cpp
+++ b/src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.cpp
@@ -22,11 +22,12 @@
* SOFTWARE.
*/
#include "src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.h"
+
#include "src/common/utils/Log.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/dynamic_fusion/sketch/ArgumentPack.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h"
#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h"
namespace arm_compute
{
@@ -38,9 +39,10 @@ namespace
{
void calculate_and_init_dst_if_empty(ITensorInfo *dst, const ITensorInfo *lhs, const ITensorInfo *rhs)
{
- if(dst->total_size() == 0U)
+ if (dst->total_size() == 0U)
{
- const std::pair<TensorShape, ValidRegion> broadcast_pair = ITensorInfo::broadcast_shape_and_valid_region(*lhs, *rhs);
+ const std::pair<TensorShape, ValidRegion> broadcast_pair =
+ ITensorInfo::broadcast_shape_and_valid_region(*lhs, *rhs);
auto_init_if_empty(*dst, lhs->clone()->set_tensor_shape(broadcast_pair.first));
}
}
@@ -56,7 +58,7 @@ Status is_supported_op_helper(const GpuWorkloadContext &context,
TensorInfo dst_info_to_validate;
const ITensorInfo *dst_info_to_validate_ptr = &dst_info_to_validate;
- if(dst != nullptr)
+ if (dst != nullptr)
{
dst_info_to_validate_ptr = dst;
}
@@ -64,7 +66,7 @@ Status is_supported_op_helper(const GpuWorkloadContext &context,
calculate_and_init_dst_if_empty(&dst_info_to_validate, lhs, rhs);
// Check components
- if(context.gpu_language() == GpuLanguage::OpenCL)
+ if (context.gpu_language() == GpuLanguage::OpenCL)
{
const auto cl_compile_ctx = context.cl_compile_context();
ARM_COMPUTE_RETURN_ERROR_ON(cl_compile_ctx == nullptr);
@@ -90,7 +92,8 @@ Status is_supported_op_helper(const GpuWorkloadContext &context,
GpuOperatorType operator_type = GpuOperatorType::Simple;
} // namespace
-ElementwiseBinaryCommonAttributes &ElementwiseBinaryCommonAttributes::operation(const ElementwiseBinaryCommonAttributes::ElementwiseOp &operation)
+ElementwiseBinaryCommonAttributes &
+ElementwiseBinaryCommonAttributes::operation(const ElementwiseBinaryCommonAttributes::ElementwiseOp &operation)
{
_operation = operation;
return *this;
@@ -157,14 +160,14 @@ ITensorInfo *GpuElementwiseBinaryCommon::create_op(GpuWorkloadSketch
const auto sketch_ctx = sketch.implementation().context();
- if(sketch_ctx->gpu_language() == GpuLanguage::OpenCL)
+ if (sketch_ctx->gpu_language() == GpuLanguage::OpenCL)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(sketch_ctx->cl_compile_context());
// Add ElementwiseBinary Component
{
auto properties = IGpuKernelComponent::Properties();
- properties.stage(UnitWorkloadStage{ UnitWorkloadStage::Stage::Run });
+ properties.stage(UnitWorkloadStage{UnitWorkloadStage::Stage::Run});
ArgumentPack<ITensorInfo> arguments;
arguments.add_const_tensor(ACL_SRC_0, lhs);
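
The destination of the common elementwise path is derived from broadcast_shape_and_valid_region(lhs, rhs). A minimal stand-in for the broadcast-shape half, assuming the usual rule that per dimension the sizes must match or one of them must be 1, with missing upper dimensions counting as 1 (a simplification of ACL's actual helper):

#include <algorithm>
#include <cassert>
#include <cstddef>
#include <utility>
#include <vector>

std::vector<std::size_t> broadcast_shape(std::vector<std::size_t> a, std::vector<std::size_t> b)
{
    if (a.size() < b.size())
        std::swap(a, b);
    b.resize(a.size(), 1); // missing upper dimensions count as size 1
    std::vector<std::size_t> out(a.size());
    for (std::size_t i = 0; i < a.size(); ++i)
    {
        assert(a[i] == b[i] || a[i] == 1 || b[i] == 1); // otherwise not broadcastable
        out[i] = std::max(a[i], b[i]);
    }
    return out;
}

int main()
{
    const auto out = broadcast_shape({8, 1, 3}, {8, 4, 1});
    return (out == std::vector<std::size_t>{8, 4, 3}) ? 0 : 1;
}
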
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.cpp
index 0972b4e8e2..775b0a0c8c 100644
--- a/src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.cpp
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.cpp
@@ -22,8 +22,10 @@
* SOFTWARE.
*/
#include "GpuKernelVariableTable.h"
+
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/ITensorInfo.h"
+
#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
namespace arm_compute
@@ -32,14 +34,17 @@ namespace experimental
{
namespace dynamic_fusion
{
-void GpuKernelVariableTable::declare_variable(const GpuKernelComponentGroup &comp_group, const ITensorInfo *tensor, GpuKernelArgumentInfo argument_info, const std::string &alias)
+void GpuKernelVariableTable::declare_variable(const GpuKernelComponentGroup &comp_group,
+ const ITensorInfo *tensor,
+ GpuKernelArgumentInfo argument_info,
+ const std::string &alias)
{
ARM_COMPUTE_ERROR_ON_MSG(!tensor->has_valid_id(), "Tensor info with valid id expected");
// Do not re-declare if the variable associated with the tensor has already been declared
auto it = _vars.find(tensor->id());
- if(it != _vars.end())
+ if (it != _vars.end())
{
ARM_COMPUTE_ERROR_ON(!(it->second.kernel_argument_info == argument_info));
return;
@@ -47,14 +52,12 @@ void GpuKernelVariableTable::declare_variable(const GpuKernelComponentGroup &com
const auto target = comp_group.get_tile_for_tensor(tensor);
- if(target != tensor)
+ if (target != tensor)
{
// If the tensor uses a shared tile, don't declare another variable.
it = _vars.find(target->id());
- ARM_COMPUTE_ERROR_ON_MSG(
- it == _vars.end(),
- "The variable used for this tensor must have been declared.");
+ ARM_COMPUTE_ERROR_ON_MSG(it == _vars.end(), "The variable used for this tensor must have been declared.");
_vars[tensor->id()] = it->second;
}
@@ -64,7 +67,7 @@ void GpuKernelVariableTable::declare_variable(const GpuKernelComponentGroup &com
std::stringstream ss;
ss << alias << "_t" << abs(tensor->id());
const auto uniq_name = ss.str();
- TensorVariable var{ tensor->id(), uniq_name, argument_info };
+ TensorVariable var{tensor->id(), uniq_name, argument_info};
_vars.emplace(tensor->id(), var);
}
@@ -76,12 +79,13 @@ GpuKernelVariableTable::TensorVariable GpuKernelVariableTable::get_variable(cons
return var;
}
-GpuKernelVariableTable::VariableList GpuKernelVariableTable::get_variable_list(const std::vector<const ITensorInfo *> &tensors) const
+GpuKernelVariableTable::VariableList
+GpuKernelVariableTable::get_variable_list(const std::vector<const ITensorInfo *> &tensors) const
{
VariableList vars{};
- for(const auto &tensor : tensors)
+ for (const auto &tensor : tensors)
{
- if(!tensor->has_valid_id())
+ if (!tensor->has_valid_id())
{
continue;
}
@@ -90,23 +94,19 @@ GpuKernelVariableTable::VariableList GpuKernelVariableTable::get_variable_list(c
return vars;
}
-TagVal::TagVal(const GpuKernelVariableTable::TensorVariable &var)
- : value{ var.uniq_name }
+TagVal::TagVal(const GpuKernelVariableTable::TensorVariable &var) : value{var.uniq_name}
{
}
-TagVal::TagVal(const std::string &val)
- : value{ val }
+TagVal::TagVal(const std::string &val) : value{val}
{
}
-TagVal::TagVal(const char *val)
- : value{ std::string(val) }
+TagVal::TagVal(const char *val) : value{std::string(val)}
{
}
-TagVal::TagVal(const DataType &data_type)
- : value{ get_cl_type_from_data_type(data_type) }
+TagVal::TagVal(const DataType &data_type) : value{get_cl_type_from_data_type(data_type)}
{
}
} // namespace dynamic_fusion
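
declare_variable above derives the final kernel variable name as alias + "_t" + abs(tensor id) through a stringstream. A tiny standalone equivalent:

#include <cstdlib>
#include <sstream>
#include <string>

std::string make_uniq_name(const std::string &alias, int tensor_id)
{
    std::stringstream ss;
    ss << alias << "_t" << std::abs(tensor_id); // e.g. ("src", -3) -> "src_t3"
    return ss.str();
}

int main()
{
    return make_uniq_name("src", -3) == "src_t3" ? 0 : 1;
}
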
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h b/src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h
index a49d38e10c..c17f131ada 100644
--- a/src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h
@@ -25,6 +25,7 @@
#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_GPUKERNELVARIABLETABLE
#include "arm_compute/core/ITensorInfo.h"
+
#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
#include "support/AclRequires.h"
#include "support/StringSupport.h"
@@ -55,11 +56,11 @@ public:
struct TensorVariable
{
public:
- TensorVariable() = default;
- TensorVariable(const TensorVariable &) = default;
+ TensorVariable() = default;
+ TensorVariable(const TensorVariable &) = default;
TensorVariable &operator=(const TensorVariable &) = default;
- ITensorInfo::Id id{ ITensorInfo::invalid_tensor_id };
- std::string uniq_name{ "empty" }; // Unique name, also the final variable name used in the built code
+ ITensorInfo::Id id{ITensorInfo::invalid_tensor_id};
+ std::string uniq_name{"empty"}; // Unique name, also the final variable name used in the built code
GpuKernelArgumentInfo kernel_argument_info{};
bool has_valid_id() const
{
@@ -76,7 +77,10 @@ public:
* @param[in] argument_info Kernel argument information
* @param[in] alias Alias for the variable. Will be used as part of the variable name
*/
- void declare_variable(const GpuKernelComponentGroup &comp_group, const ITensorInfo *tensor, GpuKernelArgumentInfo argument_info, const std::string &alias = "unnamed");
+ void declare_variable(const GpuKernelComponentGroup &comp_group,
+ const ITensorInfo *tensor,
+ GpuKernelArgumentInfo argument_info,
+ const std::string &alias = "unnamed");
/** Get the @ref TensorVariable associated with @p tensor
*
* @param[in] tensor Tensor info to be queried
@@ -106,8 +110,7 @@ struct TagVal
TagVal(const GpuKernelVariableTable::TensorVariable &var);
/** Construct a @ref TagVal from an integral type */
template <typename T, ARM_COMPUTE_REQUIRES_TA(std::is_integral<T>::value)>
- TagVal(T val)
- : value{ support::cpp11::to_string(val) }
+ TagVal(T val) : value{support::cpp11::to_string(val)}
{
}
/** Construct a @ref TagVal from a string */
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h b/src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h
index 4a1fb142d6..9d0b4f592a 100644
--- a/src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h
@@ -27,6 +27,7 @@
#include "arm_compute/core/CL/CLCompileContext.h"
#include "arm_compute/core/ITensorInfo.h"
#include "arm_compute/core/Window.h"
+
#include "src/dynamic_fusion/sketch/ArgumentPack.h"
#include "src/dynamic_fusion/sketch/gpu/components/Types.h"
#include "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h"
@@ -57,8 +58,7 @@ public:
* @param[in] id Component id
* @param[in] tensors Tensor arguments to the components
*/
- IGpuTemplateComponentWriter(ComponentId id, const ArgumentPack<ITensorInfo> &tensors)
- : _id{ id }, _tensors{ tensors }
+ IGpuTemplateComponentWriter(ComponentId id, const ArgumentPack<ITensorInfo> &tensors) : _id{id}, _tensors{tensors}
{
}
/** Destructor */
@@ -112,7 +112,7 @@ public:
/** Generate the header list used in the component */
virtual std::set<std::string> get_headers_list() const
{
- return std::set<std::string> {};
+ return std::set<std::string>{};
}
/** Generate the execution window for the component */
virtual Window get_window() const
@@ -131,7 +131,7 @@ public:
}
private:
- ComponentId _id{ -1 };
+ ComponentId _id{-1};
ArgumentPack<ITensorInfo> _tensors{};
};
} // namespace dynamic_fusion
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.cpp
index 3c7c843dd8..c165fb5f33 100644
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.cpp
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.cpp
@@ -26,6 +26,7 @@
#include "arm_compute/core/utils/ActivationFunctionUtils.h"
#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
#include "arm_compute/core/utils/StringUtils.h"
+
#include "src/core/helpers/WindowHelpers.h"
#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
#include "support/StringSupport.h"
@@ -39,10 +40,7 @@ namespace dynamic_fusion
ClTemplateActivation::ClTemplateActivation(ComponentId id,
const ArgumentPack<ITensorInfo> &tensors,
const Attributes &attributes)
- : IGpuTemplateComponentWriter{ id, tensors },
- _src{},
- _dst{},
- _attributes{ attributes }
+ : IGpuTemplateComponentWriter{id, tensors}, _src{}, _dst{}, _attributes{attributes}
{
_src = this->tensors().get_const_tensor(TensorType::ACL_SRC);
_dst = this->tensors().get_const_tensor(TensorType::ACL_DST);
@@ -62,7 +60,7 @@ std::string ClTemplateActivation::get_component_code(const ComponentGroup &comp_
code = R"_(
//------------------ START KERNEL {{meta_kernel_id}} ---------------------
)_";
- if(is_root)
+ if (is_root)
{
code += R"_(
// IN(src) {{src}}
@@ -104,17 +102,11 @@ LOOP_UNROLLING(int, i, 0, 1, M0,
void ClTemplateActivation::declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
{
- vtable.declare_variable(
- comp_group,
- _src,
- GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
- "src");
-
- vtable.declare_variable(
- comp_group,
- _dst,
- GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
- "dst");
+ vtable.declare_variable(comp_group, _src, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
+ "src");
+
+ vtable.declare_variable(comp_group, _dst, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
+ "dst");
}
TagLUT ClTemplateActivation::get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
@@ -173,7 +165,7 @@ std::string ClTemplateActivation::get_config_id() const
std::set<std::string> ClTemplateActivation::get_headers_list() const
{
- return std::set<std::string>{ "helpers.h", "tile_helpers.h", "activation_float_helpers.h" };
+ return std::set<std::string>{"helpers.h", "tile_helpers.h", "activation_float_helpers.h"};
}
Window ClTemplateActivation::get_window() const
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.h
index ec78cf6ce5..88ee370342 100644
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.h
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.h
@@ -26,6 +26,7 @@
#include "arm_compute/core/experimental/Types.h"
#include "arm_compute/function_info/ActivationLayerInfo.h"
+
#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.h"
#include "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h"
#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h"
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateCast.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateCast.cpp
index 4956879ad3..0da3a73801 100644
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateCast.cpp
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateCast.cpp
@@ -25,6 +25,7 @@
#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
#include "arm_compute/core/utils/StringUtils.h"
+
#include "src/core/helpers/WindowHelpers.h"
#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
@@ -35,7 +36,7 @@ namespace experimental
namespace dynamic_fusion
{
ClTemplateCast::ClTemplateCast(ComponentId id, const ArgumentPack<ITensorInfo> &tensors, const Attributes &attributes)
- : IGpuTemplateComponentWriter{ id, tensors }, _src{}, _dst{}, _attributes{ attributes }
+ : IGpuTemplateComponentWriter{id, tensors}, _src{}, _dst{}, _attributes{attributes}
{
_src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0);
_dst = this->tensors().get_const_tensor(TensorType::ACL_DST_0);
@@ -62,7 +63,7 @@ std::string ClTemplateCast::get_component_code(const ComponentGroup &comp_group)
//------------------ START KERNEL {{meta_kernel_id}} CAST ---------------------
)_";
- if(is_root)
+ if (is_root)
{
code += R"_(
// IN_0(src) {{src}}
@@ -82,14 +83,15 @@ TILE(uint, M0, 1, g_dst_indirect_y);
{
)_";
- if(kernel_name == "cast_down" && is_data_type_quantized(_src->data_type()))
+ if (kernel_name == "cast_down" && is_data_type_quantized(_src->data_type()))
{
code += R"_(
{{tmp}}[m0].v ^= (VEC_DATA_TYPE({{DATA_TYPE_IN}}, N0))0x80;
)_";
}
- if(kernel_name == "cast_down" && (is_data_type_float(_src->data_type()) || _attributes.convert_policy() == ConvertPolicy::SATURATE))
+ if (kernel_name == "cast_down" &&
+ (is_data_type_float(_src->data_type()) || _attributes.convert_policy() == ConvertPolicy::SATURATE))
{
code += R"_(
{{dst}}[m0].v = CONVERT_SAT({{tmp}}[m0].v, VEC_DATA_TYPE({{DATA_TYPE_OUT}}, N0));
@@ -106,7 +108,7 @@ TILE(uint, M0, 1, g_dst_indirect_y);
})
)_";
- if(is_root)
+ if (is_root)
{
code += R"_(
LOOP_UNROLLING(int, i, 0, 1, M0,
@@ -128,17 +130,11 @@ TILE(uint, M0, 1, g_dst_indirect_y);
void ClTemplateCast::declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
{
- vtable.declare_variable(
- comp_group,
- _src,
- GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
- "src");
-
- vtable.declare_variable(
- comp_group,
- _dst,
- GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
- "dst");
+ vtable.declare_variable(comp_group, _src, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
+ "src");
+
+ vtable.declare_variable(comp_group, _dst, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
+ "dst");
}
TagLUT ClTemplateCast::get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
@@ -199,7 +195,7 @@ std::string ClTemplateCast::get_config_id() const
std::set<std::string> ClTemplateCast::get_headers_list() const
{
- return std::set<std::string>{ "helpers.h", "tile_helpers.h" };
+ return std::set<std::string>{"helpers.h", "tile_helpers.h"};
}
Window ClTemplateCast::get_window() const
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDepthwiseConv2d.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDepthwiseConv2d.cpp
index ab7cc9f05a..8380620ab2 100644
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDepthwiseConv2d.cpp
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDepthwiseConv2d.cpp
@@ -36,17 +36,17 @@ ClTemplateDepthwiseConv2d::ClTemplateDepthwiseConv2d(ComponentId
const ArgumentPack<ITensorInfo> &tensors,
const Attributes &attributes,
const Settings &settings)
- : IGpuTemplateComponentWriter{ id, tensors },
+ : IGpuTemplateComponentWriter{id, tensors},
_src{},
_weight{},
_bias{},
_dst{},
- _attributes{ attributes },
- _settings{ settings }
+ _attributes{attributes},
+ _settings{settings}
{
_src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0);
_weight = this->tensors().get_const_tensor(TensorType::ACL_SRC_1);
- if(this->tensors().get_const_tensor(TensorType::ACL_SRC_2))
+ if (this->tensors().get_const_tensor(TensorType::ACL_SRC_2))
{
_bias = this->tensors().get_const_tensor(TensorType::ACL_SRC_2);
}
@@ -71,7 +71,7 @@ std::string ClTemplateDepthwiseConv2d::get_component_code(const ComponentGroup &
// IN_1(wei) {{weight}}
)_";
- if(_bias != nullptr && _bias->has_valid_id())
+ if (_bias != nullptr && _bias->has_valid_id())
{
code += R"_(
// IN_1(bia) {{bias}}
@@ -113,7 +113,7 @@ TILE(uint, M0, 1, g_dst_indirect_y);
})
)_";
- if(_weight->dimension(height_idx) < 5)
+ if (_weight->dimension(height_idx) < 5)
{
code += R"_(
LOOP_UNROLLING(int, yk, 0, 1, _IWEI_HEIGHT,
@@ -147,7 +147,7 @@ TILE(uint, M0, 1, g_dst_indirect_y);
{
)_";
- if(!_settings.is_fma_available())
+ if (!_settings.is_fma_available())
{
code += R"_(
{{dst}}[m0].v += a[xk + m0].v * b[xk].v;
@@ -166,14 +166,14 @@ TILE(uint, M0, 1, g_dst_indirect_y);
}
)_";
- if(_weight->dimension(height_idx) < 5)
+ if (_weight->dimension(height_idx) < 5)
{
code += R"_(
)
)_";
}
- if(_bias && _bias->has_valid_id())
+ if (_bias && _bias->has_valid_id())
{
code += R"_(
TILE({{BIA_DATA_TYPE}}, 1, N0, {{bias}});
@@ -198,44 +198,31 @@ TILE(uint, M0, 1, g_dst_indirect_y);
return code;
}
-void ClTemplateDepthwiseConv2d::declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
+void ClTemplateDepthwiseConv2d::declare_variables(GpuKernelVariableTable &vtable,
+ const ComponentGroup &comp_group) const
{
- const GpuKernelArgumentInfo::Type input_type = _settings.export_input_to_cl_image() ?
- GpuKernelArgumentInfo::Type::Tensor_4D_t_Image :
- GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer;
-
- vtable.declare_variable(
- comp_group,
- _src,
- GpuKernelArgumentInfo(input_type),
- "src");
-
- const GpuKernelArgumentInfo::Type weight_type = _settings.export_weights_to_cl_image() ?
- GpuKernelArgumentInfo::Type::Tensor_4D_t_Image :
- GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer;
-
- vtable.declare_variable(
- comp_group,
- _weight,
- GpuKernelArgumentInfo(weight_type),
- "weight");
-
- if(_bias != nullptr && _bias->has_valid_id()) // optional bias
+ const GpuKernelArgumentInfo::Type input_type = _settings.export_input_to_cl_image()
+ ? GpuKernelArgumentInfo::Type::Tensor_4D_t_Image
+ : GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer;
+
+ vtable.declare_variable(comp_group, _src, GpuKernelArgumentInfo(input_type), "src");
+
+ const GpuKernelArgumentInfo::Type weight_type = _settings.export_weights_to_cl_image()
+ ? GpuKernelArgumentInfo::Type::Tensor_4D_t_Image
+ : GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer;
+
+ vtable.declare_variable(comp_group, _weight, GpuKernelArgumentInfo(weight_type), "weight");
+
+ if (_bias != nullptr && _bias->has_valid_id()) // optional bias
{
- vtable.declare_variable(
- comp_group,
- _bias,
- GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Vector),
- "bias");
+ vtable.declare_variable(comp_group, _bias, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Vector), "bias");
}
- vtable.declare_variable(
- comp_group,
- _dst,
- GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
- "dst");
+ vtable.declare_variable(comp_group, _dst, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
+ "dst");
}
-TagLUT ClTemplateDepthwiseConv2d::get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
+TagLUT ClTemplateDepthwiseConv2d::get_tag_lut(const GpuKernelVariableTable &vtable,
+ const ComponentGroup &comp_group) const
{
TagLUT lut{};
@@ -243,7 +230,7 @@ TagLUT ClTemplateDepthwiseConv2d::get_tag_lut(const GpuKernelVariableTable &vtab
lut["src"] = vtable.get_variable(_src);
lut["weight"] = vtable.get_variable(_weight);
- if(_bias != nullptr && _bias->has_valid_id()) // optional bias
+ if (_bias != nullptr && _bias->has_valid_id()) // optional bias
{
lut["bias"] = vtable.get_variable(_bias);
lut["BIA_DATA_TYPE"] = get_cl_type_from_data_type(_bias->data_type());
@@ -259,7 +246,7 @@ TagLUT ClTemplateDepthwiseConv2d::get_tag_lut(const GpuKernelVariableTable &vtab
lut["SRC_DATA_TYPE"] = _src->data_type();
lut["WEI_DATA_TYPE"] = _weight->data_type();
- switch(vtable.get_variable(_src).kernel_argument_info.type)
+ switch (vtable.get_variable(_src).kernel_argument_info.type)
{
case GpuKernelArgumentInfo::Type::Image_Export_To_ClImage2D:
case GpuKernelArgumentInfo::Type::Image_3D_Export_To_ClImage2D:
@@ -271,7 +258,7 @@ TagLUT ClTemplateDepthwiseConv2d::get_tag_lut(const GpuKernelVariableTable &vtab
break;
}
- switch(vtable.get_variable(_weight).kernel_argument_info.type)
+ switch (vtable.get_variable(_weight).kernel_argument_info.type)
{
case GpuKernelArgumentInfo::Type::Image_Export_To_ClImage2D:
case GpuKernelArgumentInfo::Type::Image_3D_Export_To_ClImage2D:
@@ -318,7 +305,7 @@ CLBuildOptions ClTemplateDepthwiseConv2d::get_build_options(const ComponentGroup
CLBuildOptions build_opts{};
- if(_settings.fast_relaxed_math())
+ if (_settings.fast_relaxed_math())
{
build_opts.add_option("-cl-fast-relaxed-math");
}
@@ -361,7 +348,7 @@ std::string ClTemplateDepthwiseConv2d::get_config_id() const
std::set<std::string> ClTemplateDepthwiseConv2d::get_headers_list() const
{
- return std::set<std::string>{ "helpers.h", "tile_helpers.h" };
+ return std::set<std::string>{"helpers.h", "tile_helpers.h"};
}
Window ClTemplateDepthwiseConv2d::get_window() const
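The rewrapped conditionals above (input_type, weight_type) put the ? and : of a multi-line ternary at the start of each continuation line, aligned under the first operand. A short self-contained sketch of that wrapping; BreakBeforeTernaryOperators is an assumed option name inferred from the diff, not a value read from the actual configuration:

    // Assumed option: BreakBeforeTernaryOperators: true
    enum class ArgType
    {
        Tensor4DBuffer,
        Tensor4DImage
    };

    int main()
    {
        const bool export_to_cl_image = true;
        // '?' and ':' lead their lines, aligned under the condition:
        const ArgType input_type = export_to_cl_image
                                       ? ArgType::Tensor4DImage
                                       : ArgType::Tensor4DBuffer;
        return input_type == ArgType::Tensor4DImage ? 0 : 1;
    }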
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDepthwiseConv2d.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDepthwiseConv2d.h
index 84b689ef64..5d04c687c3 100644
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDepthwiseConv2d.h
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDepthwiseConv2d.h
@@ -25,6 +25,7 @@
#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEDEPTHWISECONV2D
#include "arm_compute/dynamic_fusion/sketch/attributes/DepthwiseConv2dAttributes.h"
+
#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDepthwiseConv2d.h"
#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h"
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.cpp
index 3322487910..f6a7a58d1d 100644
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.cpp
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.cpp
@@ -23,14 +23,13 @@
*/
#include "ClTemplateDirectConv2d.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
-#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.h"
-
#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/StringUtils.h"
-#include "src/core/helpers/WindowHelpers.h"
+#include "src/core/helpers/WindowHelpers.h"
+#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
#include "support/StringSupport.h"
namespace arm_compute
@@ -43,17 +42,17 @@ ClTemplateDirectConv2d::ClTemplateDirectConv2d(ComponentId
const ArgumentPack<ITensorInfo> &tensors,
const Attributes &attributes,
const Settings &settings)
- : IGpuTemplateComponentWriter{ id, tensors },
+ : IGpuTemplateComponentWriter{id, tensors},
_src{},
_weight{},
_bias{},
_dst{},
- _attributes{ attributes },
- _settings{ settings }
+ _attributes{attributes},
+ _settings{settings}
{
_src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0);
_weight = this->tensors().get_const_tensor(TensorType::ACL_SRC_1);
- if(this->tensors().get_const_tensor(TensorType::ACL_SRC_2))
+ if (this->tensors().get_const_tensor(TensorType::ACL_SRC_2))
{
_bias = this->tensors().get_const_tensor(TensorType::ACL_SRC_2);
}
@@ -79,7 +78,7 @@ std::string ClTemplateDirectConv2d::get_component_code(const ComponentGroup &com
// IN_0(src) {{src}}
// IN_1(wei) {{weight}}
)_";
- if(_bias && _bias->has_valid_id())
+ if (_bias && _bias->has_valid_id())
{
code += R"_(
// IN_1(bia) {{bias}}
@@ -161,7 +160,7 @@ TILE(uint, M0, 1, g_dst_indirect_y);
}
)_";
- if(leftover_loop)
+ if (leftover_loop)
{
code += R"_(
for(; ck < _ISRC_CHANNELS; ++ck)
@@ -186,9 +185,9 @@ TILE(uint, M0, 1, g_dst_indirect_y);
T_MMUL({{SRC_DATA_TYPE}}, {{WEI_DATA_TYPE}}, {{ACC_DATA_TYPE}}, M0, N0, 1, NT, T, a, b, {{dst}});
}
)_";
-}
+ }
-code += R"_(
+ code += R"_(
#undef _I_WEI_WIDTH
#undef _I_WEI_HEIGHT
#undef _ISRC_WIDTH
@@ -202,7 +201,7 @@ code += R"_(
}
)_";
- if(_bias && _bias->has_valid_id())
+ if (_bias && _bias->has_valid_id())
{
code += R"_(
TILE({{BIA_DATA_TYPE}}, 1, N0, bias0);
@@ -211,9 +210,9 @@ code += R"_(
T_ELTWISE_BROADCAST_ADD_X({{ACC_DATA_TYPE}}, M0, N0, {{dst}}, bias0, {{dst}});
)_";
-}
+ }
-code += R"_(
+ code += R"_(
LOOP_UNROLLING(int, i, 0, 1, M0,
{
g_dst_indirect_y[i].v = (uint)min(g_ind_1 + i, (int)({{DST_WIDTH}} * {{DST_HEIGHT}}) - 1);
@@ -227,32 +226,19 @@ code += R"_(
void ClTemplateDirectConv2d::declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
{
- vtable.declare_variable(
- comp_group,
- _src,
- GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
- "src");
-
- const GpuKernelArgumentInfo::Type weight_type = _settings.export_to_cl_image() ? GpuKernelArgumentInfo::Type::Tensor_4D_t_Image : GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer;
- vtable.declare_variable(
- comp_group,
- _weight,
- GpuKernelArgumentInfo(weight_type),
- "weight");
-
- if(_bias && _bias->has_valid_id()) // optional bias
+ vtable.declare_variable(comp_group, _src, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
+ "src");
+
+ const GpuKernelArgumentInfo::Type weight_type = _settings.export_to_cl_image()
+ ? GpuKernelArgumentInfo::Type::Tensor_4D_t_Image
+ : GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer;
+ vtable.declare_variable(comp_group, _weight, GpuKernelArgumentInfo(weight_type), "weight");
+
+ if (_bias && _bias->has_valid_id()) // optional bias
{
- vtable.declare_variable(
- comp_group,
- _bias,
- GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Vector),
- "bias");
+ vtable.declare_variable(comp_group, _bias, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Vector), "bias");
}
- vtable.declare_variable(
- comp_group,
- _dst,
- GpuKernelArgumentInfo(common_tensor_type),
- "dst");
+ vtable.declare_variable(comp_group, _dst, GpuKernelArgumentInfo(common_tensor_type), "dst");
}
TagLUT ClTemplateDirectConv2d::get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
@@ -262,7 +248,7 @@ TagLUT ClTemplateDirectConv2d::get_tag_lut(const GpuKernelVariableTable &vtable,
lut["src"] = vtable.get_variable(_src);
lut["weight"] = vtable.get_variable(_weight);
- if(_bias && _bias->has_valid_id()) // optional bias
+ if (_bias && _bias->has_valid_id()) // optional bias
{
lut["bias"] = vtable.get_variable(_bias);
lut["BIA_DATA_TYPE"] = get_cl_type_from_data_type(_bias->data_type());
@@ -279,34 +265,34 @@ TagLUT ClTemplateDirectConv2d::get_tag_lut(const GpuKernelVariableTable &vtable,
lut["WEI_DATA_TYPE"] = _weight->data_type();
lut["SRC_TENSOR_TYPE"] = "BUFFER";
- switch(vtable.get_variable(_weight).kernel_argument_info.type)
+ switch (vtable.get_variable(_weight).kernel_argument_info.type)
{
case GpuKernelArgumentInfo::Type::Image_Export_To_ClImage2D:
case GpuKernelArgumentInfo::Type::Image_3D_Export_To_ClImage2D:
case GpuKernelArgumentInfo::Type::Tensor_4D_t_Image:
- {
- lut["WEI_TENSOR_TYPE"] = "IMAGE";
- break;
- }
+ {
+ lut["WEI_TENSOR_TYPE"] = "IMAGE";
+ break;
+ }
default:
- {
- lut["WEI_TENSOR_TYPE"] = "BUFFER";
- break;
- }
+ {
+ lut["WEI_TENSOR_TYPE"] = "BUFFER";
+ break;
+ }
}
- const auto width_idx = 1;
- const auto height_idx = 2;
+ const auto width_idx = 1;
+ const auto height_idx = 2;
const auto channel_idx = 0;
- lut["SRC_WIDTH"] = _src->dimension(width_idx);
- lut["SRC_HEIGHT"] = _src->dimension(height_idx);
+ lut["SRC_WIDTH"] = _src->dimension(width_idx);
+ lut["SRC_HEIGHT"] = _src->dimension(height_idx);
lut["SRC_CHANNELS"] = _src->dimension(channel_idx);
- lut["WEI_WIDTH"] = _weight->dimension(width_idx);
- lut["WEI_HEIGHT"] = _weight->dimension(height_idx);
+ lut["WEI_WIDTH"] = _weight->dimension(width_idx);
+ lut["WEI_HEIGHT"] = _weight->dimension(height_idx);
- lut["DST_WIDTH"] = _dst->dimension(width_idx);
- lut["DST_HEIGHT"] = _dst->dimension(height_idx);
+ lut["DST_WIDTH"] = _dst->dimension(width_idx);
+ lut["DST_HEIGHT"] = _dst->dimension(height_idx);
lut["DST_CHANNELS"] = _dst->dimension(channel_idx);
lut["STRIDE_X"] = _attributes.stride().x();
@@ -324,14 +310,14 @@ CLBuildOptions ClTemplateDirectConv2d::get_build_options(const ComponentGroup &c
{
const unsigned int channel_idx = get_data_layout_dimension_index(_src->data_layout(), DataLayoutDimension::CHANNEL);
- const auto root_window = comp_group.get_root_component()->template_writer()->get_window();
- const unsigned int n0 = root_window.x().step();
- const unsigned int m0 = root_window.y().step();
- const unsigned int k0 = adjust_vec_size(_settings.direct_conv_descriptor().k0, _src->dimension(channel_idx));
+ const auto root_window = comp_group.get_root_component()->template_writer()->get_window();
+ const unsigned int n0 = root_window.x().step();
+ const unsigned int m0 = root_window.y().step();
+ const unsigned int k0 = adjust_vec_size(_settings.direct_conv_descriptor().k0, _src->dimension(channel_idx));
const unsigned int partial_store_n0 = _dst->dimension(0) % n0;
CLBuildOptions build_opts{};
- if(_settings.fast_relaxed_math())
+ if (_settings.fast_relaxed_math())
{
build_opts.add_option("-cl-fast-relaxed-math");
}
@@ -379,7 +365,7 @@ std::string ClTemplateDirectConv2d::get_config_id() const
std::set<std::string> ClTemplateDirectConv2d::get_headers_list() const
{
- return std::set<std::string>{ "helpers.h", "tile_helpers.h" };
+ return std::set<std::string>{"helpers.h", "tile_helpers.h"};
}
Window ClTemplateDirectConv2d::get_window() const
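The get_tag_lut() and get_build_options() hunks above also column-align runs of consecutive assignments and declarations, padding around '=' so the values line up. A compilable sketch of that effect; AlignConsecutiveAssignments and AlignConsecutiveDeclarations are inferred names for whatever the revised configuration actually enables:

    #include <map>
    #include <string>

    int main()
    {
        // Assumed: AlignConsecutiveAssignments pads '=' into a column.
        const unsigned int n0         = 4;
        const unsigned int m0         = 2;
        const unsigned int partial_n0 = 16 % n0;

        // The same alignment applied to a tag lookup table, as in the diff:
        std::map<std::string, unsigned int> lut;
        lut["SRC_WIDTH"]  = 16;
        lut["SRC_HEIGHT"] = 8;
        lut["DST_WIDTH"]  = partial_n0 + m0;
        return static_cast<int>(lut["DST_WIDTH"]);
    }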
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.h
index 8988d3ca1c..03c8cd2f15 100644
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.h
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.h
@@ -26,6 +26,7 @@
#include "arm_compute/core/experimental/Types.h"
#include "arm_compute/dynamic_fusion/sketch/attributes/Conv2dAttributes.h"
+
#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.h"
#include "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h"
#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h"
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.cpp
index c0481ae190..78bff3c3f3 100644
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.cpp
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.cpp
@@ -23,14 +23,13 @@
*/
#include "ClTemplateElementwiseBinary.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
-#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.h"
-
#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/StringUtils.h"
-#include "src/core/helpers/WindowHelpers.h"
+#include "src/core/helpers/WindowHelpers.h"
+#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
#include "support/StringSupport.h"
namespace arm_compute
@@ -44,11 +43,7 @@ constexpr unsigned int vector_size_byte_opencl = 16;
ClTemplateElementwiseBinary::ClTemplateElementwiseBinary(ComponentId id,
const ArgumentPack<ITensorInfo> &tensors,
const Attributes &attributes)
- : IGpuTemplateComponentWriter{ id, tensors },
- _lhs{},
- _rhs{},
- _dst{},
- _attributes{ attributes }
+ : IGpuTemplateComponentWriter{id, tensors}, _lhs{}, _rhs{}, _dst{}, _attributes{attributes}
{
_lhs = this->tensors().get_const_tensor(TensorType::ACL_SRC_0);
_rhs = this->tensors().get_const_tensor(TensorType::ACL_SRC_1);
@@ -69,67 +64,67 @@ std::string ClTemplateElementwiseBinary::get_component_code(const ComponentGroup
const bool is_rhs_input = comp_group.is_input_tensor(_rhs);
code =
-R"_(
+ R"_(
//------------------ START KERNEL {{meta_kernel_id}} {{ELTWISE_OP}} ---------------------
)_";
- if(is_root)
+ if (is_root)
{
code +=
-R"_(
+ R"_(
TILE(uint, M0, 1, g_dst_indirect_y);
)_";
}
- if(is_lhs_input)
+ if (is_lhs_input)
{
code +=
-R"_(
+ R"_(
TILE({{DATA_TYPE}}, {{lhs_m0}}, N0, {{lhs}});
)_";
}
- if(is_rhs_input)
+ if (is_rhs_input)
{
code +=
-R"_(
+ R"_(
TILE({{DATA_TYPE}}, {{rhs_m0}}, N0, {{rhs}});
)_";
}
code +=
-R"_(
+ R"_(
{
)_";
- if(is_lhs_input)
+ if (is_lhs_input)
{
code +=
-R"_(
+ R"_(
{{lhs}}_offset_first_element_in_bytes += g_ind_2 * {{lhs}}_stride_w;
T_LOAD({{DATA_TYPE}}, {{lhs_m0}}, {{lhs_n0}}, BUFFER, {{lhs}}, {{lhs_start_ind_0}}, {{lhs_start_ind_1}}, 1, {{lhs}}_stride_y, {{lhs}});
)_";
}
- if(is_rhs_input)
+ if (is_rhs_input)
{
code +=
-R"_(
+ R"_(
{{rhs}}_offset_first_element_in_bytes += g_ind_2 * {{rhs}}_stride_w;
T_LOAD({{DATA_TYPE}}, {{rhs_m0}}, {{rhs_n0}}, BUFFER, {{rhs}}, {{rhs_start_ind_0}}, {{rhs_start_ind_1}}, 1, {{rhs}}_stride_y, {{rhs}});
)_";
}
code +=
-R"_(
+ R"_(
T_ELTWISE_{{BROADCAST_OP}}{{ELTWISE_OP}}({{DATA_TYPE}}, M0, N0, {{lhs}}, {{rhs}}, {{dst}});
)_";
- if(is_root)
+ if (is_root)
{
// Calculate the destination indirect Y
code +=
-R"_(
+ R"_(
LOOP_UNROLLING(int, i, 0, 1, M0,
{
g_dst_indirect_y[i].v = (uint)min(g_ind_1 + i, (int)({{arg_dst}}_w * {{arg_dst}}_h) - 1);
@@ -139,7 +134,7 @@ R"_(
}
code +=
-R"_(
+ R"_(
}
//------------------ END KERNEL {{meta_kernel_id}} {{ELTWISE_OP}} ---------------------
)_";
@@ -147,28 +142,18 @@ R"_(
return code;
}
-void ClTemplateElementwiseBinary::declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
+void ClTemplateElementwiseBinary::declare_variables(GpuKernelVariableTable &vtable,
+ const ComponentGroup &comp_group) const
{
- vtable.declare_variable(
- comp_group,
- _lhs,
- GpuKernelArgumentInfo(common_tensor_type),
- "lhs");
-
- vtable.declare_variable(
- comp_group,
- _rhs,
- GpuKernelArgumentInfo(common_tensor_type),
- "rhs");
-
- vtable.declare_variable(
- comp_group,
- _dst,
- GpuKernelArgumentInfo(common_tensor_type),
- "dst");
+ vtable.declare_variable(comp_group, _lhs, GpuKernelArgumentInfo(common_tensor_type), "lhs");
+
+ vtable.declare_variable(comp_group, _rhs, GpuKernelArgumentInfo(common_tensor_type), "rhs");
+
+ vtable.declare_variable(comp_group, _dst, GpuKernelArgumentInfo(common_tensor_type), "dst");
}
-TagLUT ClTemplateElementwiseBinary::get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
+TagLUT ClTemplateElementwiseBinary::get_tag_lut(const GpuKernelVariableTable &vtable,
+ const ComponentGroup &comp_group) const
{
TagLUT lut{};
@@ -182,7 +167,7 @@ TagLUT ClTemplateElementwiseBinary::get_tag_lut(const GpuKernelVariableTable &vt
lut["dst"] = vtable.get_variable(_dst);
lut["arg_dst"] = vtable.get_variable(comp_group.get_any_dst_tensor());
- switch(_attributes.operation())
+ switch (_attributes.operation())
{
case Attributes::ElementwiseOp::Add:
lut["ELTWISE_OP"] = "ADD";
@@ -197,10 +182,10 @@ TagLUT ClTemplateElementwiseBinary::get_tag_lut(const GpuKernelVariableTable &vt
ARM_COMPUTE_ERROR("Arithmetic Operation not supported");
}
- ARM_COMPUTE_ERROR_ON(
- comp_group.is_intermediate_tensor(_lhs) && detail::have_different_dimensions(_lhs->tensor_shape(), _dst->tensor_shape(), 0));
- ARM_COMPUTE_ERROR_ON(
- comp_group.is_intermediate_tensor(_rhs) && detail::have_different_dimensions(_rhs->tensor_shape(), _dst->tensor_shape(), 0));
+ ARM_COMPUTE_ERROR_ON(comp_group.is_intermediate_tensor(_lhs) &&
+ detail::have_different_dimensions(_lhs->tensor_shape(), _dst->tensor_shape(), 0));
+ ARM_COMPUTE_ERROR_ON(comp_group.is_intermediate_tensor(_rhs) &&
+ detail::have_different_dimensions(_rhs->tensor_shape(), _dst->tensor_shape(), 0));
// Set broadcast parameters
// PRE: All tensors are broadcast-compatible
@@ -228,9 +213,7 @@ TagLUT ClTemplateElementwiseBinary::get_tag_lut(const GpuKernelVariableTable &vt
lut["rhs_m0"] = (rhs_broadcast_yz) ? "1" : "M0";
lut["rhs_start_ind_1"] = (rhs_broadcast_yz) ? "0" : "g_ind_1";
- lut["BROADCAST_OP"] = (lhs_broadcast_yz) ? "BROADCAST_LHS_X_" :
- (rhs_broadcast_yz) ? "BROADCAST_RHS_X_" :
- "";
+ lut["BROADCAST_OP"] = (lhs_broadcast_yz) ? "BROADCAST_LHS_X_" : (rhs_broadcast_yz) ? "BROADCAST_RHS_X_" : "";
return lut;
}
@@ -268,7 +251,7 @@ std::string ClTemplateElementwiseBinary::get_config_id() const
std::set<std::string> ClTemplateElementwiseBinary::get_headers_list() const
{
- return std::set<std::string>{ "helpers.h", "tile_helpers.h" };
+ return std::set<std::string>{"helpers.h", "tile_helpers.h"};
}
Window ClTemplateElementwiseBinary::get_window() const
@@ -279,8 +262,9 @@ Window ClTemplateElementwiseBinary::get_window() const
// Collapse Dim 1 (W) and Dim 2 (H) together, leave Dim 0 (C) and upper dimensions unchanged
// This is in line with the collapsing convention used by operators like Conv2d
output_shape.collapse(2U, 1U);
- const unsigned int num_elems_processed_per_iteration = adjust_vec_size(vector_size_byte_opencl / _dst->element_size(), _dst->dimension(0));
- Window win = calculate_max_window(output_shape, Steps(num_elems_processed_per_iteration));
+ const unsigned int num_elems_processed_per_iteration =
+ adjust_vec_size(vector_size_byte_opencl / _dst->element_size(), _dst->dimension(0));
+ Window win = calculate_max_window(output_shape, Steps(num_elems_processed_per_iteration));
return win;
}
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.h
index 8cca954efe..991c0eca44 100644
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.h
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.h
@@ -25,6 +25,7 @@
#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEELEMENTWISEBINARY
#include "arm_compute/core/experimental/Types.h"
+
#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.h"
#include "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h"
#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h"
@@ -48,9 +49,7 @@ public:
* @param[in] tensors Tensor arguments to the components
* @param[in] attributes Component attributes
*/
- ClTemplateElementwiseBinary(ComponentId id,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes);
+ ClTemplateElementwiseBinary(ComponentId id, const ArgumentPack<ITensorInfo> &tensors, const Attributes &attributes);
/** Prevent instances of this class from being copy constructed */
ClTemplateElementwiseBinary(const ClTemplateElementwiseBinary &elementwise) = delete;
/** Prevent instances of this class from being copied */
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DMaxShiftExpSum.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DMaxShiftExpSum.cpp
index a8d8d32b12..522c33a022 100644
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DMaxShiftExpSum.cpp
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DMaxShiftExpSum.cpp
@@ -26,6 +26,7 @@
#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
#include "arm_compute/core/utils/StringUtils.h"
+
#include "src/core/helpers/WindowHelpers.h"
#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
#include "support/StringSupport.h"
@@ -38,16 +39,12 @@ namespace dynamic_fusion
{
namespace
{
- constexpr unsigned int serial_vector_size = 8;
+constexpr unsigned int serial_vector_size = 8;
} // namespace
ClTemplateLogits1DMaxShiftExpSum::ClTemplateLogits1DMaxShiftExpSum(ComponentId id,
const ArgumentPack<ITensorInfo> &tensors,
const Attributes &attributes)
- : IGpuTemplateComponentWriter{ id, tensors },
- _src{},
- _sum{},
- _dst{},
- _attributes{ attributes }
+ : IGpuTemplateComponentWriter{id, tensors}, _src{}, _sum{}, _dst{}, _attributes{attributes}
{
_src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0);
_sum = this->tensors().get_const_tensor(TensorType::ACL_DST_0);
@@ -79,7 +76,7 @@ std::string ClTemplateLogits1DMaxShiftExpSum::get_component_code(const Component
const bool beta_defined = (_attributes.beta() != 1.f);
- if(beta_defined)
+ if (beta_defined)
{
code += R"_(
VEC_TYPE beta = (VEC_TYPE){{BETA}};
@@ -91,7 +88,7 @@ std::string ClTemplateLogits1DMaxShiftExpSum::get_component_code(const Component
const unsigned int vector_size = adjust_vec_size(_serial_vector_size, reduction_dim_size);
const bool non_multiple_of_n0 = ((reduction_dim_size % vector_size) != 0);
- if(non_multiple_of_n0)
+ if (non_multiple_of_n0)
{
code += R"_(
VEC_TYPE data = VLOAD(N0)(0, (__global {{DATA_TYPE}} *)src_addr);
@@ -111,19 +108,19 @@ std::string ClTemplateLogits1DMaxShiftExpSum::get_component_code(const Component
VEC_TYPE sum1D = 0;
)_";
- if(non_multiple_of_n0)
+ if (non_multiple_of_n0)
{
code += R"_(
data -= max_val;
)_";
- if(beta_defined)
+ if (beta_defined)
{
code += R"_(
data *= beta;
)_";
}
- if(_attributes.is_log_softmax())
+ if (_attributes.is_log_softmax())
{
code += R"_(
VSTORE_PARTIAL(N0, PARTIAL_N0)
@@ -153,14 +150,14 @@ std::string ClTemplateLogits1DMaxShiftExpSum::get_component_code(const Component
data -= max_val;
)_";
- if(beta_defined)
+ if (beta_defined)
{
code += R"_(
data *= beta;
)_";
}
- if(_attributes.is_log_softmax())
+ if (_attributes.is_log_softmax())
{
code += R"_(
VSTORE(N0)
@@ -191,28 +188,18 @@ std::string ClTemplateLogits1DMaxShiftExpSum::get_component_code(const Component
return code;
}
-void ClTemplateLogits1DMaxShiftExpSum::declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
+void ClTemplateLogits1DMaxShiftExpSum::declare_variables(GpuKernelVariableTable &vtable,
+ const ComponentGroup &comp_group) const
{
- vtable.declare_variable(
- comp_group,
- _src,
- GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_3D),
- "src");
-
- vtable.declare_variable(
- comp_group,
- _sum,
- GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_3D),
- "sum");
-
- vtable.declare_variable(
- comp_group,
- _dst,
- GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_3D),
- "dst");
+ vtable.declare_variable(comp_group, _src, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_3D), "src");
+
+ vtable.declare_variable(comp_group, _sum, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_3D), "sum");
+
+ vtable.declare_variable(comp_group, _dst, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_3D), "dst");
}
-TagLUT ClTemplateLogits1DMaxShiftExpSum::get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
+TagLUT ClTemplateLogits1DMaxShiftExpSum::get_tag_lut(const GpuKernelVariableTable &vtable,
+ const ComponentGroup &comp_group) const
{
ARM_COMPUTE_UNUSED(comp_group);
@@ -241,8 +228,8 @@ CLBuildOptions ClTemplateLogits1DMaxShiftExpSum::get_build_options(const Compone
ARM_COMPUTE_UNUSED(comp_group);
CLBuildOptions build_opts{};
- const unsigned int reduction_dim_size = _src->dimension(0);
- const unsigned int vector_size = adjust_vec_size(serial_vector_size, reduction_dim_size);
+ const unsigned int reduction_dim_size = _src->dimension(0);
+ const unsigned int vector_size = adjust_vec_size(serial_vector_size, reduction_dim_size);
build_opts.add_option("-DN0=" + support::cpp11::to_string(vector_size));
build_opts.add_option("-DPARTIAL_N0=" + support::cpp11::to_string((reduction_dim_size % vector_size)));
@@ -264,7 +251,7 @@ std::string ClTemplateLogits1DMaxShiftExpSum::get_config_id() const
std::set<std::string> ClTemplateLogits1DMaxShiftExpSum::get_headers_list() const
{
- return std::set<std::string>{ "helpers.h", "tile_helpers.h" };
+ return std::set<std::string>{"helpers.h", "tile_helpers.h"};
}
Window ClTemplateLogits1DMaxShiftExpSum::get_window() const
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DMaxShiftExpSum.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DMaxShiftExpSum.h
index 5d232c0cf2..ac9ddaa9d4 100644
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DMaxShiftExpSum.h
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DMaxShiftExpSum.h
@@ -46,7 +46,9 @@ public:
* @param[in] tensors Tensor arguments to the components
* @param[in] attributes Component attributes
*/
- ClTemplateLogits1DMaxShiftExpSum(ComponentId id, const ArgumentPack<ITensorInfo> &tensors, const Attributes &attributes);
+ ClTemplateLogits1DMaxShiftExpSum(ComponentId id,
+ const ArgumentPack<ITensorInfo> &tensors,
+ const Attributes &attributes);
/** Prevent instances of this class from being copy constructed */
ClTemplateLogits1DMaxShiftExpSum(const ClTemplateLogits1DMaxShiftExpSum &) = delete;
/** Prevent instances of this class from being copied */
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DNorm.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DNorm.cpp
index 056e570a25..7d7c3e6673 100644
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DNorm.cpp
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DNorm.cpp
@@ -25,6 +25,7 @@
#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DNorm.h"
#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
+
#include "src/core/helpers/WindowHelpers.h"
#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
#include "support/StringSupport.h"
@@ -38,11 +39,7 @@ namespace dynamic_fusion
ClTemplateLogits1DNorm::ClTemplateLogits1DNorm(ComponentId id,
const ArgumentPack<ITensorInfo> &tensors,
const Attributes &attributes)
- : IGpuTemplateComponentWriter{ id, tensors },
- _src{},
- _sum{},
- _dst{},
- _attributes{ attributes }
+ : IGpuTemplateComponentWriter{id, tensors}, _src{}, _sum{}, _dst{}, _attributes{attributes}
{
_src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0);
_sum = this->tensors().get_const_tensor(TensorType::ACL_SRC_1);
@@ -76,7 +73,7 @@ std::string ClTemplateLogits1DNorm::get_component_code(const ComponentGroup &com
data0 = VLOAD(N0)(0, (__global {{DATA_TYPE}} *)src_addr);
)_";
- if(_attributes.is_log_softmax())
+ if (_attributes.is_log_softmax())
{
code += R"_(
sum_val = log(sum_val);
@@ -101,23 +98,11 @@ std::string ClTemplateLogits1DNorm::get_component_code(const ComponentGroup &com
void ClTemplateLogits1DNorm::declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
{
- vtable.declare_variable(
- comp_group,
- _src,
- GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_3D),
- "src");
-
- vtable.declare_variable(
- comp_group,
- _sum,
- GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_3D),
- "sum");
-
- vtable.declare_variable(
- comp_group,
- _dst,
- GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_3D),
- "dst");
+ vtable.declare_variable(comp_group, _src, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_3D), "src");
+
+ vtable.declare_variable(comp_group, _sum, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_3D), "sum");
+
+ vtable.declare_variable(comp_group, _dst, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_3D), "dst");
}
TagLUT ClTemplateLogits1DNorm::get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
@@ -168,14 +153,14 @@ std::string ClTemplateLogits1DNorm::get_config_id() const
std::set<std::string> ClTemplateLogits1DNorm::get_headers_list() const
{
- return std::set<std::string>{ "helpers.h", "tile_helpers.h" };
+ return std::set<std::string>{"helpers.h", "tile_helpers.h"};
}
Window ClTemplateLogits1DNorm::get_window() const
{
ARM_COMPUTE_ERROR_ON_MSG(_dst->tensor_shape().total_size() == 0U, "Destination tensor is not initialized");
constexpr unsigned int serial_vector_size = 16;
- const unsigned int vector_size = adjust_vec_size(serial_vector_size, _src->dimension(0));
+ const unsigned int vector_size = adjust_vec_size(serial_vector_size, _src->dimension(0));
Window win = calculate_max_window(*_src, Steps(vector_size));
return win.collapse(win, Window::DimZ);
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.cpp
index 34840c2100..ebb0374501 100644
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.cpp
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.cpp
@@ -23,14 +23,13 @@
*/
#include "ClTemplatePool2d.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
-#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.h"
-
-#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/StringUtils.h"
-#include "src/core/helpers/WindowHelpers.h"
+#include "src/core/helpers/WindowHelpers.h"
+#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
#include "support/StringSupport.h"
namespace arm_compute
@@ -50,11 +49,7 @@ ClTemplatePool2d::ClTemplatePool2d(ComponentId id,
const ArgumentPack<ITensorInfo> &tensors,
const Attributes &attributes,
const Settings &settings)
- : IGpuTemplateComponentWriter{ id, tensors },
- _src{},
- _dst{},
- _attributes{ attributes },
- _settings{ settings }
+ : IGpuTemplateComponentWriter{id, tensors}, _src{}, _dst{}, _attributes{attributes}, _settings{settings}
{
_src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0);
_dst = this->tensors().get_const_tensor(TensorType::ACL_DST_0);
@@ -71,7 +66,7 @@ std::string ClTemplatePool2d::get_component_code(const ComponentGroup &comp_grou
ARM_COMPUTE_UNUSED(comp_group);
// Condition to use 2x2 optimized kernel
- if(_attributes.pool_size() == Size2D(2, 2))
+ if (_attributes.pool_size() == Size2D(2, 2))
{
return get_2x2_kernel_code();
}
@@ -83,11 +78,13 @@ std::string ClTemplatePool2d::get_component_code(const ComponentGroup &comp_grou
std::string ClTemplatePool2d::get_MxN_kernel_code() const
{
- const auto pool_type = _attributes.pool_type();
- const bool fp_mixed_precision = (_src->data_type() == DataType::F16) && _settings.mixed_precision() && pool_type != PoolingType::MAX;
+ const auto pool_type = _attributes.pool_type();
+ const bool fp_mixed_precision =
+ (_src->data_type() == DataType::F16) && _settings.mixed_precision() && pool_type != PoolingType::MAX;
// Define pool op macro.
- std::string pool_op = (pool_type == PoolingType::AVG) ? R"_(#define POOL_OP(x,y) ((x) + (y)))_" : R"_(#define POOL_OP(x,y) (fmax((x), (y))) )_";
+ std::string pool_op = (pool_type == PoolingType::AVG) ? R"_(#define POOL_OP(x,y) ((x) + (y)))_"
+ : R"_(#define POOL_OP(x,y) (fmax((x), (y))) )_";
// Kernel start
// Note: If C is not multiple of N0, we shift back of PARTIAL_N0 elements to compute the leftover elements for get_global_id(0) == 0
@@ -129,7 +126,7 @@ std::string ClTemplatePool2d::get_MxN_kernel_code() const
)_";
// Determine filter size depending on if padding is excluded or not
- if(_attributes.exclude_padding())
+ if (_attributes.exclude_padding())
{
code += R"_(
const int filter_size = (pool_y_e - pool_y_s) * (pool_x_e - pool_x_s);
@@ -144,7 +141,8 @@ std::string ClTemplatePool2d::get_MxN_kernel_code() const
// Loop through pool size
// if global pooling
- if(_attributes.pool_size().x() == _src->dimension(width_idx) && _attributes.pool_size().y() == _src->dimension(height_idx))
+ if (_attributes.pool_size().x() == _src->dimension(width_idx) &&
+ _attributes.pool_size().y() == _src->dimension(height_idx))
{
// Begin loop
code += R"_(
@@ -173,7 +171,7 @@ std::string ClTemplatePool2d::get_MxN_kernel_code() const
// if condition inside loop - use 32bit acc if mixed_precision.
// End loop through pooling section.
- if(fp_mixed_precision)
+ if (fp_mixed_precision)
{
// In case of FP_MIXED_PRECISION, ACC_DATA_TYPE is != DATA_TYPE
code += R"_(
@@ -194,7 +192,7 @@ std::string ClTemplatePool2d::get_MxN_kernel_code() const
}
// For Pool AVG ONLY, divide pool output by filter size
- if(pool_type == PoolingType::AVG)
+ if (pool_type == PoolingType::AVG)
{
code += R"_(
res0 /= (VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0))filter_size;
@@ -202,7 +200,7 @@ std::string ClTemplatePool2d::get_MxN_kernel_code() const
}
// If mixed precision convert datatype before storing. Then end kernel.
- if(fp_mixed_precision)
+ if (fp_mixed_precision)
{
code += R"_(
VEC_DATA_TYPE({{DATA_TYPE}}, N0)
@@ -228,9 +226,11 @@ std::string ClTemplatePool2d::get_MxN_kernel_code() const
std::string ClTemplatePool2d::get_2x2_kernel_code() const
{
- const auto pool_type = _attributes.pool_type();
- const bool fp_mixed_precision = (_src->data_type() == DataType::F16) && _settings.mixed_precision() && pool_type != PoolingType::MAX;
- std::string pool_op = (pool_type == PoolingType::AVG) ? R"_(#define POOL_OP(x,y) ((x) + (y)))_" : R"_(#define POOL_OP(x,y) (fmax((x), (y))) )_";
+ const auto pool_type = _attributes.pool_type();
+ const bool fp_mixed_precision =
+ (_src->data_type() == DataType::F16) && _settings.mixed_precision() && pool_type != PoolingType::MAX;
+ std::string pool_op = (pool_type == PoolingType::AVG) ? R"_(#define POOL_OP(x,y) ((x) + (y)))_"
+ : R"_(#define POOL_OP(x,y) (fmax((x), (y))) )_";
std::string code = R"_(
//------------------ START KERNEL {{meta_kernel_id}} ---------------------
@@ -274,7 +274,7 @@ std::string ClTemplatePool2d::get_2x2_kernel_code() const
REPEAT_VAR_INIT_TO_CONST(4, VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0), data, 0);
)_";
- if(fp_mixed_precision)
+ if (fp_mixed_precision)
{
// In case of FP_MIXED_PRECISION, ACC_DATA_TYPE is != DATA_TYPE
code += R"_(
@@ -294,7 +294,7 @@ std::string ClTemplatePool2d::get_2x2_kernel_code() const
)_";
}
- if(pool_type != PoolingType::MAX)
+ if (pool_type != PoolingType::MAX)
{
// Make invalid the values loaded if the x or y coordinate was clamped (out-of-bound)
code += R"_(
@@ -321,10 +321,10 @@ std::string ClTemplatePool2d::get_2x2_kernel_code() const
res0 = POOL_OP(res0, data3);
)_";
- if(pool_type == PoolingType::AVG)
+ if (pool_type == PoolingType::AVG)
{
// If avg pooling divide result accordingly.
- if(_attributes.exclude_padding())
+ if (_attributes.exclude_padding())
{
code += R"_(
res0 /= (VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0))filter_size;
@@ -339,7 +339,7 @@ std::string ClTemplatePool2d::get_2x2_kernel_code() const
}
// Store result
- if(fp_mixed_precision)
+ if (fp_mixed_precision)
{
code += R"_(
VEC_DATA_TYPE({{DATA_TYPE}}, N0)
@@ -365,17 +365,11 @@ std::string ClTemplatePool2d::get_2x2_kernel_code() const
void ClTemplatePool2d::declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
{
- vtable.declare_variable(
- comp_group,
- _src,
- GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
- "src");
-
- vtable.declare_variable(
- comp_group,
- _dst,
- GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
- "dst");
+ vtable.declare_variable(comp_group, _src, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
+ "src");
+
+ vtable.declare_variable(comp_group, _dst, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
+ "dst");
}
TagLUT ClTemplatePool2d::get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
@@ -391,12 +385,15 @@ TagLUT ClTemplatePool2d::get_tag_lut(const GpuKernelVariableTable &vtable, const
lut["meta_kernel_id"] = id();
// Retrieve relevant data
- const auto padding = _attributes.pad();
- const auto stride = _attributes.stride();
- const auto pool_size = _attributes.pool_size();
- const auto data_type = _src->data_type();
- const auto use_fp_mixed_precision = (_src->data_type() == DataType::F16) && _settings.mixed_precision() && _attributes.pool_type() != PoolingType::MAX;
- const std::string max_initial_value = _settings.use_inf_as_limit() ? "(-INFINITY)" : float_to_string_with_full_precision(std::numeric_limits<float>::lowest());
+ const auto padding = _attributes.pad();
+ const auto stride = _attributes.stride();
+ const auto pool_size = _attributes.pool_size();
+ const auto data_type = _src->data_type();
+ const auto use_fp_mixed_precision = (_src->data_type() == DataType::F16) && _settings.mixed_precision() &&
+ _attributes.pool_type() != PoolingType::MAX;
+ const std::string max_initial_value =
+ _settings.use_inf_as_limit() ? "(-INFINITY)"
+ : float_to_string_with_full_precision(std::numeric_limits<float>::lowest());
// pool specific
lut["STRIDE_X"] = stride.x();
@@ -407,7 +404,8 @@ TagLUT ClTemplatePool2d::get_tag_lut(const GpuKernelVariableTable &vtable, const
lut["POOL_SIZE_Y"] = pool_size.height;
// Datatypes and variables
- lut["ACC_DATA_TYPE"] = get_cl_type_from_data_type((use_fp_mixed_precision) ? (DataType::F32) : (data_type)); // Type of accumulators to use.
+ lut["ACC_DATA_TYPE"] = get_cl_type_from_data_type(
+ (use_fp_mixed_precision) ? (DataType::F32) : (data_type)); // Type of accumulators to use.
lut["DATA_TYPE"] = get_cl_type_from_data_type(data_type);
lut["SRC_WIDTH"] = _src->dimension(width_idx);
lut["SRC_HEIGHT"] = _src->dimension(height_idx);
@@ -454,14 +452,14 @@ std::string ClTemplatePool2d::get_config_id() const
std::set<std::string> ClTemplatePool2d::get_headers_list() const
{
- return std::set<std::string>{ "helpers.h", "tile_helpers.h", "repeat.h" };
+ return std::set<std::string>{"helpers.h", "tile_helpers.h", "repeat.h"};
}
Window ClTemplatePool2d::get_window() const
{
ARM_COMPUTE_ERROR_ON_MSG(_dst->tensor_shape().total_size() == 0U, "Destination tensor is not initialized");
const auto output_shape = _dst->tensor_shape();
- const unsigned int vec_size = adjust_vec_size(((_dst->data_type() == DataType::F32) ? 2 : 4), _dst->dimension(0));
+ const unsigned int vec_size = adjust_vec_size(((_dst->data_type() == DataType::F32) ? 2 : 4), _dst->dimension(0));
// Create and configure kernel window
auto win = calculate_max_window(output_shape, Steps(vec_size));
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.h
index ef1c100f44..d1d3c01669 100644
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.h
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.h
@@ -27,6 +27,7 @@
#include "arm_compute/core/experimental/Types.h"
#include "arm_compute/dynamic_fusion/sketch/attributes/Pool2dAttributes.h"
#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuPool2d.h"
+
#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.h"
#include "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h"
#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h"
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateReshape.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateReshape.cpp
index 8b50f1e209..c882353fcb 100644
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateReshape.cpp
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateReshape.cpp
@@ -25,6 +25,7 @@
#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
#include "arm_compute/core/utils/StringUtils.h"
+
#include "src/core/helpers/WindowHelpers.h"
#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
@@ -36,11 +37,8 @@ namespace dynamic_fusion
{
constexpr unsigned int vector_size_byte_opencl = 16;
-ClTemplateReshape::ClTemplateReshape(ComponentId id,
- const ArgumentPack<ITensorInfo> &tensors)
- : IGpuTemplateComponentWriter{ id, tensors },
- _src{},
- _dst{}
+ClTemplateReshape::ClTemplateReshape(ComponentId id, const ArgumentPack<ITensorInfo> &tensors)
+ : IGpuTemplateComponentWriter{id, tensors}, _src{}, _dst{}
{
_src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0);
_dst = this->tensors().get_const_tensor(TensorType::ACL_DST_0);
@@ -97,23 +95,17 @@ TILE(uint, M0, 1, g_dst_indirect_y);
void ClTemplateReshape::declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
{
- vtable.declare_variable(
- comp_group,
- _src,
- GpuKernelArgumentInfo(common_tensor_type), // GpuKernelArgumentInfo::Type::Image_3D
- "src");
-
- vtable.declare_variable(
- comp_group,
- _dst,
- GpuKernelArgumentInfo(common_tensor_type),
- "dst");
+ vtable.declare_variable(comp_group, _src,
+ GpuKernelArgumentInfo(common_tensor_type), // GpuKernelArgumentInfo::Type::Image_3D
+ "src");
+
+ vtable.declare_variable(comp_group, _dst, GpuKernelArgumentInfo(common_tensor_type), "dst");
}
TagLUT ClTemplateReshape::get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
{
ARM_COMPUTE_UNUSED(comp_group);
- TagLUT lut{};
+ TagLUT lut{};
// Arguments and global shared variables
lut["src"] = vtable.get_variable(_src);
@@ -153,7 +145,7 @@ std::string ClTemplateReshape::get_config_id() const
std::set<std::string> ClTemplateReshape::get_headers_list() const
{
- return std::set<std::string>{ "helpers.h", "tile_helpers.h" };
+ return std::set<std::string>{"helpers.h", "tile_helpers.h"};
}
Window ClTemplateReshape::get_window() const
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateReshape.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateReshape.h
index 56b6585b61..838a21db6d 100644
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateReshape.h
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateReshape.h
@@ -25,6 +25,7 @@
#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATERESHAPE
#include "arm_compute/core/experimental/Types.h"
+
#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentReshape.h"
#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h"
@@ -42,8 +43,7 @@ public:
* @param[in] id Component id
* @param[in] tensors Tensor arguments to the components
*/
- ClTemplateReshape(ComponentId id,
- const ArgumentPack<ITensorInfo> &tensors);
+ ClTemplateReshape(ComponentId id, const ArgumentPack<ITensorInfo> &tensors);
/** Prevent instances of this class from being copy constructed */
ClTemplateReshape(const ClTemplateReshape &reshape) = delete;
/** Prevent instances of this class from being copied */
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateResize.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateResize.cpp
index aaed1d990d..846c712ceb 100644
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateResize.cpp
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateResize.cpp
@@ -27,6 +27,7 @@
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
#include "arm_compute/core/utils/StringUtils.h"
+
#include "src/core/helpers/WindowHelpers.h"
#include "src/core/utils/ScaleUtils.h"
#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
@@ -37,8 +38,10 @@ namespace experimental
{
namespace dynamic_fusion
{
-ClTemplateResize::ClTemplateResize(ComponentId id, const ArgumentPack<ITensorInfo> &tensors, const ClTemplateResize::Attributes &attributes)
- : IGpuTemplateComponentWriter{ id, tensors }, _src{}, _dst{}, _attributes{ attributes }
+ClTemplateResize::ClTemplateResize(ComponentId id,
+ const ArgumentPack<ITensorInfo> &tensors,
+ const ClTemplateResize::Attributes &attributes)
+ : IGpuTemplateComponentWriter{id, tensors}, _src{}, _dst{}, _attributes{attributes}
{
_src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0);
_dst = this->tensors().get_const_tensor(TensorType::ACL_DST_0);
@@ -63,9 +66,9 @@ TILE(uint, 1, 1, g_dst_indirect_y);
const int bout = g_ind_2 / {{arg_dst}}_h;
)_";
- if(_attributes.interpolation_policy() == InterpolationPolicy::NEAREST_NEIGHBOR)
+ if (_attributes.interpolation_policy() == InterpolationPolicy::NEAREST_NEIGHBOR)
{
- if(_attributes.sampling_policy() == SamplingPolicy::TOP_LEFT)
+ if (_attributes.sampling_policy() == SamplingPolicy::TOP_LEFT)
{
code += R"_(
float xi_f = (g_ind_1 * {{SCALE_X}});
@@ -80,7 +83,7 @@ TILE(uint, 1, 1, g_dst_indirect_y);
)_";
}
- if(_attributes.align_corners())
+ if (_attributes.align_corners())
{
code += R"_(
xi_f = round(xi_f);
@@ -95,9 +98,9 @@ TILE(uint, 1, 1, g_dst_indirect_y);
T_LOAD_NHWC_WITH_DILATION({{SRC_DATA_TYPE}}, 1, 1, N0, {{SRC_TENSOR_TYPE}}, {{src}}, bout, yi0, xi0, g_ind_0, {{src}}_w, {{src}}_h, 1, 1, false, {{dst}});
)_";
}
- else if(_attributes.interpolation_policy() == InterpolationPolicy::BILINEAR)
+ else if (_attributes.interpolation_policy() == InterpolationPolicy::BILINEAR)
{
- if(_attributes.sampling_policy() == SamplingPolicy::TOP_LEFT)
+ if (_attributes.sampling_policy() == SamplingPolicy::TOP_LEFT)
{
code += R"_(
float xi_f = (g_ind_1 * {{SCALE_X}});
@@ -137,7 +140,7 @@ TILE(uint, 1, 1, g_dst_indirect_y);
T_LOAD_NHWC_WITH_DILATION({{SRC_DATA_TYPE}}, 1, 1, N0, {{SRC_TENSOR_TYPE}}, {{src}}, bout, yi1, xi1, g_ind_0, {{src}}_w, {{src}}_h, 1, 1, false, in11);
)_";
- if(is_data_type_float(_src->data_type()))
+ if (is_data_type_float(_src->data_type()))
{
code += R"_(
const {{SRC_DATA_TYPE}} a = ({{SRC_DATA_TYPE}})(xi_f - (float)xi);
@@ -158,9 +161,9 @@ TILE(uint, 1, 1, g_dst_indirect_y);
const float b1 = (1.f - a1);
{{dst}}[0].v = CONVERT_SAT(
- (CONVERT(in00[0].v, VEC_DATA_TYPE(float, N0)) * b * b1) +
+ (CONVERT(in00[0].v, VEC_DATA_TYPE(float, N0)) * b * b1) +
(CONVERT(in01[0].v, VEC_DATA_TYPE(float, N0)) * a * b1) +
- (CONVERT(in10[0].v, VEC_DATA_TYPE(float, N0)) * b * a1) +
+ (CONVERT(in10[0].v, VEC_DATA_TYPE(float, N0)) * b * a1) +
(CONVERT(in11[0].v, VEC_DATA_TYPE(float, N0)) * a * a1), VEC_DATA_TYPE({{DST_DATA_TYPE}}, N0));
)_";
}
@@ -179,22 +182,18 @@ TILE(uint, 1, 1, g_dst_indirect_y);
return code;
}
-void ClTemplateResize::declare_variables(GpuKernelVariableTable &vtable, const IGpuTemplateComponentWriter::ComponentGroup &comp_group) const
+void ClTemplateResize::declare_variables(GpuKernelVariableTable &vtable,
+ const IGpuTemplateComponentWriter::ComponentGroup &comp_group) const
{
- vtable.declare_variable(
- comp_group,
- _src,
- GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
- "src");
-
- vtable.declare_variable(
- comp_group,
- _dst,
- GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
- "dst");
+ vtable.declare_variable(comp_group, _src, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
+ "src");
+
+ vtable.declare_variable(comp_group, _dst, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
+ "dst");
}
-TagLUT ClTemplateResize::get_tag_lut(const GpuKernelVariableTable &vtable, const IGpuTemplateComponentWriter::ComponentGroup &comp_group) const
+TagLUT ClTemplateResize::get_tag_lut(const GpuKernelVariableTable &vtable,
+ const IGpuTemplateComponentWriter::ComponentGroup &comp_group) const
{
TagLUT lut{};
@@ -212,8 +211,10 @@ TagLUT ClTemplateResize::get_tag_lut(const GpuKernelVariableTable &vtable, const
lut["DST_DATA_TYPE"] = get_cl_type_from_data_type(_dst->data_type());
lut["CONSTANT_VALUE"] = string_from_pixel_value(0, _src->data_type());
- const float scale_x = scale_utils::calculate_resize_ratio(_src->dimension(1), _dst->dimension(1), _attributes.align_corners());
- const float scale_y = scale_utils::calculate_resize_ratio(_src->dimension(2), _dst->dimension(2), _attributes.align_corners());
+ const float scale_x =
+ scale_utils::calculate_resize_ratio(_src->dimension(1), _dst->dimension(1), _attributes.align_corners());
+ const float scale_y =
+ scale_utils::calculate_resize_ratio(_src->dimension(2), _dst->dimension(2), _attributes.align_corners());
lut["SCALE_X"] = float_to_string_with_full_precision(scale_x);
lut["SCALE_Y"] = float_to_string_with_full_precision(scale_y);
@@ -242,7 +243,8 @@ std::string ClTemplateResize::get_config_id() const
std::string config_id{};
config_id += "resize_";
- config_id += (_attributes.interpolation_policy() == InterpolationPolicy::NEAREST_NEIGHBOR ? "NEAREST_NEIGHBOR" : "");
+ config_id +=
+ (_attributes.interpolation_policy() == InterpolationPolicy::NEAREST_NEIGHBOR ? "NEAREST_NEIGHBOR" : "");
config_id += (_attributes.interpolation_policy() == InterpolationPolicy::BILINEAR ? "BILINEAR" : "");
config_id += "_";
config_id += (_attributes.sampling_policy() == SamplingPolicy::CENTER ? "center" : "topleft");
@@ -260,7 +262,7 @@ std::string ClTemplateResize::get_config_id() const
std::set<std::string> ClTemplateResize::get_headers_list() const
{
- return std::set<std::string>{ "helpers.h", "tile_helpers.h" };
+ return std::set<std::string>{"helpers.h", "tile_helpers.h"};
}
Window ClTemplateResize::get_window() const
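For context on the wrapped calculate_resize_ratio calls above: SCALE_X/SCALE_Y map an output coordinate back into the source image. A minimal sketch of that ratio, assuming the usual align-corners convention; this is an illustrative re-statement, not the library's actual implementation:

    #include <cstddef>

    // Sketch: with align_corners the end points are pinned, so one sample
    // interval is dropped from both extents before taking the ratio.
    inline float resize_ratio_sketch(size_t src_size, size_t dst_size, bool align_corners)
    {
        const size_t offset = align_corners ? 1 : 0;
        return static_cast<float>(src_size - offset) / static_cast<float>(dst_size - offset);
    }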
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.cpp
index 217214ced3..d0ec91e0a9 100644
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.cpp
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.cpp
@@ -32,7 +32,7 @@ namespace experimental
namespace dynamic_fusion
{
ClTemplateStore::ClTemplateStore(ComponentId id, const ArgumentPack<ITensorInfo> &tensors)
- : IGpuTemplateComponentWriter{ id, tensors }, _src{}, _dst{}
+ : IGpuTemplateComponentWriter{id, tensors}, _src{}, _dst{}
{
_src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0);
_dst = this->tensors().get_const_tensor(TensorType::ACL_DST_0);
@@ -61,16 +61,10 @@ std::string ClTemplateStore::get_component_code(const ComponentGroup &comp_group
void ClTemplateStore::declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
{
- vtable.declare_variable(
- comp_group,
- _src,
- GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
- "src");
- vtable.declare_variable(
- comp_group,
- _dst,
- GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
- "dst");
+ vtable.declare_variable(comp_group, _src, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
+ "src");
+ vtable.declare_variable(comp_group, _dst, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
+ "dst");
}
TagLUT ClTemplateStore::get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
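Both ClTemplateResize above and ClTemplateStore here funnel their tensors through the same GpuKernelVariableTable::declare_variable call. A toy model of what such a table provides, assuming one kernel variable per tensor id with first-declaration-wins de-duplication (the naming scheme is illustrative, not ComputeLibrary's):

    #include <map>
    #include <string>

    // Toy model: components of a fused kernel that declare the same tensor
    // end up sharing a single kernel argument, keyed by the tensor's id.
    struct MiniVariableTable
    {
        std::map<int, std::string> vars{}; // tensor id -> variable name
        void declare_variable(int tensor_id, const std::string &alias)
        {
            // try_emplace keeps the first declaration and ignores repeats.
            vars.try_emplace(tensor_id, alias + "_t" + std::to_string(tensor_id));
        }
    };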
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.h
index 3f97a82204..b8c82ceadd 100644
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.h
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.h
@@ -25,6 +25,7 @@
#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATESTORE
#include "arm_compute/core/experimental/Types.h"
+
#include "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h"
#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h"
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.cpp
index eda15f1d95..d3d7c8db83 100644
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.cpp
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.cpp
@@ -24,6 +24,7 @@
#include "ClTemplateWriter.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
+
#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h"
#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h"
@@ -39,11 +40,11 @@ std::string ClTemplateWriter::replace_tags(const std::string &code_template, con
std::string replaced_code = "";
bool scanning_pattern = false;
std::string pattern_found = "";
- for(size_t i = 0; i < code_template.size() - 1; ++i)
+ for (size_t i = 0; i < code_template.size() - 1; ++i)
{
- if(!scanning_pattern)
+ if (!scanning_pattern)
{
- if(code_template[i] == '{' && code_template[i + 1] == '{')
+ if (code_template[i] == '{' && code_template[i + 1] == '{')
{
i += 1;
scanning_pattern = true;
@@ -56,7 +57,7 @@ std::string ClTemplateWriter::replace_tags(const std::string &code_template, con
}
else
{
- if(code_template[i] == '}' && code_template[i + 1] == '}')
+ if (code_template[i] == '}' && code_template[i + 1] == '}')
{
i += 1;
scanning_pattern = false;
@@ -76,8 +77,7 @@ std::string ClTemplateWriter::replace_tags(const std::string &code_template, con
ClTemplateWriter::~ClTemplateWriter()
{
}
-ClTemplateWriter::ClTemplateWriter(const GpuKernelComponentGroup &components)
- : _components{ components }
+ClTemplateWriter::ClTemplateWriter(const GpuKernelComponentGroup &components) : _components{components}
{
}
std::string ClTemplateWriter::get_name()
@@ -91,7 +91,7 @@ std::string ClTemplateWriter::get_code()
std::string ClTemplateWriter::get_config_id()
{
std::string config_id = get_name();
- for(const auto &comp : _components)
+ for (const auto &comp : _components)
{
config_id += "--" + comp->template_writer()->get_config_id() + "--";
}
@@ -103,7 +103,7 @@ CLBuildOptions ClTemplateWriter::get_build_options()
{
CLBuildOptions build_opts{};
- for(const auto &comp : _components)
+ for (const auto &comp : _components)
{
build_opts.add_options(comp->template_writer()->get_build_options(_components).options());
}
@@ -122,11 +122,9 @@ std::map<ITensorInfo::Id, GpuKernelArgument> ClTemplateWriter::get_tensors()
{
// Assemble GpuKernelArguments
std::map<ITensorInfo::Id, GpuKernelArgument> tensors;
- for(const auto t : _components.get_argument_tensors())
+ for (const auto t : _components.get_argument_tensors())
{
- tensors.emplace(
- t->id(),
- GpuKernelArgument{ *t, _vtable.get_variable(t).kernel_argument_info });
+ tensors.emplace(t->id(), GpuKernelArgument{*t, _vtable.get_variable(t).kernel_argument_info});
}
return tensors;
}
@@ -141,22 +139,24 @@ std::string ClTemplateWriter::write_code()
std::vector<std::string> component_codes{}; // vector because order matters
// Pass 1: Declare all kernel variables
- for(auto &component : _components)
+ for (auto &component : _components)
{
component->template_writer()->declare_variables(_vtable, _components);
}
// Pass 2: Generate component codes
- for(auto &component : _components)
+ for (auto &component : _components)
{
const auto component_writer = component->template_writer();
auto curr_headers_list = component_writer->get_headers_list();
auto curr_additional_macros = component_writer->get_additional_macros();
auto curr_component_code = component_writer->get_component_code(_components);
- const auto var_lut = component_writer->get_tag_lut(_vtable, _components); // Ideally can be merged with get_component_code once we have finer-grained code generation technique
+ const auto var_lut = component_writer->get_tag_lut(
+ _vtable,
+ _components); // Ideally can be merged with get_component_code once we have finer-grained code generation technique
component_codes.push_back(replace_tags(curr_component_code, var_lut));
headers_list.insert(curr_headers_list.begin(), curr_headers_list.end());
- if(!additional_macros.empty()) // Some components might not have any
+ if (!additional_macros.empty()) // Some components might not have any
{
additional_macros.insert(replace_tags(curr_additional_macros, var_lut));
}
@@ -165,7 +165,7 @@ std::string ClTemplateWriter::write_code()
// Step 3: Assemble the data gathered by traversing the graph into the string "code"
std::string code = "";
- for(auto &header : headers_list)
+ for (auto &header : headers_list)
{
#if defined(EMBEDDED_KERNELS)
code += CLKernelLibrary::get().get_program(header).first;
@@ -174,16 +174,14 @@ std::string ClTemplateWriter::write_code()
#endif // defined(EMBEDDED_KERNELS)
}
- for(auto &macros : additional_macros)
+ for (auto &macros : additional_macros)
{
code += macros;
}
auto arguments = _components.get_argument_tensors();
- std::sort(arguments.begin(), arguments.end(), [](const ITensorInfo * l, const ITensorInfo * r)
- {
- return l->id() < r->id();
- });
+ std::sort(arguments.begin(), arguments.end(),
+ [](const ITensorInfo *l, const ITensorInfo *r) { return l->id() < r->id(); });
code += write_kernel_signature(_vtable.get_variable_list(arguments));
code += "\n{\n\n";
@@ -198,7 +196,7 @@ std::string ClTemplateWriter::write_code()
tiles_ss << " //------------------ START TILE DECLARATION ---------------------\n";
- for(auto tile : tiles)
+ for (auto tile : tiles)
{
const auto var = _vtable.get_variable(tile);
const auto data_type = get_cl_type_from_data_type(tile->data_type());
@@ -212,7 +210,7 @@ std::string ClTemplateWriter::write_code()
code += tiles_ss.str();
}
- for(const auto &component_code : component_codes)
+ for (const auto &component_code : component_codes)
{
code += component_code;
code += "\n";
@@ -231,7 +229,8 @@ std::string ClTemplateWriter::write_global_section() const
auto leftover_w = dst_w % tile_w;
std::string code = "";
- code += std::string(" int g_ind_0 = GET_SPATIAL_IDX(0, ") + std::to_string(tile_w) + ", " + std::to_string(leftover_w) + ");\n";
+ code += std::string(" int g_ind_0 = GET_SPATIAL_IDX(0, ") + std::to_string(tile_w) + ", " +
+ std::to_string(leftover_w) + ");\n";
code += std::string(" int g_ind_1 = GET_SPATIAL_IDX(1, ") + std::to_string(tile_h) + ", " + "0);\n";
code += std::string(" int g_ind_2 = GET_SPATIAL_IDX(2, 1, 0);\n\n");
@@ -243,7 +242,7 @@ std::string ClTemplateWriter::write_global_section() const
std::string ClTemplateWriter::write_argument_declaration(const GpuKernelVariableTable::TensorVariable &var) const
{
std::string code;
- switch(var.kernel_argument_info.type)
+ switch (var.kernel_argument_info.type)
{
case GpuKernelArgumentInfo::Type::Vector:
{
@@ -293,11 +292,11 @@ std::string ClTemplateWriter::write_kernel_signature(const GpuKernelVariableTabl
{
std::string code = "\n__kernel void " + write_kernel_name() + "(";
- for(int i = 0; i < static_cast<int>(argument_list.size()) - 1; ++i)
+ for (int i = 0; i < static_cast<int>(argument_list.size()) - 1; ++i)
{
code += write_argument_declaration(argument_list[i]) + ",";
}
- if(static_cast<int>(argument_list.size()) - 1 >= 0)
+ if (static_cast<int>(argument_list.size()) - 1 >= 0)
{
code += write_argument_declaration(argument_list[argument_list.size() - 1]);
}
@@ -308,12 +307,12 @@ std::string ClTemplateWriter::write_kernel_signature(const GpuKernelVariableTabl
}
std::string ClTemplateWriter::write_kernel_name() const
{
- if(_components.empty())
+ if (_components.empty())
{
return "empty_kernel";
}
std::string name = _components.empty() ? "" : _components[0]->template_writer()->get_name();
- for(size_t i = 1; i < _components.size(); ++i)
+ for (size_t i = 1; i < _components.size(); ++i)
{
name += "___";
name += _components[i]->template_writer()->get_name();
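The replace_tags hunks above are whitespace-only, but the {{tag}} scanner they touch is the heart of the template writer. A self-contained sketch of the same substitution idea (the behavior on a lookup miss, keeping the tag verbatim, is an assumption rather than something this patch shows):

    #include <map>
    #include <string>

    // Sketch: replace every {{key}} in 'tmpl' with lut.at(key), mirroring
    // the two-state scan in ClTemplateWriter::replace_tags.
    std::string replace_tags_sketch(const std::string &tmpl, const std::map<std::string, std::string> &lut)
    {
        std::string out;
        for (size_t i = 0; i < tmpl.size(); ++i)
        {
            if (i + 1 < tmpl.size() && tmpl[i] == '{' && tmpl[i + 1] == '{')
            {
                const size_t end = tmpl.find("}}", i + 2);
                if (end != std::string::npos)
                {
                    const std::string key = tmpl.substr(i + 2, end - (i + 2));
                    const auto    it  = lut.find(key);
                    out += (it != lut.end()) ? it->second : "{{" + key + "}}";
                    i = end + 1; // resume after the closing braces
                    continue;
                }
            }
            out += tmpl[i];
        }
        return out;
    }

For example, with lut["src"] = "t0" and lut["dst"] = "t1", the template "LOAD({{src}}, {{dst}})" expands to "LOAD(t0, t1)".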
diff --git a/src/dynamic_fusion/sketch/utils/DependencyGraph.h b/src/dynamic_fusion/sketch/utils/DependencyGraph.h
index c891e76d8b..c157c2b21c 100644
--- a/src/dynamic_fusion/sketch/utils/DependencyGraph.h
+++ b/src/dynamic_fusion/sketch/utils/DependencyGraph.h
@@ -25,6 +25,7 @@
#define SRC_DYNAMIC_FUSION_SKETCH_UTILS_DEPENDENCYGRAPH
#include "arm_compute/core/Error.h"
+
#include <cstdint>
#include <map>
#include <set>
@@ -68,12 +69,10 @@ public:
OperatorId op{};
std::vector<TensorId> inputs{};
std::vector<TensorId> outputs{};
- friend bool operator==(const OpPack &opp0, const OpPack &opp1)
+ friend bool operator==(const OpPack &opp0, const OpPack &opp1)
{
- return std::make_tuple(
- opp0.op, opp0.inputs, opp0.outputs)
- == std::make_tuple(
- opp1.op, opp1.inputs, opp1.outputs);
+ return std::make_tuple(opp0.op, opp0.inputs, opp0.outputs) ==
+ std::make_tuple(opp1.op, opp1.inputs, opp1.outputs);
}
};
@@ -95,10 +94,13 @@ public:
* @return true If the operator can be added while keeping the graph as a linear sequence
* @return false Otherwise
*/
- bool try_add_operator_as_linear(OperatorId op, const std::vector<TensorId> &inputs, const std::vector<TensorId> &outputs, bool is_output = false) const
+ bool try_add_operator_as_linear(OperatorId op,
+ const std::vector<TensorId> &inputs,
+ const std::vector<TensorId> &outputs,
+ bool is_output = false) const
{
ARM_COMPUTE_UNUSED(op, is_output);
- if(all_ops().empty())
+ if (all_ops().empty())
{
return true;
}
@@ -106,25 +108,25 @@ public:
// If the new operator is not the first operator, at least one input tensor must be
// the output tensor of the last non-output operator. All other input tensors must be
// the global input of the graph (i.e. not the output of any operator).
- if(_last_op_available)
+ if (_last_op_available)
{
auto use_input_from_last_op = false;
- for(auto src_tensor : inputs)
+ for (auto src_tensor : inputs)
{
const auto src_ops = _adj_src_ops.find(src_tensor);
- if(src_ops != _adj_src_ops.end())
+ if (src_ops != _adj_src_ops.end())
{
ARM_COMPUTE_ERROR_ON(src_ops->second.size() > 1);
- if(!src_ops->second.empty())
+ if (!src_ops->second.empty())
{
const auto src_op = src_ops->second[0];
- if(src_op == _last_op)
+ if (src_op == _last_op)
{
- if(use_input_from_last_op)
+ if (use_input_from_last_op)
{
// To be safe, we also forbid using the output tensor
// of the last operator twice.
@@ -143,7 +145,7 @@ public:
}
}
- if(!use_input_from_last_op)
+ if (!use_input_from_last_op)
{
// At least one input tensor must be the output tensor of the last non-output operator.
return false;
@@ -152,9 +154,9 @@ public:
// The output tensor of the new operator must not be the input tensor of any previously
// added operator.
- for(auto dst_tensor : outputs)
+ for (auto dst_tensor : outputs)
{
- if(_adj_dst_ops.find(dst_tensor) != _adj_dst_ops.end())
+ if (_adj_dst_ops.find(dst_tensor) != _adj_dst_ops.end())
{
return false;
}
@@ -168,7 +170,10 @@ public:
* INVARIANT: The list can only grow from head to tail
* INVARIANT: POSTCONDITION: The graph is linear
*/
- void add_operator_as_linear(OperatorId op, const std::vector<TensorId> &inputs, const std::vector<TensorId> &outputs, bool is_output = false)
+ void add_operator_as_linear(OperatorId op,
+ const std::vector<TensorId> &inputs,
+ const std::vector<TensorId> &outputs,
+ bool is_output = false)
{
const auto success = add_operator(op, inputs, outputs, is_output);
ARM_COMPUTE_UNUSED(success);
@@ -183,24 +188,27 @@ public:
* @param[in] outputs Output tensors to the operator
* @param[in] is_output Whether this is an output operator
*/
- bool add_operator(OperatorId op, const std::vector<TensorId> &inputs, const std::vector<TensorId> &outputs, bool is_output = false)
+ bool add_operator(OperatorId op,
+ const std::vector<TensorId> &inputs,
+ const std::vector<TensorId> &outputs,
+ bool is_output = false)
{
- if(operator_exists(op))
+ if (operator_exists(op))
{
return false;
}
_adj_src_tensors[op] = {};
_adj_dst_tensors[op] = {};
- for(auto in_tensor : inputs)
+ for (auto in_tensor : inputs)
{
// Linking input tensor to operator node will never create a cycle / loop because we guarantee
// each op is newly created, so every <input, op> pair / edge is new
link_input(op, in_tensor);
}
- for(auto out_tensor : outputs)
+ for (auto out_tensor : outputs)
{
// If there exists a back path from op's output tensor to op already, then linking the two will create a loop / cycle
- if(path_exists_from_tensor_to_op(out_tensor, op))
+ if (path_exists_from_tensor_to_op(out_tensor, op))
{
remove_operator(op);
return false;
@@ -211,10 +219,10 @@ public:
}
}
- if(!is_output)
+ if (!is_output)
{
_last_op_available = true;
- _last_op = op;
+ _last_op = op;
}
return true;
@@ -230,16 +238,16 @@ public:
std::vector<OpPack> build_operators_sequence() const
{
std::vector<OpPack> ops_seq;
- std::set<Id> done_ops;
- std::set<Id> done_tensors;
+ std::set<Id> done_ops;
+ std::set<Id> done_tensors;
const auto input_tensors = global_src_tensors();
- for(auto tensor : input_tensors)
+ for (auto tensor : input_tensors)
{
done_tensors.insert(tensor);
- for(auto op : _adj_dst_ops.at(tensor))
+ for (auto op : _adj_dst_ops.at(tensor))
{
build_operators_sequence_from_op(op, ops_seq, done_ops, done_tensors);
}
@@ -260,10 +268,8 @@ public:
friend bool operator==(const DependencyGraph &g0, const DependencyGraph &g1)
{
// Do not compare id allocators
- return std::make_tuple(
- g0._adj_src_tensors, g0._adj_dst_tensors, g0._adj_src_ops, g0._adj_dst_ops)
- == std::make_tuple(
- g1._adj_src_tensors, g1._adj_dst_tensors, g1._adj_src_ops, g1._adj_dst_ops);
+ return std::make_tuple(g0._adj_src_tensors, g0._adj_dst_tensors, g0._adj_src_ops, g0._adj_dst_ops) ==
+ std::make_tuple(g1._adj_src_tensors, g1._adj_dst_tensors, g1._adj_src_ops, g1._adj_dst_ops);
}
std::vector<OperatorId> src_ops_from_tensor(TensorId tensor) const
{
@@ -280,10 +286,8 @@ public:
std::vector<TensorId> all_tensors() const
{
std::vector<TensorId> tensors{};
- std::transform(std::begin(_adj_src_ops), std::end(_adj_src_ops), std::back_inserter(tensors), [](const auto & it)
- {
- return it.first;
- });
+ std::transform(std::begin(_adj_src_ops), std::end(_adj_src_ops), std::back_inserter(tensors),
+ [](const auto &it) { return it.first; });
return tensors;
}
/** Get source tensors of the whole graph
@@ -293,9 +297,9 @@ public:
std::vector<TensorId> global_src_tensors() const
{
std::vector<TensorId> tensors;
- for(auto tensor_src_ops : _adj_src_ops)
+ for (auto tensor_src_ops : _adj_src_ops)
{
- if(tensor_src_ops.second.empty())
+ if (tensor_src_ops.second.empty())
{
tensors.push_back(tensor_src_ops.first);
}
@@ -309,9 +313,9 @@ public:
std::vector<TensorId> global_dst_tensors() const
{
std::vector<TensorId> tensors;
- for(auto tensor_dst_ops : _adj_dst_ops)
+ for (auto tensor_dst_ops : _adj_dst_ops)
{
- if(tensor_dst_ops.second.empty())
+ if (tensor_dst_ops.second.empty())
{
tensors.push_back(tensor_dst_ops.first);
}
@@ -328,14 +332,14 @@ public:
// If a tensor is used to connect the input of an operator and the output of another operator,
// it is not allocated in the memory. The tensor exists as a temporary variable only.
- for(auto src_tensor : _adj_src_ops)
+ for (auto src_tensor : _adj_src_ops)
{
- if(!src_tensor.second.empty())
+ if (!src_tensor.second.empty())
{
const auto dst_tensor = _adj_dst_ops.find(src_tensor.first);
- if(dst_tensor != _adj_dst_ops.end())
+ if (dst_tensor != _adj_dst_ops.end())
{
- if(!dst_tensor->second.empty())
+ if (!dst_tensor->second.empty())
{
tensors.push_back(src_tensor.first);
}
@@ -354,9 +358,9 @@ public:
std::vector<OperatorId> ops{};
const auto op_list = all_ops();
- for(auto op : op_list)
+ for (auto op : op_list)
{
- if(src_ops(op).empty())
+ if (src_ops(op).empty())
{
ops.emplace_back(op);
}
@@ -368,7 +372,7 @@ private:
void link_input(OperatorId op, TensorId in_tensor)
{
ARM_COMPUTE_ERROR_ON(!operator_exists(op));
- if(!tensor_exists(in_tensor))
+ if (!tensor_exists(in_tensor))
{
insert_new_tensor(in_tensor);
}
@@ -379,7 +383,7 @@ private:
void link_output(OperatorId op, TensorId out_tensor)
{
ARM_COMPUTE_ERROR_ON(!operator_exists(op));
- if(!tensor_exists(out_tensor))
+ if (!tensor_exists(out_tensor))
{
insert_new_tensor(out_tensor);
}
@@ -392,7 +396,7 @@ private:
{
ARM_COMPUTE_ERROR_ON(!operator_exists(op));
std::vector<OperatorId> ops{};
- for(TensorId src_tensor : src_tensors(op))
+ for (TensorId src_tensor : src_tensors(op))
{
ops.insert(ops.end(), std::begin(_adj_src_ops.at(src_tensor)), std::end(_adj_src_ops.at(src_tensor)));
}
@@ -402,7 +406,7 @@ private:
{
ARM_COMPUTE_ERROR_ON(!operator_exists(op));
std::vector<OperatorId> ops{};
- for(TensorId dst_tensor : _adj_dst_tensors.at(op))
+ for (TensorId dst_tensor : _adj_dst_tensors.at(op))
{
ops.insert(ops.end(), std::begin(_adj_dst_ops.at(dst_tensor)), std::end(_adj_dst_ops.at(dst_tensor)));
}
@@ -436,10 +440,8 @@ private:
std::vector<OperatorId> all_ops() const
{
std::vector<OperatorId> ops{};
- std::transform(std::begin(_adj_src_tensors), std::end(_adj_src_tensors), std::back_inserter(ops), [](const auto & it)
- {
- return it.first;
- });
+ std::transform(std::begin(_adj_src_tensors), std::end(_adj_src_tensors), std::back_inserter(ops),
+ [](const auto &it) { return it.first; });
return ops;
}
/** Remove an operator from graph.
@@ -448,25 +450,21 @@ private:
*/
void remove_operator(OperatorId op)
{
- for(auto src_tensor : _adj_src_tensors.at(op))
+ for (auto src_tensor : _adj_src_tensors.at(op))
{
auto &dst_ops = _adj_dst_ops.at(src_tensor);
- dst_ops.erase(
- std::remove(std::begin(dst_ops), std::end(dst_ops), op),
- std::end(dst_ops));
+ dst_ops.erase(std::remove(std::begin(dst_ops), std::end(dst_ops), op), std::end(dst_ops));
}
- for(auto dst_tensor : _adj_dst_tensors.at(op))
+ for (auto dst_tensor : _adj_dst_tensors.at(op))
{
auto &src_ops = _adj_src_ops.at(dst_tensor);
- src_ops.erase(
- std::remove(std::begin(src_ops), std::end(src_ops), op),
- std::end(src_ops));
+ src_ops.erase(std::remove(std::begin(src_ops), std::end(src_ops), op), std::end(src_ops));
}
// Remove any isolated tensors
// An isolated tensor is one where both its _adj_src_ops and _adj_dst_ops are empty
- for(auto t : all_tensors())
+ for (auto t : all_tensors())
{
- if(_adj_src_ops.at(t).empty() && _adj_dst_ops.at(t).empty())
+ if (_adj_src_ops.at(t).empty() && _adj_dst_ops.at(t).empty())
{
_adj_src_ops.erase(t);
_adj_dst_ops.erase(t);
@@ -486,11 +484,12 @@ private:
}
bool operator_exists(OperatorId op) const
{
- return _adj_src_tensors.find(op) != _adj_src_tensors.end() && _adj_dst_tensors.find(op) != _adj_dst_tensors.end();
+ return _adj_src_tensors.find(op) != _adj_src_tensors.end() &&
+ _adj_dst_tensors.find(op) != _adj_dst_tensors.end();
}
bool is_src_tensor_of(OperatorId op, TensorId tensor) const
{
- if(!operator_exists(op) || !tensor_exists(tensor))
+ if (!operator_exists(op) || !tensor_exists(tensor))
{
return false;
}
@@ -499,7 +498,7 @@ private:
}
bool is_dst_tensor_of(OperatorId op, TensorId tensor) const
{
- if(!operator_exists(op) || !tensor_exists(tensor))
+ if (!operator_exists(op) || !tensor_exists(tensor))
{
return false;
}
@@ -525,9 +524,9 @@ private:
std::vector<OperatorId> ops{};
const auto op_list = all_ops();
- for(auto op : op_list)
+ for (auto op : op_list)
{
- if(is_dst_op(op))
+ if (is_dst_op(op))
{
ops.emplace_back(op);
}
@@ -536,13 +535,13 @@ private:
}
bool path_exists_from_tensor_to_op(TensorId src_tensor, OperatorId dst_op) const
{
- if(!tensor_exists(src_tensor) || !operator_exists(dst_op))
+ if (!tensor_exists(src_tensor) || !operator_exists(dst_op))
{
return false;
}
- for(auto child_op : dst_ops_from_tensor(src_tensor))
+ for (auto child_op : dst_ops_from_tensor(src_tensor))
{
- if(path_exists_from_op_to_op(child_op, dst_op))
+ if (path_exists_from_op_to_op(child_op, dst_op))
{
return true;
}
@@ -552,21 +551,21 @@ private:
bool path_exists_from_op_to_op(OperatorId src_op, OperatorId dst_op) const
{
- if(!operator_exists(src_op) || !operator_exists(dst_op))
+ if (!operator_exists(src_op) || !operator_exists(dst_op))
{
return false;
}
- if(src_op == dst_op)
+ if (src_op == dst_op)
{
return true;
}
- if(is_in(src_op, get_dst_ops()))
+ if (is_in(src_op, get_dst_ops()))
{
return false;
}
- for(auto child_tensor : dst_tensors(src_op))
+ for (auto child_tensor : dst_tensors(src_op))
{
- if(path_exists_from_tensor_to_op(child_tensor, dst_op))
+ if (path_exists_from_tensor_to_op(child_tensor, dst_op))
{
return true;
}
@@ -574,16 +573,15 @@ private:
return false;
}
- void build_operators_sequence_from_op(
- Id op,
- std::vector<OpPack> &ops_seq,
- std::set<Id> &done_ops,
- std::set<Id> &done_tensors) const
+ void build_operators_sequence_from_op(Id op,
+ std::vector<OpPack> &ops_seq,
+ std::set<Id> &done_ops,
+ std::set<Id> &done_tensors) const
{
- while(true)
+ while (true)
{
// If the operator has been added to the sequence, ignore it.
- if(done_ops.find(op) != done_ops.end())
+ if (done_ops.find(op) != done_ops.end())
{
return;
}
@@ -593,9 +591,9 @@ private:
// is added to the sequence.
const auto src_tensors = _adj_src_tensors.at(op);
- for(auto src : src_tensors)
+ for (auto src : src_tensors)
{
- if(done_tensors.find(src) == done_tensors.end())
+ if (done_tensors.find(src) == done_tensors.end())
{
return;
}
@@ -606,24 +604,24 @@ private:
done_ops.insert(op);
- OpPack pack{ op, src_tensors, dst_tensors };
+ OpPack pack{op, src_tensors, dst_tensors};
ops_seq.push_back(pack);
done_tensors.insert(dst_tensors.begin(), dst_tensors.end());
// Visit all the sink operators.
// Call this function recursively unless there is only one sink.
- if(dst_tensors.size() == 1 && _adj_dst_ops.at(dst_tensors[0]).size() == 1)
+ if (dst_tensors.size() == 1 && _adj_dst_ops.at(dst_tensors[0]).size() == 1)
{
op = _adj_dst_ops.at(dst_tensors[0])[0];
}
else
{
- for(auto dst_tensor : dst_tensors)
+ for (auto dst_tensor : dst_tensors)
{
const auto dst_ops = _adj_dst_ops.at(dst_tensor);
- for(auto dst_op : dst_ops)
+ for (auto dst_op : dst_ops)
{
build_operators_sequence_from_op(dst_op, ops_seq, done_ops, done_tensors);
}
@@ -640,8 +638,8 @@ private:
AdjList _adj_src_ops{};
AdjList _adj_dst_ops{};
- bool _last_op_available{ false };
- OperatorId _last_op{ 0 };
+ bool _last_op_available{false};
+ OperatorId _last_op{0};
};
} // namespace dynamic_fusion
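Most DependencyGraph changes above are brace placement, but the invariants being reformatted are easy to lose in the noise: add_operator refuses an edge that would close a cycle, and try_add_operator_as_linear additionally requires the new operator to consume the tail of the current chain. A hedged usage sketch against the API shown in the hunks (the concrete ids and default construction are assumptions):

    using namespace arm_compute::experimental::dynamic_fusion;

    void dependency_graph_sketch()
    {
        DependencyGraph g{};
        g.add_operator(0, /* inputs */ {10}, /* outputs */ {11}); // t10 -> op0 -> t11
        g.add_operator(1, {11}, {12});                            // consumes op0's output: chain stays linear
        const bool linear = g.try_add_operator_as_linear(2, {12}, {13}); // true: op2 would extend the tail
        const bool cyclic = g.add_operator(3, {12}, {10});        // false: t10 already feeds op0
        ARM_COMPUTE_UNUSED(linear, cyclic);
    }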
diff --git a/src/dynamic_fusion/utils/Utils.h b/src/dynamic_fusion/utils/Utils.h
index c9fc2c610f..3f4a2edd03 100644
--- a/src/dynamic_fusion/utils/Utils.h
+++ b/src/dynamic_fusion/utils/Utils.h
@@ -63,17 +63,21 @@ inline bool is_invalid_tensor(const ITensorInfo *tensor_info)
/** Inline function to convert @ref Pool2dAttributes to PoolingLayerInfo
*/
-inline PoolingLayerInfo convert_pool_attr_to_pool_info(const Pool2dAttributes &pool_attr, bool mixed_precision = false, DataLayout data_layout = DataLayout::NHWC)
+inline PoolingLayerInfo convert_pool_attr_to_pool_info(const Pool2dAttributes &pool_attr,
+ bool mixed_precision = false,
+ DataLayout data_layout = DataLayout::NHWC)
{
// Create PadStrideInfo
const Size2D stride = pool_attr.stride();
const Padding2D padding = pool_attr.pad();
- const PadStrideInfo pad_stride(stride.x(), stride.y(), padding.left, padding.top, arm_compute::DimensionRoundingType::FLOOR);
+ const PadStrideInfo pad_stride(stride.x(), stride.y(), padding.left, padding.top,
+ arm_compute::DimensionRoundingType::FLOOR);
- return PoolingLayerInfo(pool_attr.pool_type(), pool_attr.pool_size(), data_layout, pad_stride, pool_attr.exclude_padding(), mixed_precision);
-}
-}
-}
+ return PoolingLayerInfo(pool_attr.pool_type(), pool_attr.pool_size(), data_layout, pad_stride,
+ pool_attr.exclude_padding(), mixed_precision);
}
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
#endif /* SRC_DYNAMIC_FUSION_UTILS_UTILS */
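One closing note on the Utils.h hunk: after the reflow, convert_pool_attr_to_pool_info still builds the identical PadStrideInfo, strides first, then left/top padding, always with FLOOR rounding. Spelled out with hypothetical concrete values:

    // The same construction as inside convert_pool_attr_to_pool_info,
    // with illustrative numbers: 2x2 stride and 1-pixel left/top padding.
    const arm_compute::PadStrideInfo pad_stride(
        /* stride_x */ 2, /* stride_y */ 2,
        /* pad_x */ 1, /* pad_y */ 1,
        arm_compute::DimensionRoundingType::FLOOR);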