7 files changed, 223 insertions, 140 deletions
diff --git a/src/runtime/heuristics/dwc_native/ClDWCNativeDefaultConfigBifrost.cpp b/src/runtime/heuristics/dwc_native/ClDWCNativeDefaultConfigBifrost.cpp
index 5311fdcec3..98ebf3ebbe 100644
--- a/src/runtime/heuristics/dwc_native/ClDWCNativeDefaultConfigBifrost.cpp
+++ b/src/runtime/heuristics/dwc_native/ClDWCNativeDefaultConfigBifrost.cpp
@@ -22,7 +22,6 @@
  * SOFTWARE.
  */
 #include "src/runtime/heuristics/dwc_native/ClDWCNativeDefaultConfigBifrost.h"
-#include "src/runtime/heuristics/dwc_native/ClDWCNativeHeuristicsHelpers.h"
 
 #include "arm_compute/core/CL/CLHelpers.h"
 #include "arm_compute/core/GPUTarget.h"
@@ -30,28 +29,34 @@
 #include "arm_compute/core/TensorShape.h"
 #include "arm_compute/core/utils/helpers/AdjustVecSize.h"
 
+#include "src/runtime/heuristics/dwc_native/ClDWCNativeHeuristicsHelpers.h"
+
 namespace arm_compute
 {
 namespace cl_dwc
 {
 namespace
 {
-DWCComputeKernelInfo configure_f32(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info, const Size2D &dilation,
-                                   unsigned int depth_multiplier, bool is_g71)
+DWCComputeKernelInfo configure_f32(const ITensorInfo   *src,
+                                   const ITensorInfo   *wei,
+                                   const PadStrideInfo &conv_info,
+                                   const Size2D        &dilation,
+                                   unsigned int         depth_multiplier,
+                                   bool                 is_g71)
 {
     DWCComputeKernelInfo desc;
 
-    if(src->data_layout() == DataLayout::NHWC)
+    if (src->data_layout() == DataLayout::NHWC)
     {
-        const size_t idx_c          = get_data_layout_dimension_index(wei->data_layout(), DataLayoutDimension::CHANNEL);
-        const size_t idx_w          = get_data_layout_dimension_index(wei->data_layout(), DataLayoutDimension::WIDTH);
+        const size_t      idx_c     = get_data_layout_dimension_index(wei->data_layout(), DataLayoutDimension::CHANNEL);
+        const size_t      idx_w     = get_data_layout_dimension_index(wei->data_layout(), DataLayoutDimension::WIDTH);
         const TensorShape wei_shape = wei->tensor_shape();
         const size_t      kernel_c  = wei_shape[idx_c];
         const size_t      kernel_w  = wei_shape[idx_w];
 
         desc.export_input_to_cl_image = false;
 
-        if(is_g71)
+        if (is_g71)
         {
             desc.export_weights_to_cl_image = false;
         }
@@ -60,17 +65,17 @@ DWCComputeKernelInfo configure_f32(const ITensorInfo *src, const ITensorInfo *we
             desc.export_weights_to_cl_image = use_cl_image_for_weights(wei, depth_multiplier);
         }
 
-        if(depth_multiplier == 1)
+        if (depth_multiplier == 1)
         {
             desc.n0 = 4;
         }
         else
         {
-            if((depth_multiplier % 4) == 0)
+            if ((depth_multiplier % 4) == 0)
             {
                 desc.n0 = 4;
             }
-            else if((depth_multiplier % 2) == 0)
+            else if ((depth_multiplier % 2) == 0)
             {
                 desc.n0 = 2;
             }
@@ -81,14 +86,15 @@ DWCComputeKernelInfo configure_f32(const ITensorInfo *src, const ITensorInfo *we
         }
 
         // Note: If we reduce n0, export to cl_image must be false
-        ARM_COMPUTE_ERROR_ON((adjust_vec_size(desc.n0, kernel_c) != desc.n0) && (desc.export_weights_to_cl_image == true));
+        ARM_COMPUTE_ERROR_ON((adjust_vec_size(desc.n0, kernel_c) != desc.n0) &&
+                             (desc.export_weights_to_cl_image == true));
 
         desc.n0 = adjust_vec_size(desc.n0, kernel_c);
 
         // Set m0 only if stride_x == 1 and dilation_x == 1
-        if(conv_info.stride().first == 1 && dilation.x() == 1)
+        if (conv_info.stride().first == 1 && dilation.x() == 1)
         {
-            if((kernel_w >= 9) || (kernel_w == 1))
+            if ((kernel_w >= 9) || (kernel_w == 1))
             {
                 desc.m0 = 1;
             }
@@ -106,16 +112,20 @@ DWCComputeKernelInfo configure_f32(const ITensorInfo *src, const ITensorInfo *we
     return desc;
 }
 
-DWCComputeKernelInfo configure_f16(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info, const Size2D &dilation,
-                                   unsigned int depth_multiplier, bool is_g71)
+DWCComputeKernelInfo configure_f16(const ITensorInfo   *src,
+                                   const ITensorInfo   *wei,
+                                   const PadStrideInfo &conv_info,
+                                   const Size2D        &dilation,
+                                   unsigned int         depth_multiplier,
+                                   bool                 is_g71)
 {
     DWCComputeKernelInfo desc;
 
-    if(src->data_layout() == DataLayout::NHWC)
+    if (src->data_layout() == DataLayout::NHWC)
     {
         // Src and weights have the same dimension indices
-        const size_t idx_c          = get_data_layout_dimension_index(wei->data_layout(), DataLayoutDimension::CHANNEL);
-        const size_t idx_w          = get_data_layout_dimension_index(wei->data_layout(), DataLayoutDimension::WIDTH);
+        const size_t      idx_c     = get_data_layout_dimension_index(wei->data_layout(), DataLayoutDimension::CHANNEL);
+        const size_t      idx_w     = get_data_layout_dimension_index(wei->data_layout(), DataLayoutDimension::WIDTH);
         const TensorShape src_shape = src->tensor_shape();
         const TensorShape wei_shape = wei->tensor_shape();
         const size_t      src_w     = src_shape[idx_w];
@@ -124,7 +134,7 @@ DWCComputeKernelInfo configure_f16(const ITensorInfo *src, const ITensorInfo *we
 
         desc.export_input_to_cl_image = false;
 
-        if(is_g71)
+        if (is_g71)
         {
             desc.export_weights_to_cl_image = false;
         }
@@ -133,9 +143,9 @@ DWCComputeKernelInfo configure_f16(const ITensorInfo *src, const ITensorInfo *we
             desc.export_weights_to_cl_image = use_cl_image_for_weights(wei, depth_multiplier);
         }
 
-        if(depth_multiplier == 1)
+        if (depth_multiplier == 1)
         {
-            if(desc.export_weights_to_cl_image == false)
+            if (desc.export_weights_to_cl_image == false)
             {
                 desc.n0 = 8;
             }
@@ -146,11 +156,11 @@ DWCComputeKernelInfo configure_f16(const ITensorInfo *src, const ITensorInfo *we
         }
         else
         {
-            if((depth_multiplier % 4) == 0)
+            if ((depth_multiplier % 4) == 0)
             {
                 desc.n0 = 4;
             }
-            else if((depth_multiplier % 2) == 0)
+            else if ((depth_multiplier % 2) == 0)
             {
                 desc.n0 = 2;
             }
@@ -161,20 +171,21 @@ DWCComputeKernelInfo configure_f16(const ITensorInfo *src, const ITensorInfo *we
         }
 
         // Note: If we reduce n0, export to cl_image must be false
-        ARM_COMPUTE_ERROR_ON((adjust_vec_size(desc.n0, kernel_c) != desc.n0) && (desc.export_weights_to_cl_image == true));
+        ARM_COMPUTE_ERROR_ON((adjust_vec_size(desc.n0, kernel_c) != desc.n0) &&
+                             (desc.export_weights_to_cl_image == true));
 
         desc.n0 = adjust_vec_size(desc.n0, kernel_c);
 
         // Set m0 only if stride_x == 1 and dilation_x == 1
-        if(conv_info.stride().first == 1 && dilation.x() == 1)
+        if (conv_info.stride().first == 1 && dilation.x() == 1)
         {
-            if((kernel_w >= 9) || (kernel_w == 1))
+            if ((kernel_w >= 9) || (kernel_w == 1))
             {
                 desc.m0 = 1;
             }
             else
             {
-                if((src_w % 5) == 0)
+                if ((src_w % 5) == 0)
                 {
                     desc.m0 = 5;
                 }
@@ -194,27 +205,30 @@ DWCComputeKernelInfo configure_f16(const ITensorInfo *src, const ITensorInfo *we
 }
 } // namespace
 
-ClDWCNativeDefaultConfigBifrost::ClDWCNativeDefaultConfigBifrost(GPUTarget gpu)
-    : IClDWCNativeKernelConfig(gpu)
+ClDWCNativeDefaultConfigBifrost::ClDWCNativeDefaultConfigBifrost(GPUTarget gpu) : IClDWCNativeKernelConfig(gpu)
 {
 }
 
-DWCComputeKernelInfo ClDWCNativeDefaultConfigBifrost::configure(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info, const Size2D &dilation,
-                                                                unsigned int depth_multiplier)
+DWCComputeKernelInfo ClDWCNativeDefaultConfigBifrost::configure(const ITensorInfo   *src,
+                                                                const ITensorInfo   *wei,
+                                                                const PadStrideInfo &conv_info,
+                                                                const Size2D        &dilation,
+                                                                unsigned int         depth_multiplier)
 {
-    using ConfigurationFunctionExecutorPtr = DWCComputeKernelInfo (ClDWCNativeDefaultConfigBifrost::*)(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info, const Size2D &dilation,
-                                                                   unsigned int depth_multiplier);
+    using ConfigurationFunctionExecutorPtr = DWCComputeKernelInfo (ClDWCNativeDefaultConfigBifrost::*)(
+        const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info, const Size2D &dilation,
+        unsigned int depth_multiplier);
 
-    ClDWCNativeConfigArray<ConfigurationFunctionExecutorPtr> configs_G71(&ClDWCNativeDefaultConfigBifrost::configure_G71_f32,
-                                                                         &ClDWCNativeDefaultConfigBifrost::configure_G71_f16,
-                                                                         &ClDWCNativeDefaultConfigBifrost::configure_G7x_u8);
+    ClDWCNativeConfigArray<ConfigurationFunctionExecutorPtr> configs_G71(
+        &ClDWCNativeDefaultConfigBifrost::configure_G71_f32, &ClDWCNativeDefaultConfigBifrost::configure_G71_f16,
+        &ClDWCNativeDefaultConfigBifrost::configure_G7x_u8);
 
-    ClDWCNativeConfigArray<ConfigurationFunctionExecutorPtr> configs_G7x(&ClDWCNativeDefaultConfigBifrost::configure_G7x_f32,
-                                                                         &ClDWCNativeDefaultConfigBifrost::configure_G7x_f16,
-                                                                         &ClDWCNativeDefaultConfigBifrost::configure_G7x_u8);
+    ClDWCNativeConfigArray<ConfigurationFunctionExecutorPtr> configs_G7x(
+        &ClDWCNativeDefaultConfigBifrost::configure_G7x_f32, &ClDWCNativeDefaultConfigBifrost::configure_G7x_f16,
+        &ClDWCNativeDefaultConfigBifrost::configure_G7x_u8);
 
     ConfigurationFunctionExecutorPtr func = nullptr;
-    switch(_target)
+    switch (_target)
     {
         case GPUTarget::G71:
             func = configs_G71.get_function(src->data_type());
@@ -228,43 +242,58 @@ DWCComputeKernelInfo ClDWCNativeDefaultConfigBifrost::configure(const ITensorInf
     return (this->*func)(src, wei, conv_info, dilation, depth_multiplier);
 }
 
-DWCComputeKernelInfo ClDWCNativeDefaultConfigBifrost::configure_G71_f32(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info, const Size2D &dilation,
-                                                                        unsigned int depth_multiplier)
+DWCComputeKernelInfo ClDWCNativeDefaultConfigBifrost::configure_G71_f32(const ITensorInfo   *src,
+                                                                        const ITensorInfo   *wei,
+                                                                        const PadStrideInfo &conv_info,
+                                                                        const Size2D        &dilation,
+                                                                        unsigned int         depth_multiplier)
 {
     return configure_f32(src, wei, conv_info, dilation, depth_multiplier, true);
 }
 
-DWCComputeKernelInfo ClDWCNativeDefaultConfigBifrost::configure_G71_f16(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info, const Size2D &dilation,
-                                                                        unsigned int depth_multiplier)
+DWCComputeKernelInfo ClDWCNativeDefaultConfigBifrost::configure_G71_f16(const ITensorInfo   *src,
+                                                                        const ITensorInfo   *wei,
+                                                                        const PadStrideInfo &conv_info,
+                                                                        const Size2D        &dilation,
+                                                                        unsigned int         depth_multiplier)
 {
     return configure_f16(src, wei, conv_info, dilation, depth_multiplier, true);
 }
 
-DWCComputeKernelInfo ClDWCNativeDefaultConfigBifrost::configure_G7x_f32(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info, const Size2D &dilation,
-                                                                        unsigned int depth_multiplier)
+DWCComputeKernelInfo ClDWCNativeDefaultConfigBifrost::configure_G7x_f32(const ITensorInfo   *src,
+                                                                        const ITensorInfo   *wei,
+                                                                        const PadStrideInfo &conv_info,
+                                                                        const Size2D        &dilation,
+                                                                        unsigned int         depth_multiplier)
 {
     return configure_f32(src, wei, conv_info, dilation, depth_multiplier, false);
 }
 
-DWCComputeKernelInfo ClDWCNativeDefaultConfigBifrost::configure_G7x_f16(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info, const Size2D &dilation,
-                                                                        unsigned int depth_multiplier)
+DWCComputeKernelInfo ClDWCNativeDefaultConfigBifrost::configure_G7x_f16(const ITensorInfo   *src,
+                                                                        const ITensorInfo   *wei,
+                                                                        const PadStrideInfo &conv_info,
+                                                                        const Size2D        &dilation,
+                                                                        unsigned int         depth_multiplier)
 {
     return configure_f16(src, wei, conv_info, dilation, depth_multiplier, false);
 }
 
-DWCComputeKernelInfo ClDWCNativeDefaultConfigBifrost::configure_G7x_u8(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info, const Size2D &dilation,
-                                                                        unsigned int depth_multiplier)
+DWCComputeKernelInfo ClDWCNativeDefaultConfigBifrost::configure_G7x_u8(const ITensorInfo   *src,
+                                                                       const ITensorInfo   *wei,
+                                                                       const PadStrideInfo &conv_info,
+                                                                       const Size2D        &dilation,
+                                                                       unsigned int         depth_multiplier)
 {
     ARM_COMPUTE_UNUSED(wei);
 
     DWCComputeKernelInfo desc;
 
-    if(src->data_layout() == DataLayout::NHWC)
+    if (src->data_layout() == DataLayout::NHWC)
     {
         desc.export_input_to_cl_image   = false;
         desc.export_weights_to_cl_image = false;
         desc.n0                         = (depth_multiplier == 1) ? 4 : 1;
-        if(conv_info.stride().first == 1 && dilation.x() == 1 && depth_multiplier == 1)
+        if (conv_info.stride().first == 1 && dilation.x() == 1 && depth_multiplier == 1)
         {
             desc.m0 = 2;
         }
diff --git a/src/runtime/heuristics/dwc_native/ClDWCNativeDefaultConfigBifrost.h b/src/runtime/heuristics/dwc_native/ClDWCNativeDefaultConfigBifrost.h
index cec2cae5dd..41d86c9c14 100644
--- a/src/runtime/heuristics/dwc_native/ClDWCNativeDefaultConfigBifrost.h
+++ b/src/runtime/heuristics/dwc_native/ClDWCNativeDefaultConfigBifrost.h
@@ -41,20 +41,38 @@ public:
     ClDWCNativeDefaultConfigBifrost(GPUTarget gpu);
 
     // Inherited overridden method
-    DWCComputeKernelInfo configure(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info, const Size2D &dilation,
-                                   unsigned int depth_multiplier) override;
+    DWCComputeKernelInfo configure(const ITensorInfo   *src,
+                                   const ITensorInfo   *wei,
+                                   const PadStrideInfo &conv_info,
+                                   const Size2D        &dilation,
+                                   unsigned int         depth_multiplier) override;
 
 private:
-    DWCComputeKernelInfo configure_G71_f32(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info, const Size2D &dilation,
-                                           unsigned int depth_multiplier);
-    DWCComputeKernelInfo configure_G71_f16(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info, const Size2D &dilation,
-                                           unsigned int depth_multiplier);
-    DWCComputeKernelInfo configure_G7x_f32(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info, const Size2D &dilation,
-                                           unsigned int depth_multiplier);
-    DWCComputeKernelInfo configure_G7x_f16(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info, const Size2D &dilation,
-                                           unsigned int depth_multiplier);
-    DWCComputeKernelInfo configure_G7x_u8(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info, const Size2D &dilation,
-                                          unsigned int depth_multiplier);
+    DWCComputeKernelInfo configure_G71_f32(const ITensorInfo   *src,
+                                           const ITensorInfo   *wei,
+                                           const PadStrideInfo &conv_info,
+                                           const Size2D        &dilation,
+                                           unsigned int         depth_multiplier);
+    DWCComputeKernelInfo configure_G71_f16(const ITensorInfo   *src,
+                                           const ITensorInfo   *wei,
+                                           const PadStrideInfo &conv_info,
+                                           const Size2D        &dilation,
+                                           unsigned int         depth_multiplier);
+    DWCComputeKernelInfo configure_G7x_f32(const ITensorInfo   *src,
+                                           const ITensorInfo   *wei,
+                                           const PadStrideInfo &conv_info,
+                                           const Size2D        &dilation,
+                                           unsigned int         depth_multiplier);
+    DWCComputeKernelInfo configure_G7x_f16(const ITensorInfo   *src,
+                                           const ITensorInfo   *wei,
+                                           const PadStrideInfo &conv_info,
+                                           const Size2D        &dilation,
+                                           unsigned int         depth_multiplier);
+    DWCComputeKernelInfo configure_G7x_u8(const ITensorInfo   *src,
+                                          const ITensorInfo   *wei,
+                                          const PadStrideInfo &conv_info,
+                                          const Size2D        &dilation,
+                                          unsigned int         depth_multiplier);
 };
 } // namespace cl_dwc
 } // namespace arm_compute
diff --git a/src/runtime/heuristics/dwc_native/ClDWCNativeDefaultConfigValhall.cpp b/src/runtime/heuristics/dwc_native/ClDWCNativeDefaultConfigValhall.cpp
index 51f3787875..ef1bb3858c 100644
--- a/src/runtime/heuristics/dwc_native/ClDWCNativeDefaultConfigValhall.cpp
+++ b/src/runtime/heuristics/dwc_native/ClDWCNativeDefaultConfigValhall.cpp
@@ -22,7 +22,6 @@
  * SOFTWARE.
  */
 #include "src/runtime/heuristics/dwc_native/ClDWCNativeDefaultConfigValhall.h"
-#include "src/runtime/heuristics/dwc_native/ClDWCNativeHeuristicsHelpers.h"
 
 #include "arm_compute/core/CL/CLHelpers.h"
 #include "arm_compute/core/GPUTarget.h"
@@ -30,31 +29,36 @@
 #include "arm_compute/core/TensorShape.h"
 #include "arm_compute/core/utils/helpers/AdjustVecSize.h"
 
+#include "src/runtime/heuristics/dwc_native/ClDWCNativeHeuristicsHelpers.h"
+
 namespace arm_compute
 {
 namespace cl_dwc
 {
-ClDWCNativeDefaultConfigValhall::ClDWCNativeDefaultConfigValhall(GPUTarget gpu)
-    : IClDWCNativeKernelConfig(gpu)
+ClDWCNativeDefaultConfigValhall::ClDWCNativeDefaultConfigValhall(GPUTarget gpu) : IClDWCNativeKernelConfig(gpu)
 {
 }
 
-DWCComputeKernelInfo ClDWCNativeDefaultConfigValhall::configure(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info, const Size2D &dilation,
-                                                                unsigned int depth_multiplier)
+DWCComputeKernelInfo ClDWCNativeDefaultConfigValhall::configure(const ITensorInfo   *src,
+                                                                const ITensorInfo   *wei,
+                                                                const PadStrideInfo &conv_info,
+                                                                const Size2D        &dilation,
+                                                                unsigned int         depth_multiplier)
 {
-    using ConfigurationFunctionExecutorPtr = DWCComputeKernelInfo (ClDWCNativeDefaultConfigValhall::*)(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info, const Size2D &dilation,
-                                                                   unsigned int depth_multiplier);
+    using ConfigurationFunctionExecutorPtr = DWCComputeKernelInfo (ClDWCNativeDefaultConfigValhall::*)(
+        const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info, const Size2D &dilation,
+        unsigned int depth_multiplier);
 
-    ClDWCNativeConfigArray<ConfigurationFunctionExecutorPtr> configs_G78(&ClDWCNativeDefaultConfigValhall::configure_G78_f32,
-                                                                         &ClDWCNativeDefaultConfigValhall::configure_G78_f16,
-                                                                         &ClDWCNativeDefaultConfigValhall::configure_G78_u8);
+    ClDWCNativeConfigArray<ConfigurationFunctionExecutorPtr> configs_G78(
+        &ClDWCNativeDefaultConfigValhall::configure_G78_f32, &ClDWCNativeDefaultConfigValhall::configure_G78_f16,
+        &ClDWCNativeDefaultConfigValhall::configure_G78_u8);
 
-    ClDWCNativeConfigArray<ConfigurationFunctionExecutorPtr> configs_G77(&ClDWCNativeDefaultConfigValhall::configure_G78_f32,
-                                                                         &ClDWCNativeDefaultConfigValhall::configure_G77_f16,
-                                                                         &ClDWCNativeDefaultConfigValhall::configure_G78_u8);
+    ClDWCNativeConfigArray<ConfigurationFunctionExecutorPtr> configs_G77(
+        &ClDWCNativeDefaultConfigValhall::configure_G78_f32, &ClDWCNativeDefaultConfigValhall::configure_G77_f16,
+        &ClDWCNativeDefaultConfigValhall::configure_G78_u8);
 
     ConfigurationFunctionExecutorPtr func = nullptr;
-    switch(_target)
+    switch (_target)
     {
         case GPUTarget::G77:
             func = configs_G77.get_function(src->data_type());
@@ -69,15 +73,18 @@ DWCComputeKernelInfo ClDWCNativeDefaultConfigValhall::configure(const ITensorInf
     return (this->*func)(src, wei, conv_info, dilation, depth_multiplier);
 }
 
-DWCComputeKernelInfo ClDWCNativeDefaultConfigValhall::configure_G78_f32(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info, const Size2D &dilation,
-                                                                        unsigned int depth_multiplier)
+DWCComputeKernelInfo ClDWCNativeDefaultConfigValhall::configure_G78_f32(const ITensorInfo   *src,
+                                                                        const ITensorInfo   *wei,
+                                                                        const PadStrideInfo &conv_info,
+                                                                        const Size2D        &dilation,
+                                                                        unsigned int         depth_multiplier)
 {
     DWCComputeKernelInfo desc;
 
-    if(src->data_layout() == DataLayout::NHWC)
+    if (src->data_layout() == DataLayout::NHWC)
     {
-        const size_t idx_c          = get_data_layout_dimension_index(wei->data_layout(), DataLayoutDimension::CHANNEL);
-        const size_t idx_w          = get_data_layout_dimension_index(wei->data_layout(), DataLayoutDimension::WIDTH);
+        const size_t      idx_c     = get_data_layout_dimension_index(wei->data_layout(), DataLayoutDimension::CHANNEL);
+        const size_t      idx_w     = get_data_layout_dimension_index(wei->data_layout(), DataLayoutDimension::WIDTH);
         const TensorShape wei_shape = wei->tensor_shape();
         const size_t      kernel_c  = wei_shape[idx_c];
         const size_t      kernel_w  = wei_shape[idx_w];
@@ -85,17 +92,17 @@ DWCComputeKernelInfo ClDWCNativeDefaultConfigValhall::configure_G78_f32(const IT
         desc.export_input_to_cl_image   = false;
         desc.export_weights_to_cl_image = use_cl_image_for_weights(wei, depth_multiplier);
 
-        if(depth_multiplier == 1)
+        if (depth_multiplier == 1)
         {
             desc.n0 = 4;
         }
         else
         {
-            if((depth_multiplier % 4) == 0)
+            if ((depth_multiplier % 4) == 0)
             {
                 desc.n0 = 4;
             }
-            else if((depth_multiplier % 2) == 0)
+            else if ((depth_multiplier % 2) == 0)
             {
                 desc.n0 = 2;
             }
@@ -106,14 +113,15 @@ DWCComputeKernelInfo ClDWCNativeDefaultConfigValhall::configure_G78_f32(const IT
         }
 
         // Note: If we reduce n0, export to cl_image must be false
-        ARM_COMPUTE_ERROR_ON((adjust_vec_size(desc.n0, kernel_c) != desc.n0) && (desc.export_weights_to_cl_image == true));
+        ARM_COMPUTE_ERROR_ON((adjust_vec_size(desc.n0, kernel_c) != desc.n0) &&
+                             (desc.export_weights_to_cl_image == true));
 
         desc.n0 = adjust_vec_size(desc.n0, kernel_c);
 
         // Set m0 only if stride_x == 1 and dilation_x == 1
-        if(conv_info.stride().first == 1 && dilation.x() == 1)
+        if (conv_info.stride().first == 1 && dilation.x() == 1)
         {
-            if((kernel_w >= 9) || (kernel_w == 1))
+            if ((kernel_w >= 9) || (kernel_w == 1))
             {
                 desc.m0 = 1;
             }
@@ -131,16 +139,19 @@ DWCComputeKernelInfo ClDWCNativeDefaultConfigValhall::configure_G78_f32(const IT
     return desc;
 }
 
-DWCComputeKernelInfo ClDWCNativeDefaultConfigValhall::configure_G78_f16(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info, const Size2D &dilation,
-                                                                        unsigned int depth_multiplier)
+DWCComputeKernelInfo ClDWCNativeDefaultConfigValhall::configure_G78_f16(const ITensorInfo   *src,
+                                                                        const ITensorInfo   *wei,
+                                                                        const PadStrideInfo &conv_info,
+                                                                        const Size2D        &dilation,
+                                                                        unsigned int         depth_multiplier)
 {
     DWCComputeKernelInfo desc;
 
-    if(src->data_layout() == DataLayout::NHWC)
+    if (src->data_layout() == DataLayout::NHWC)
     {
         // Src and weights have the same dimension indices
-        const size_t idx_c          = get_data_layout_dimension_index(wei->data_layout(), DataLayoutDimension::CHANNEL);
-        const size_t idx_w          = get_data_layout_dimension_index(wei->data_layout(), DataLayoutDimension::WIDTH);
+        const size_t      idx_c     = get_data_layout_dimension_index(wei->data_layout(), DataLayoutDimension::CHANNEL);
+        const size_t      idx_w     = get_data_layout_dimension_index(wei->data_layout(), DataLayoutDimension::WIDTH);
         const TensorShape src_shape = src->tensor_shape();
         const TensorShape wei_shape = wei->tensor_shape();
         const size_t      src_w     = src_shape[idx_w];
@@ -150,9 +161,9 @@ DWCComputeKernelInfo ClDWCNativeDefaultConfigValhall::configure_G78_f16(const IT
         desc.export_input_to_cl_image   = false;
         desc.export_weights_to_cl_image = use_cl_image_for_weights(wei, depth_multiplier);
 
-        if(depth_multiplier == 1)
+        if (depth_multiplier == 1)
         {
-            if(desc.export_weights_to_cl_image == false)
+            if (desc.export_weights_to_cl_image == false)
             {
                 desc.n0 = 8;
             }
@@ -163,11 +174,11 @@ DWCComputeKernelInfo ClDWCNativeDefaultConfigValhall::configure_G78_f16(const IT
         }
         else
         {
-            if((depth_multiplier % 4) == 0)
+            if ((depth_multiplier % 4) == 0)
             {
                 desc.n0 = 4;
             }
-            else if((depth_multiplier % 2) == 0)
+            else if ((depth_multiplier % 2) == 0)
             {
                 desc.n0 = 2;
             }
@@ -178,20 +189,21 @@ DWCComputeKernelInfo ClDWCNativeDefaultConfigValhall::configure_G78_f16(const IT
         }
 
         // Note: If we reduce n0, export to cl_image must be false
-        ARM_COMPUTE_ERROR_ON((adjust_vec_size(desc.n0, kernel_c) != desc.n0) && (desc.export_weights_to_cl_image == true));
+        ARM_COMPUTE_ERROR_ON((adjust_vec_size(desc.n0, kernel_c) != desc.n0) &&
+                             (desc.export_weights_to_cl_image == true));
 
         desc.n0 = adjust_vec_size(desc.n0, kernel_c);
 
         // Set m0 only if stride_x == 1 and dilation_x == 1
-        if(conv_info.stride().first == 1 && dilation.x() == 1)
+        if (conv_info.stride().first == 1 && dilation.x() == 1)
         {
-            if((kernel_w >= 9) || (kernel_w == 1))
+            if ((kernel_w >= 9) || (kernel_w == 1))
             {
                 desc.m0 = 1;
             }
             else
             {
-                if((src_w % 5) == 0)
+                if ((src_w % 5) == 0)
                 {
                     desc.m0 = 5;
                 }
@@ -210,19 +222,22 @@ DWCComputeKernelInfo ClDWCNativeDefaultConfigValhall::configure_G78_f16(const IT
     return desc;
 }
 
-DWCComputeKernelInfo ClDWCNativeDefaultConfigValhall::configure_G78_u8(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info, const Size2D &dilation,
-                                                                       unsigned int depth_multiplier)
+DWCComputeKernelInfo ClDWCNativeDefaultConfigValhall::configure_G78_u8(const ITensorInfo   *src,
+                                                                       const ITensorInfo   *wei,
+                                                                       const PadStrideInfo &conv_info,
+                                                                       const Size2D        &dilation,
+                                                                       unsigned int         depth_multiplier)
 {
     ARM_COMPUTE_UNUSED(wei);
 
     DWCComputeKernelInfo desc;
 
-    if(src->data_layout() == DataLayout::NHWC)
+    if (src->data_layout() == DataLayout::NHWC)
     {
         desc.export_input_to_cl_image   = false;
         desc.export_weights_to_cl_image = false;
         desc.n0                         = (depth_multiplier == 1) ? 4 : 1;
-        if(conv_info.stride().first == 1 && dilation.x() == 1 && depth_multiplier == 1)
+        if (conv_info.stride().first == 1 && dilation.x() == 1 && depth_multiplier == 1)
         {
             desc.m0 = 2;
         }
@@ -235,15 +250,18 @@ DWCComputeKernelInfo ClDWCNativeDefaultConfigValhall::configure_G78_u8(const ITe
     return desc;
 }
 
-DWCComputeKernelInfo ClDWCNativeDefaultConfigValhall::configure_G77_f16(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info, const Size2D &dilation,
-                                                                        unsigned int depth_multiplier)
+DWCComputeKernelInfo ClDWCNativeDefaultConfigValhall::configure_G77_f16(const ITensorInfo   *src,
+                                                                        const ITensorInfo   *wei,
+                                                                        const PadStrideInfo &conv_info,
+                                                                        const Size2D        &dilation,
+                                                                        unsigned int         depth_multiplier)
 {
     DWCComputeKernelInfo desc;
 
-    if(src->data_layout() == DataLayout::NHWC)
+    if (src->data_layout() == DataLayout::NHWC)
     {
-        const size_t idx_c          = get_data_layout_dimension_index(wei->data_layout(), DataLayoutDimension::CHANNEL);
-        const size_t idx_w          = get_data_layout_dimension_index(wei->data_layout(), DataLayoutDimension::WIDTH);
+        const size_t      idx_c     = get_data_layout_dimension_index(wei->data_layout(), DataLayoutDimension::CHANNEL);
+        const size_t      idx_w     = get_data_layout_dimension_index(wei->data_layout(), DataLayoutDimension::WIDTH);
         const TensorShape wei_shape = wei->tensor_shape();
         const size_t      kernel_c  = wei_shape[idx_c];
         const size_t      kernel_w  = wei_shape[idx_w];
@@ -251,9 +269,9 @@ DWCComputeKernelInfo ClDWCNativeDefaultConfigValhall::configure_G77_f16(const IT
         desc.export_input_to_cl_image   = false;
         desc.export_weights_to_cl_image = use_cl_image_for_weights(wei, depth_multiplier);
 
-        if(depth_multiplier == 1)
+        if (depth_multiplier == 1)
         {
-            if(desc.export_weights_to_cl_image == false)
+            if (desc.export_weights_to_cl_image == false)
             {
                 desc.n0 = 8;
             }
@@ -264,11 +282,11 @@ DWCComputeKernelInfo ClDWCNativeDefaultConfigValhall::configure_G77_f16(const IT
         }
         else
         {
-            if((depth_multiplier % 4) == 0)
+            if ((depth_multiplier % 4) == 0)
             {
                 desc.n0 = 4;
             }
-            else if((depth_multiplier % 2) == 0)
+            else if ((depth_multiplier % 2) == 0)
             {
                 desc.n0 = 2;
             }
@@ -279,14 +297,15 @@ DWCComputeKernelInfo ClDWCNativeDefaultConfigValhall::configure_G77_f16(const IT
         }
 
         // Note: If we reduce n0, export to cl_image must be false
-        ARM_COMPUTE_ERROR_ON((adjust_vec_size(desc.n0, kernel_c) != desc.n0) && (desc.export_weights_to_cl_image == true));
+        ARM_COMPUTE_ERROR_ON((adjust_vec_size(desc.n0, kernel_c) != desc.n0) &&
+                             (desc.export_weights_to_cl_image == true));
 
         desc.n0 = adjust_vec_size(desc.n0, kernel_c);
 
         // Set m0 only if stride_x == 1 and dilation_x == 1
-        if(conv_info.stride().first == 1 && dilation.x() == 1)
+        if (conv_info.stride().first == 1 && dilation.x() == 1)
         {
-            if((kernel_w >= 9) || (kernel_w == 1))
+            if ((kernel_w >= 9) || (kernel_w == 1))
             {
                 desc.m0 = 1;
             }
diff --git a/src/runtime/heuristics/dwc_native/ClDWCNativeDefaultConfigValhall.h b/src/runtime/heuristics/dwc_native/ClDWCNativeDefaultConfigValhall.h
index 4d51fa668c..fabce77b54 100644
--- a/src/runtime/heuristics/dwc_native/ClDWCNativeDefaultConfigValhall.h
+++ b/src/runtime/heuristics/dwc_native/ClDWCNativeDefaultConfigValhall.h
@@ -41,18 +41,33 @@ public:
     ClDWCNativeDefaultConfigValhall(GPUTarget gpu);
 
     // Inherited overridden method
-    DWCComputeKernelInfo configure(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info, const Size2D &dilation,
-                                   unsigned int depth_multiplier) override;
+    DWCComputeKernelInfo configure(const ITensorInfo   *src,
+                                   const ITensorInfo   *wei,
+                                   const PadStrideInfo &conv_info,
+                                   const Size2D        &dilation,
+                                   unsigned int         depth_multiplier) override;
 
 private:
-    DWCComputeKernelInfo configure_G78_f32(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info, const Size2D &dilation,
-                                           unsigned int depth_multiplier);
-    DWCComputeKernelInfo configure_G78_f16(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info, const Size2D &dilation,
-                                           unsigned int depth_multiplier);
-    DWCComputeKernelInfo configure_G78_u8(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info, const Size2D &dilation,
-                                          unsigned int depth_multiplier);
-    DWCComputeKernelInfo configure_G77_f16(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info, const Size2D &dilation,
-                                           unsigned int depth_multiplier);
+    DWCComputeKernelInfo configure_G78_f32(const ITensorInfo   *src,
+                                           const ITensorInfo   *wei,
+                                           const PadStrideInfo &conv_info,
+                                           const Size2D        &dilation,
+                                           unsigned int         depth_multiplier);
+    DWCComputeKernelInfo configure_G78_f16(const ITensorInfo   *src,
+                                           const ITensorInfo   *wei,
+                                           const PadStrideInfo &conv_info,
+                                           const Size2D        &dilation,
+                                           unsigned int         depth_multiplier);
+    DWCComputeKernelInfo configure_G78_u8(const ITensorInfo   *src,
+                                          const ITensorInfo   *wei,
+                                          const PadStrideInfo &conv_info,
+                                          const Size2D        &dilation,
+                                          unsigned int         depth_multiplier);
+    DWCComputeKernelInfo configure_G77_f16(const ITensorInfo   *src,
+                                           const ITensorInfo   *wei,
+                                           const PadStrideInfo &conv_info,
+                                           const Size2D        &dilation,
+                                           unsigned int         depth_multiplier);
 };
 } // namespace cl_dwc
 } // namespace arm_compute
diff --git a/src/runtime/heuristics/dwc_native/ClDWCNativeHeuristicsHelpers.cpp b/src/runtime/heuristics/dwc_native/ClDWCNativeHeuristicsHelpers.cpp
index 5593c6de61..c8b006c546 100644
--- a/src/runtime/heuristics/dwc_native/ClDWCNativeHeuristicsHelpers.cpp
+++ b/src/runtime/heuristics/dwc_native/ClDWCNativeHeuristicsHelpers.cpp
@@ -32,7 +32,7 @@ namespace cl_dwc
 bool use_cl_image_for_weights(const ITensorInfo *weights, unsigned int depth_multiplier)
 {
     // Check whether we can use the cl image with the weights.
-    if(!export_to_cl_image(weights))
+    if (!export_to_cl_image(weights))
     {
         return false;
     }
@@ -45,12 +45,12 @@ bool use_cl_image_for_weights(const ITensorInfo *weights, unsigned int depth_mul
     // If we can use the cl image storage with the weights, we prefer to use the cl buffer storage in the following cases for performance reasons:
     // 1- When the kernel size is 1x1
     // 2- When the depth multiplier is greater than 1 and not multiple of 4.
-    if((kernel_w == 1) && (kernel_h == 1))
+    if ((kernel_w == 1) && (kernel_h == 1))
     {
         return false;
     }
 
-    if((depth_multiplier > 1) && (depth_multiplier % 4) != 0)
+    if ((depth_multiplier > 1) && (depth_multiplier % 4) != 0)
     {
         return false;
     }
@@ -58,4 +58,4 @@ bool use_cl_image_for_weights(const ITensorInfo *weights, unsigned int depth_mul
     return true;
 }
 } // namespace cl_dwc
-} // namespace arm_compute
-\ No newline at end of file
+} // namespace arm_compute
diff --git a/src/runtime/heuristics/dwc_native/ClDWCNativeKernelConfig.h b/src/runtime/heuristics/dwc_native/ClDWCNativeKernelConfig.h
index c08053dcb3..49ce6ff479 100644
--- a/src/runtime/heuristics/dwc_native/ClDWCNativeKernelConfig.h
+++ b/src/runtime/heuristics/dwc_native/ClDWCNativeKernelConfig.h
@@ -46,7 +46,7 @@ public:
      */
     static std::unique_ptr<IClDWCNativeKernelConfig> create(GPUTarget gpu)
     {
-        switch(get_arch_from_target(gpu))
+        switch (get_arch_from_target(gpu))
         {
             case GPUTarget::MIDGARD:
                 // The heuristic for Midgard is the same as the one used for Arm Mali-G71
diff --git a/src/runtime/heuristics/dwc_native/IClDWCNativeKernelConfig.h b/src/runtime/heuristics/dwc_native/IClDWCNativeKernelConfig.h
index b5df132a12..614a6622df 100644
--- a/src/runtime/heuristics/dwc_native/IClDWCNativeKernelConfig.h
+++ b/src/runtime/heuristics/dwc_native/IClDWCNativeKernelConfig.h
@@ -27,6 +27,7 @@
 #include "arm_compute/core/GPUTarget.h"
 #include "arm_compute/core/KernelDescriptors.h"
 #include "arm_compute/core/Types.h"
+
 #include "src/core/common/Macros.h"
 
 namespace arm_compute
@@ -52,8 +53,7 @@ public:
      * @param[in] func_int8 Function to call for depthwise convolution Int8 (QASYMM8, QASYMM8_SIGNED, QSYMM8_PER_CHANNEL)
      *
      */
-    ClDWCNativeConfigArray(T func_f32, T func_f16, T func_int8)
-        : _configs{ func_f32, func_f16, func_int8 }
+    ClDWCNativeConfigArray(T func_f32, T func_f16, T func_int8) : _configs{func_f32, func_f16, func_int8}
     {
     }
 
@@ -65,7 +65,7 @@ public:
      */
     T get_function(DataType data_type)
     {
-        switch(data_type)
+        switch (data_type)
         {
             case DataType::F32:
                 return _configs.at(DT_F32);
@@ -92,8 +92,7 @@ public:
      *
      * @param[in] arch GPU target
      */
-    IClDWCNativeKernelConfig(GPUTarget arch)
-        : _target(arch)
+    IClDWCNativeKernelConfig(GPUTarget arch) : _target(arch)
     {
     }
     ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(IClDWCNativeKernelConfig);
@@ -107,8 +106,11 @@ public:
      * @param[in] dilation         Kernel dilation
      * @param[in] depth_multiplier Output feature maps multiplier
      */
-    virtual DWCComputeKernelInfo configure(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info, const Size2D &dilation,
-                                           unsigned int depth_multiplier) = 0;
+    virtual DWCComputeKernelInfo configure(const ITensorInfo   *src,
+                                           const ITensorInfo   *wei,
+                                           const PadStrideInfo &conv_info,
+                                           const Size2D        &dilation,
+                                           unsigned int         depth_multiplier) = 0;
 
 protected:
     GPUTarget _target;