Diffstat (limited to 'src/gpu/cl/kernels/gemm')
-rw-r--r--  src/gpu/cl/kernels/gemm/ClGemmHelpers.cpp | 79
-rw-r--r--  src/gpu/cl/kernels/gemm/ClGemmHelpers.h | 36
-rw-r--r--  src/gpu/cl/kernels/gemm/IClGemmKernelConfig.h | 12
-rw-r--r--  src/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeBifrost.cpp | 90
-rw-r--r--  src/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeBifrost.h | 21
-rw-r--r--  src/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeMidgard.cpp | 19
-rw-r--r--  src/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeMidgard.h | 6
-rw-r--r--  src/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeValhall.cpp | 54
-rw-r--r--  src/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeValhall.h | 12
-rw-r--r--  src/gpu/cl/kernels/gemm/native/ClGemmNativeKernelConfig.h | 2
-rw-r--r--  src/gpu/cl/kernels/gemm/reshaped/ClGemmDefaultConfigReshapedBifrost.cpp | 163
-rw-r--r--  src/gpu/cl/kernels/gemm/reshaped/ClGemmDefaultConfigReshapedBifrost.h | 27
-rw-r--r--  src/gpu/cl/kernels/gemm/reshaped/ClGemmDefaultConfigReshapedValhall.cpp | 168
-rw-r--r--  src/gpu/cl/kernels/gemm/reshaped/ClGemmDefaultConfigReshapedValhall.h | 18
-rw-r--r--  src/gpu/cl/kernels/gemm/reshaped/ClGemmReshapedKernelConfig.h | 2
-rw-r--r--  src/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmDefaultConfigReshapedRhsOnlyBifrost.cpp | 242
-rw-r--r--  src/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmDefaultConfigReshapedRhsOnlyBifrost.h | 39
-rw-r--r--  src/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmDefaultConfigReshapedRhsOnlyValhall.cpp | 550
-rw-r--r--  src/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmDefaultConfigReshapedRhsOnlyValhall.h | 27
-rw-r--r--  src/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmReshapedOnlyRhsKernelConfig.h | 2
20 files changed, 813 insertions, 756 deletions
diff --git a/src/gpu/cl/kernels/gemm/ClGemmHelpers.cpp b/src/gpu/cl/kernels/gemm/ClGemmHelpers.cpp
index 9350bf74bb..b5ebac3b49 100644
--- a/src/gpu/cl/kernels/gemm/ClGemmHelpers.cpp
+++ b/src/gpu/cl/kernels/gemm/ClGemmHelpers.cpp
@@ -39,14 +39,24 @@ namespace kernels
{
namespace gemm
{
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_lhs_rhs_info(unsigned int m, unsigned int n, unsigned int m0, unsigned int n0, unsigned int k0, unsigned int v0, unsigned int h0,
- bool lhs_interleave, bool rhs_interleave, bool lhs_transpose, bool rhs_transpose, bool export_to_cl_image)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_lhs_rhs_info(unsigned int m,
+ unsigned int n,
+ unsigned int m0,
+ unsigned int n0,
+ unsigned int k0,
+ unsigned int v0,
+ unsigned int h0,
+ bool lhs_interleave,
+ bool rhs_interleave,
+ bool lhs_transpose,
+ bool rhs_transpose,
+ bool export_to_cl_image)
{
ARM_COMPUTE_ERROR_ON(m0 == 0 || n0 == 0);
ARM_COMPUTE_ERROR_ON(v0 == 0);
v0 = std::max(std::min(static_cast<int>(m / m0), static_cast<int>(v0)), static_cast<int>(1));
- if(h0 == 0)
+ if (h0 == 0)
{
// When h0 is 0, we should take the maximum H0 possible
h0 = std::max(n / n0, 1U);
@@ -62,17 +72,22 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_lhs_rhs_info(unsigned
return std::make_pair(lhs_info, rhs_info);
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> select_lhs_rhs_info(std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> info_img,
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> info_buf,
- unsigned int n, unsigned int k, unsigned int b, DataType data_type)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+select_lhs_rhs_info(std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> info_img,
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> info_buf,
+ unsigned int n,
+ unsigned int k,
+ unsigned int b,
+ DataType data_type)
{
- ARM_COMPUTE_ERROR_ON_MSG(info_buf.second.export_to_cl_image == true, "The fallback GeMM configuration cannot have export_to_cl_image = true");
+ ARM_COMPUTE_ERROR_ON_MSG(info_buf.second.export_to_cl_image == true,
+ "The fallback GeMM configuration cannot have export_to_cl_image = true");
const TensorInfo tensor_rhs_info(TensorShape(n, k, b), 1, data_type);
const TensorShape shape = misc::shape_calculator::compute_rhs_reshaped_shape(tensor_rhs_info, info_img.second);
const TensorInfo tensor_reshaped_info(shape, 1, data_type);
- if(bool(validate_image2d_support_on_rhs(tensor_reshaped_info, info_img.second)))
+ if (bool(validate_image2d_support_on_rhs(tensor_reshaped_info, info_img.second)))
{
return info_img;
}
@@ -90,42 +105,56 @@ void update_padding_for_cl_image(ITensorInfo *tensor)
const unsigned int pixel_alignment = get_cl_image_pitch_alignment(CLKernelLibrary::get().get_device());
ARM_COMPUTE_ERROR_ON_MSG(pixel_alignment == 0, "Cannot retrieve cl_image pitch alignment");
- if(pixel_alignment == 0)
+ if (pixel_alignment == 0)
{
return;
}
const unsigned int row_pitch_alignment = pixel_alignment * num_floats_per_pixel;
- const unsigned int round_up_width = ((stride_y_in_elements + row_pitch_alignment - 1) / row_pitch_alignment) * row_pitch_alignment;
- const unsigned int padding = round_up_width - stride_y_in_elements;
+ const unsigned int round_up_width =
+ ((stride_y_in_elements + row_pitch_alignment - 1) / row_pitch_alignment) * row_pitch_alignment;
+ const unsigned int padding = round_up_width - stride_y_in_elements;
tensor->extend_padding(PaddingSize(0, tensor->padding().right + padding, 0, 0));
}
Status validate_image2d_support_on_rhs(const ITensorInfo &tensor_reshaped_info, const GEMMRHSMatrixInfo &rhs_info)
{
- if(rhs_info.export_to_cl_image)
+ if (rhs_info.export_to_cl_image)
{
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(((rhs_info.n0 == 2) || (rhs_info.n0 == 3)) && rhs_info.transpose == false, "Export to cl_image only supported with n0 = 4, 8 or 16");
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(((rhs_info.k0 == 2) || (rhs_info.k0 == 3)) && rhs_info.transpose == true, "Export to cl_image only supported with k0 = 4, 8 or 16");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(((rhs_info.n0 == 2) || (rhs_info.n0 == 3)) && rhs_info.transpose == false,
+ "Export to cl_image only supported with n0 = 4, 8 or 16");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(((rhs_info.k0 == 2) || (rhs_info.k0 == 3)) && rhs_info.transpose == true,
+ "Export to cl_image only supported with k0 = 4, 8 or 16");
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_NOT_IN(&tensor_reshaped_info, DataType::F32, DataType::F16);
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(!image2d_from_buffer_supported(CLKernelLibrary::get().get_device()), "The extension cl_khr_image2d_from_buffer is not supported on the target platform");
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(get_cl_image_pitch_alignment(CLKernelLibrary::get().get_device()) == 0, "Impossible to retrieve the cl_image pitch alignment");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(
+ !image2d_from_buffer_supported(CLKernelLibrary::get().get_device()),
+ "The extension cl_khr_image2d_from_buffer is not supported on the target platform");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(get_cl_image_pitch_alignment(CLKernelLibrary::get().get_device()) == 0,
+ "Impossible to retrieve the cl_image pitch alignment");
// Check the width and height of the output tensor.
// Since we cannot create a 3d image from a buffer, the third dimension is collapsed on the second dimension
const size_t max_image_w = CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_IMAGE2D_MAX_WIDTH>();
const size_t max_image_h = CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_IMAGE2D_MAX_HEIGHT>();
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(tensor_reshaped_info.tensor_shape()[0] > max_image_w * 4, "Not supported width for cl_image");
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(tensor_reshaped_info.tensor_shape()[1] * tensor_reshaped_info.tensor_shape()[2] > max_image_h, "Not supported height for cl_image");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(tensor_reshaped_info.tensor_shape()[0] > max_image_w * 4,
+ "Not supported width for cl_image");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(
+ tensor_reshaped_info.tensor_shape()[1] * tensor_reshaped_info.tensor_shape()[2] > max_image_h,
+ "Not supported height for cl_image");
}
return Status{};
}
-bool is_mmul_kernel_preferred(const unsigned int m, const unsigned int n, const unsigned int k, const unsigned int b,
- const DataType data_type, unsigned int &best_m0, unsigned int &best_n0)
+bool is_mmul_kernel_preferred(const unsigned int m,
+ const unsigned int n,
+ const unsigned int k,
+ const unsigned int b,
+ const DataType data_type,
+ unsigned int &best_m0,
+ unsigned int &best_n0)
{
ARM_COMPUTE_UNUSED(n, k, b, data_type);
@@ -141,7 +170,8 @@ bool is_mmul_kernel_preferred(const unsigned int m, const unsigned int n, const
return ((k % mmul_k0) == 0) && (gws_y > 4);
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> find_lhs_rhs_info(const GeMMConfigsMatrix &configs, unsigned int m, unsigned int n, unsigned int k, unsigned int b)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+find_lhs_rhs_info(const GeMMConfigsMatrix &configs, unsigned int m, unsigned int n, unsigned int k, unsigned int b)
{
size_t min_acc = std::numeric_limits<size_t>::max();
size_t min_idx = 0;
@@ -150,12 +180,13 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> find_lhs_rhs_info(const GeMMConf
const size_t num_rows = configs.size();
const size_t num_cols = configs[0].size();
- ARM_COMPUTE_ERROR_ON_MSG(num_cols != 14U, "The entry should have 14 integer values representing: M, N, K, B, M0, N0. K0, V0, H0, INT_LHS, INT_RHS, TRA_LHS, TRA_RHS, IMG_RHS");
+ ARM_COMPUTE_ERROR_ON_MSG(num_cols != 14U, "The entry should have 14 integer values representing: M, N, K, B, M0, "
+ "N0. K0, V0, H0, INT_LHS, INT_RHS, TRA_LHS, TRA_RHS, IMG_RHS");
ARM_COMPUTE_UNUSED(num_cols);
// Find nearest GeMM workload
// Note: the workload does not depend on the K dimension
- for(size_t y = 0; y < num_rows; ++y)
+ for (size_t y = 0; y < num_rows; ++y)
{
size_t mc0 = static_cast<size_t>(configs[y][0]);
size_t nc0 = static_cast<size_t>(configs[y][1]);
@@ -168,7 +199,7 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> find_lhs_rhs_info(const GeMMConf
acc += (k - kc0) * (k - kc0);
acc += (b - bc0) * (b - bc0);
acc = std::sqrt(acc);
- if(acc < min_acc)
+ if (acc < min_acc)
{
min_acc = acc;
min_idx = y;
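
The find_lhs_rhs_info() change in the hunks above is formatting only; the routine itself is a nearest-neighbour lookup over the user-provided GeMMConfigsMatrix, accumulating the distance between the query workload (M, N, K, B) and each tuned entry and keeping the closest row. A minimal standalone sketch of that pattern, assuming a non-empty table and using a hypothetical ConfigRow in place of the library's 14-column matrix:

#include <cmath>
#include <cstddef>
#include <cstdio>
#include <limits>
#include <vector>

// Hypothetical stand-in for one row of the GeMMConfigsMatrix: the first four
// values describe the GEMM workload the row was tuned for, the rest are the
// block sizes chosen for it (trimmed to three for brevity).
struct ConfigRow
{
    int m, n, k, b; // Tuned-for GEMM shape
    int m0, n0, k0; // Block sizes selected for that shape
};

// Pick the row whose (m, n, k, b) lies closest (Euclidean distance) to the
// query, mirroring the accumulation loop in find_lhs_rhs_info(). Assumes a
// non-empty table.
static const ConfigRow &nearest_config(const std::vector<ConfigRow> &rows, int m, int n, int k, int b)
{
    double      min_acc = std::numeric_limits<double>::max();
    std::size_t min_idx = 0;
    for (std::size_t y = 0; y < rows.size(); ++y)
    {
        double acc = 0.0;
        acc += double(m - rows[y].m) * double(m - rows[y].m);
        acc += double(n - rows[y].n) * double(n - rows[y].n);
        acc += double(k - rows[y].k) * double(k - rows[y].k);
        acc += double(b - rows[y].b) * double(b - rows[y].b);
        acc = std::sqrt(acc);
        if (acc < min_acc)
        {
            min_acc = acc;
            min_idx = y;
        }
    }
    return rows[min_idx];
}

int main()
{
    // Entries below are made up for illustration, not tuned values.
    const std::vector<ConfigRow> table = {
        {24, 88, 236, 1, 4, 4, 4},
        {1, 1024, 1024, 1, 1, 4, 8},
    };
    const ConfigRow &best = nearest_config(table, 32, 64, 256, 1);
    std::printf("chosen block sizes: m0=%d n0=%d k0=%d\n", best.m0, best.n0, best.k0);
    return 0;
}

The real helper additionally checks that each row has the 14 expected columns and returns a GEMMLHSMatrixInfo/GEMMRHSMatrixInfo pair built from the winning row; the sketch keeps only the distance search.
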
diff --git a/src/gpu/cl/kernels/gemm/ClGemmHelpers.h b/src/gpu/cl/kernels/gemm/ClGemmHelpers.h
index 6689b10e69..84776fb207 100644
--- a/src/gpu/cl/kernels/gemm/ClGemmHelpers.h
+++ b/src/gpu/cl/kernels/gemm/ClGemmHelpers.h
@@ -54,8 +54,18 @@ using GeMMConfigsMatrix = std::vector<std::vector<int32_t>>;
*
* @return @ref GEMMLHSMatrixInfo and @ref GEMMRHSMatrixInfo
*/
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_lhs_rhs_info(unsigned int m, unsigned int n, unsigned int m0, unsigned int n0, unsigned int k0, unsigned int v0, unsigned int h0,
- bool lhs_interleave, bool rhs_interleave, bool lhs_transpose, bool rhs_transpose, bool export_to_cl_image = false);
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_lhs_rhs_info(unsigned int m,
+ unsigned int n,
+ unsigned int m0,
+ unsigned int n0,
+ unsigned int k0,
+ unsigned int v0,
+ unsigned int h0,
+ bool lhs_interleave,
+ bool rhs_interleave,
+ bool lhs_transpose,
+ bool rhs_transpose,
+ bool export_to_cl_image = false);
/** Select @ref GEMMLHSMatrixInfo and @ref GEMMRHSMatrixInfo
*
@@ -72,9 +82,13 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_lhs_rhs_info(unsigned
*
* @return @ref GEMMLHSMatrixInfo and @ref GEMMRHSMatrixInfo
*/
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> select_lhs_rhs_info(std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> info_img,
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> info_buf,
- unsigned int n, unsigned int k, unsigned int b, DataType data_type);
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+select_lhs_rhs_info(std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> info_img,
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> info_buf,
+ unsigned int n,
+ unsigned int k,
+ unsigned int b,
+ DataType data_type);
/** Update padding required to export the OpenCL buffer to OpenCL image2d
*
@@ -103,8 +117,13 @@ Status validate_image2d_support_on_rhs(const ITensorInfo &tensor_reshaped_info,
*
* @return true if MMUL kernel is preferred over kernels w/o MMUL, false otherwise
*/
-bool is_mmul_kernel_preferred(const unsigned int m, const unsigned int n, const unsigned int k, const unsigned int b,
- const DataType data_type, unsigned int &best_m0, unsigned int &best_n0);
+bool is_mmul_kernel_preferred(const unsigned int m,
+ const unsigned int n,
+ const unsigned int k,
+ const unsigned int b,
+ const DataType data_type,
+ unsigned int &best_m0,
+ unsigned int &best_n0);
/** Find the preferred configurations for the LHS and RHS tensor using the GeMMConfigsMatrix provided by the user
*
@@ -116,7 +135,8 @@ bool is_mmul_kernel_preferred(const unsigned int m, const unsigned int n, const
*
* @return @ref GEMMLHSMatrixInfo and @ref GEMMRHSMatrixInfo
*/
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> find_lhs_rhs_info(const GeMMConfigsMatrix &configs, unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+find_lhs_rhs_info(const GeMMConfigsMatrix &configs, unsigned int m, unsigned int n, unsigned int k, unsigned int b);
} // namespace gemm
} // namespace kernels
} // namespace opencl
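
As a side note on the configure_lhs_rhs_info() declaration re-wrapped above: its v0/h0 handling (first hunk of ClGemmHelpers.cpp) reduces to a clamp — v0 cannot exceed the number of M0 blocks along M, and h0 == 0 requests the maximum H0, i.e. the number of N0 blocks along N. A standalone sketch under those assumptions, using hypothetical names outside the Arm Compute API:

#include <algorithm>
#include <cstdio>

// Hypothetical standalone version of the v0/h0 normalisation performed at the
// top of configure_lhs_rhs_info(): v0 may not exceed the number of M blocks,
// and h0 == 0 is a request for the maximum number of N blocks.
static void normalise_v0_h0(unsigned int m, unsigned int n,
                            unsigned int m0, unsigned int n0,
                            unsigned int &v0, unsigned int &h0)
{
    v0 = std::max(std::min(static_cast<int>(m / m0), static_cast<int>(v0)), 1);
    if (h0 == 0)
    {
        h0 = std::max(n / n0, 1U); // take the maximum H0 possible
    }
}

int main()
{
    unsigned int v0 = 16, h0 = 0;
    normalise_v0_h0(/*m=*/24, /*n=*/88, /*m0=*/4, /*n0=*/4, v0, h0);
    std::printf("v0=%u h0=%u\n", v0, h0); // v0 clamped to 24/4 = 6, h0 set to 88/4 = 22
    return 0;
}
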
diff --git a/src/gpu/cl/kernels/gemm/IClGemmKernelConfig.h b/src/gpu/cl/kernels/gemm/IClGemmKernelConfig.h
index a49836cfda..9d08633963 100644
--- a/src/gpu/cl/kernels/gemm/IClGemmKernelConfig.h
+++ b/src/gpu/cl/kernels/gemm/IClGemmKernelConfig.h
@@ -26,6 +26,7 @@
#include "arm_compute/core/GPUTarget.h"
#include "arm_compute/core/Types.h"
+
#include "src/core/common/Macros.h"
#include <array>
@@ -56,8 +57,7 @@ public:
* @param[in] func_int8 Function to call for GEMM Int8 (QASYMM8, QASYMM8_SIGNED, QSYMM8_PER_CHANNEL)
*
*/
- CLGEMMConfigArray(T func_f32, T func_f16, T func_int8)
- : _configs{ func_f32, func_f16, func_int8 }
+ CLGEMMConfigArray(T func_f32, T func_f16, T func_int8) : _configs{func_f32, func_f16, func_int8}
{
}
@@ -69,7 +69,7 @@ public:
*/
T get_function(DataType data_type)
{
- switch(data_type)
+ switch (data_type)
{
case DataType::F32:
return _configs.at(DT_F32);
@@ -96,8 +96,7 @@ public:
*
* @param[in] arch GPU target
*/
- IClGemmKernelConfig(GPUTarget arch)
- : _target(arch)
+ IClGemmKernelConfig(GPUTarget arch) : _target(arch)
{
}
ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(IClGemmKernelConfig);
@@ -111,7 +110,8 @@ public:
* @param[in] b Batch size
* @param[in] data_type Data type
*/
- virtual std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type) = 0;
+ virtual std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type) = 0;
protected:
GPUTarget _target;
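
The CLGEMMConfigArray constructor and get_function() touched above form a small per-data-type dispatch table: three configuration functions (F32, F16, quantized int8) are stored and one is selected at configure time. A condensed, standalone illustration of the same pattern — the class, function names and returned block sizes below are placeholders, not the library's heuristics:

#include <array>
#include <cstdio>
#include <stdexcept>

enum class DataType { F32, F16, Int8 };

struct BlockSizes { unsigned int m0, n0, k0; };

// Hypothetical heuristic object: one member function per data-type family,
// stored in a small table and selected at configure time, mirroring
// CLGEMMConfigArray<T>::get_function().
class ExampleHeuristic
{
public:
    using ConfigFn = BlockSizes (ExampleHeuristic::*)(unsigned int m, unsigned int n) const;

    BlockSizes configure(unsigned int m, unsigned int n, DataType dt) const
    {
        const std::array<ConfigFn, 3> table = {&ExampleHeuristic::for_f32, &ExampleHeuristic::for_f16,
                                               &ExampleHeuristic::for_int8};
        ConfigFn fn = nullptr;
        switch (dt)
        {
            case DataType::F32:  fn = table[0]; break;
            case DataType::F16:  fn = table[1]; break;
            case DataType::Int8: fn = table[2]; break;
            default: throw std::runtime_error("Data type not supported");
        }
        return (this->*fn)(m, n);
    }

private:
    // Placeholder block sizes, not tuned values.
    BlockSizes for_f32(unsigned int m, unsigned int) const { return {m >= 64 ? 5U : 2U, 4U, 4U}; }
    BlockSizes for_f16(unsigned int, unsigned int) const { return {4U, 4U, 8U}; }
    BlockSizes for_int8(unsigned int, unsigned int) const { return {4U, 2U, 16U}; }
};

int main()
{
    ExampleHeuristic h;
    const BlockSizes bs = h.configure(128, 128, DataType::F32);
    std::printf("m0=%u n0=%u k0=%u\n", bs.m0, bs.n0, bs.k0);
    return 0;
}
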
diff --git a/src/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeBifrost.cpp b/src/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeBifrost.cpp
index d74c7fac9b..2f37eef31f 100644
--- a/src/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeBifrost.cpp
+++ b/src/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeBifrost.cpp
@@ -26,6 +26,7 @@
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/GPUTarget.h"
+
#include "src/gpu/cl/kernels/gemm/ClGemmHelpers.h"
#include <utility>
@@ -38,31 +39,34 @@ namespace kernels
{
namespace gemm
{
-ClGemmDefaultConfigNativeBifrost::ClGemmDefaultConfigNativeBifrost(GPUTarget gpu)
- : IClGemmKernelConfig(gpu)
+ClGemmDefaultConfigNativeBifrost::ClGemmDefaultConfigNativeBifrost(GPUTarget gpu) : IClGemmKernelConfig(gpu)
{
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigNativeBifrost::configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigNativeBifrost::configure(
+ unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type)
{
- using ConfigurationFunctionExecutorPtr = std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> (ClGemmDefaultConfigNativeBifrost::*)(unsigned int m, unsigned int n, unsigned int k,
- unsigned int b);
+ using ConfigurationFunctionExecutorPtr = std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> (
+ ClGemmDefaultConfigNativeBifrost::*)(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- CLGEMMConfigArray<ConfigurationFunctionExecutorPtr> configs_G71(&ClGemmDefaultConfigNativeBifrost::configure_G71_f32,
- &ClGemmDefaultConfigNativeBifrost::configure_G71_f32, // We use the F32 heuristic
- &ClGemmDefaultConfigNativeBifrost::configure_G71_u8);
+ CLGEMMConfigArray<ConfigurationFunctionExecutorPtr> configs_G71(
+ &ClGemmDefaultConfigNativeBifrost::configure_G71_f32,
+ &ClGemmDefaultConfigNativeBifrost::configure_G71_f32, // We use the F32 heuristic
+ &ClGemmDefaultConfigNativeBifrost::configure_G71_u8);
- CLGEMMConfigArray<ConfigurationFunctionExecutorPtr> configs_G76(&ClGemmDefaultConfigNativeBifrost::configure_G76_f32,
- &ClGemmDefaultConfigNativeBifrost::configure_G76_f32, // We use the F32 heuristic
- &ClGemmDefaultConfigNativeBifrost::configure_G76_u8);
+ CLGEMMConfigArray<ConfigurationFunctionExecutorPtr> configs_G76(
+ &ClGemmDefaultConfigNativeBifrost::configure_G76_f32,
+ &ClGemmDefaultConfigNativeBifrost::configure_G76_f32, // We use the F32 heuristic
+ &ClGemmDefaultConfigNativeBifrost::configure_G76_u8);
- CLGEMMConfigArray<ConfigurationFunctionExecutorPtr> configs_G7x(&ClGemmDefaultConfigNativeBifrost::configure_default_f32,
- &ClGemmDefaultConfigNativeBifrost::configure_default_f32, // We use the F32 heuristic
- &ClGemmDefaultConfigNativeBifrost::configure_default_u8);
+ CLGEMMConfigArray<ConfigurationFunctionExecutorPtr> configs_G7x(
+ &ClGemmDefaultConfigNativeBifrost::configure_default_f32,
+ &ClGemmDefaultConfigNativeBifrost::configure_default_f32, // We use the F32 heuristic
+ &ClGemmDefaultConfigNativeBifrost::configure_default_u8);
ConfigurationFunctionExecutorPtr func = nullptr;
- switch(_target)
+ switch (_target)
{
case GPUTarget::G76:
func = configs_G76.get_function(data_type);
@@ -79,18 +83,19 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigNativeBifrost
return (this->*func)(m, n, k, b);
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigNativeBifrost::configure_G71_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ClGemmDefaultConfigNativeBifrost::configure_G71_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
{
ARM_COMPUTE_UNUSED(k);
ARM_COMPUTE_UNUSED(b);
- if(m == 1)
+ if (m == 1)
{
- if(n < 2048)
+ if (n < 2048)
{
return configure_lhs_rhs_info(m, n, 1, 2, 4, 1, 1, false, false, false, false);
}
- else if(n >= 2048 && n < 8192)
+ else if (n >= 2048 && n < 8192)
{
return configure_lhs_rhs_info(m, n, 1, 4, 4, 1, 1, false, false, false, false);
}
@@ -105,20 +110,21 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigNativeBifrost
}
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigNativeBifrost::configure_G71_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ClGemmDefaultConfigNativeBifrost::configure_G71_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
{
ARM_COMPUTE_UNUSED(k);
ARM_COMPUTE_UNUSED(b);
- if(dot8_supported(CLKernelLibrary::get().get_device()))
+ if (dot8_supported(CLKernelLibrary::get().get_device()))
{
- if(m == 1)
+ if (m == 1)
{
- if(n < 2048)
+ if (n < 2048)
{
return configure_lhs_rhs_info(m, n, 1, 2, 16, 1, 1, false, false, false, false);
}
- else if(n >= 2048 && n < 16384)
+ else if (n >= 2048 && n < 16384)
{
return configure_lhs_rhs_info(m, n, 1, 4, 16, 1, 1, false, false, false, false);
}
@@ -129,7 +135,7 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigNativeBifrost
}
else
{
- if(m < 64)
+ if (m < 64)
{
return configure_lhs_rhs_info(m, n, 2, 2, 16, 1, 1, false, false, false, false);
}
@@ -141,9 +147,9 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigNativeBifrost
}
else
{
- if(m == 1)
+ if (m == 1)
{
- if(n < 8192)
+ if (n < 8192)
{
return configure_lhs_rhs_info(m, n, 1, 4, 16, 1, 1, false, false, false, false);
}
@@ -159,24 +165,25 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigNativeBifrost
}
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigNativeBifrost::configure_G76_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ClGemmDefaultConfigNativeBifrost::configure_G76_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
{
ARM_COMPUTE_UNUSED(k);
ARM_COMPUTE_UNUSED(b);
- if(m == 1)
+ if (m == 1)
{
- if(n > 4196)
+ if (n > 4196)
{
return configure_lhs_rhs_info(m, n, 1, 4, 2, 1, 1, false, false, false, false);
}
else
{
- if(k < 2048)
+ if (k < 2048)
{
return configure_lhs_rhs_info(m, n, 1, 2, 2, 1, 1, false, false, false, false);
}
- else if(k >= 2048 && k < 16384)
+ else if (k >= 2048 && k < 16384)
{
return configure_lhs_rhs_info(m, n, 1, 2, 4, 1, 1, false, false, false, false);
}
@@ -192,18 +199,19 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigNativeBifrost
}
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigNativeBifrost::configure_G76_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ClGemmDefaultConfigNativeBifrost::configure_G76_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
{
ARM_COMPUTE_UNUSED(k);
ARM_COMPUTE_UNUSED(b);
- if(m == 1)
+ if (m == 1)
{
- if(n < 2048)
+ if (n < 2048)
{
return configure_lhs_rhs_info(m, n, 1, 2, 16, 1, 1, false, false, false, false);
}
- else if(n >= 2048 && n < 16384)
+ else if (n >= 2048 && n < 16384)
{
return configure_lhs_rhs_info(m, n, 1, 4, 16, 1, 1, false, false, false, false);
}
@@ -214,7 +222,7 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigNativeBifrost
}
else
{
- if(m < 64)
+ if (m < 64)
{
return configure_lhs_rhs_info(m, n, 2, 2, 16, 1, 1, false, false, false, false);
}
@@ -225,7 +233,8 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigNativeBifrost
}
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigNativeBifrost::configure_default_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ClGemmDefaultConfigNativeBifrost::configure_default_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
{
ARM_COMPUTE_UNUSED(k);
ARM_COMPUTE_UNUSED(b);
@@ -233,7 +242,8 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigNativeBifrost
return configure_lhs_rhs_info(m, n, 5, 4, 4, 1, 1, false, false, false, false);
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigNativeBifrost::configure_default_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ClGemmDefaultConfigNativeBifrost::configure_default_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
{
ARM_COMPUTE_UNUSED(k);
ARM_COMPUTE_UNUSED(b);
@@ -243,4 +253,4 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigNativeBifrost
} // namespace gemm
} // namespace kernels
} // namespace opencl
-} // namespace arm_compute \ No newline at end of file
+} // namespace arm_compute
diff --git a/src/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeBifrost.h b/src/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeBifrost.h
index 9af5dc4135..f822daae53 100644
--- a/src/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeBifrost.h
+++ b/src/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeBifrost.h
@@ -45,15 +45,22 @@ public:
ClGemmDefaultConfigNativeBifrost(GPUTarget gpu);
// Inherited overridden method
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type) override;
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type) override;
private:
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G71_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G71_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G76_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G76_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_default_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_default_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure_G71_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure_G71_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure_G76_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure_G76_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure_default_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure_default_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
};
} // namespace gemm
} // namespace kernels
diff --git a/src/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeMidgard.cpp b/src/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeMidgard.cpp
index b9f36c7210..f87fb1b659 100644
--- a/src/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeMidgard.cpp
+++ b/src/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeMidgard.cpp
@@ -26,6 +26,7 @@
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/GPUTarget.h"
+
#include "src/gpu/cl/kernels/gemm/ClGemmHelpers.h"
#include <utility>
@@ -38,18 +39,17 @@ namespace kernels
{
namespace gemm
{
-ClGemmDefaultConfigNativeMidgard::ClGemmDefaultConfigNativeMidgard(GPUTarget gpu)
- : IClGemmKernelConfig(gpu)
+ClGemmDefaultConfigNativeMidgard::ClGemmDefaultConfigNativeMidgard(GPUTarget gpu) : IClGemmKernelConfig(gpu)
{
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigNativeMidgard::configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigNativeMidgard::configure(
+ unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type)
{
- using ConfigurationFunctionExecutorPtr = std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> (ClGemmDefaultConfigNativeMidgard::*)(unsigned int m, unsigned int n, unsigned int k,
- unsigned int b);
+ using ConfigurationFunctionExecutorPtr = std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> (
+ ClGemmDefaultConfigNativeMidgard::*)(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- CLGEMMConfigArray<ConfigurationFunctionExecutorPtr> configs_default(nullptr,
- nullptr,
+ CLGEMMConfigArray<ConfigurationFunctionExecutorPtr> configs_default(nullptr, nullptr,
&ClGemmDefaultConfigNativeMidgard::default_q8);
auto func = configs_default.get_function(data_type);
@@ -57,7 +57,8 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigNativeMidgard
return (this->*func)(m, n, k, b);
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigNativeMidgard::default_q8(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ClGemmDefaultConfigNativeMidgard::default_q8(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
{
ARM_COMPUTE_UNUSED(k);
ARM_COMPUTE_UNUSED(b);
@@ -70,4 +71,4 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigNativeMidgard
} // namespace gemm
} // namespace kernels
} // namespace opencl
-} // namespace arm_compute \ No newline at end of file
+} // namespace arm_compute
diff --git a/src/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeMidgard.h b/src/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeMidgard.h
index c055753c48..fa76c5dba7 100644
--- a/src/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeMidgard.h
+++ b/src/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeMidgard.h
@@ -45,10 +45,12 @@ public:
ClGemmDefaultConfigNativeMidgard(GPUTarget gpu);
// Inherited overridden method
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type) override;
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type) override;
private:
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> default_q8(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ default_q8(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
};
} // namespace gemm
} // namespace kernels
diff --git a/src/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeValhall.cpp b/src/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeValhall.cpp
index 95a4d2bd69..97a1298b0a 100644
--- a/src/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeValhall.cpp
+++ b/src/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeValhall.cpp
@@ -26,6 +26,7 @@
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/GPUTarget.h"
+
#include "src/gpu/cl/kernels/gemm/ClGemmHelpers.h"
#include <utility>
@@ -38,37 +39,38 @@ namespace kernels
{
namespace gemm
{
-ClGemmDefaultConfigNativeValhall::ClGemmDefaultConfigNativeValhall(GPUTarget gpu)
- : IClGemmKernelConfig(gpu)
+ClGemmDefaultConfigNativeValhall::ClGemmDefaultConfigNativeValhall(GPUTarget gpu) : IClGemmKernelConfig(gpu)
{
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigNativeValhall::configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigNativeValhall::configure(
+ unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type)
{
- using ConfigurationFunctionExecutorPtr = std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> (ClGemmDefaultConfigNativeValhall::*)(unsigned int m, unsigned int n, unsigned int k,
- unsigned int b);
+ using ConfigurationFunctionExecutorPtr = std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> (
+ ClGemmDefaultConfigNativeValhall::*)(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- CLGEMMConfigArray<ConfigurationFunctionExecutorPtr> configs_default(&ClGemmDefaultConfigNativeValhall::configure_G77_f32,
- &ClGemmDefaultConfigNativeValhall::configure_G77_f16,
- &ClGemmDefaultConfigNativeValhall::configure_G77_u8);
+ CLGEMMConfigArray<ConfigurationFunctionExecutorPtr> configs_default(
+ &ClGemmDefaultConfigNativeValhall::configure_G77_f32, &ClGemmDefaultConfigNativeValhall::configure_G77_f16,
+ &ClGemmDefaultConfigNativeValhall::configure_G77_u8);
auto func = configs_default.get_function(data_type);
ARM_COMPUTE_ERROR_ON_MSG(func == nullptr, "Data type not support for GEMM");
return (this->*func)(m, n, k, b);
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigNativeValhall::configure_G77_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ClGemmDefaultConfigNativeValhall::configure_G77_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
{
ARM_COMPUTE_UNUSED(k);
ARM_COMPUTE_UNUSED(b);
- if(m == 1)
+ if (m == 1)
{
- if(n < 2048)
+ if (n < 2048)
{
return configure_lhs_rhs_info(m, n, 1, 2, 4, 1, 1, false, false, false, false);
}
- else if(n >= 2048 && n < 8192)
+ else if (n >= 2048 && n < 8192)
{
return configure_lhs_rhs_info(m, n, 1, 4, 4, 1, 1, false, false, false, false);
}
@@ -83,18 +85,19 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigNativeValhall
}
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigNativeValhall::configure_G77_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ClGemmDefaultConfigNativeValhall::configure_G77_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
{
ARM_COMPUTE_UNUSED(k);
ARM_COMPUTE_UNUSED(b);
- if(m == 1)
+ if (m == 1)
{
- if(n < 2048)
+ if (n < 2048)
{
return configure_lhs_rhs_info(m, n, 1, 2, 4, 1, 1, false, false, false, false);
}
- else if(n >= 2048 && n < 8192)
+ else if (n >= 2048 && n < 8192)
{
return configure_lhs_rhs_info(m, n, 1, 4, 4, 1, 1, false, false, false, false);
}
@@ -109,20 +112,21 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigNativeValhall
}
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigNativeValhall::configure_G77_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ClGemmDefaultConfigNativeValhall::configure_G77_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
{
ARM_COMPUTE_UNUSED(k);
ARM_COMPUTE_UNUSED(b);
- if(dot8_supported(CLKernelLibrary::get().get_device()))
+ if (dot8_supported(CLKernelLibrary::get().get_device()))
{
- if(m == 1)
+ if (m == 1)
{
- if(n < 2048)
+ if (n < 2048)
{
return configure_lhs_rhs_info(m, n, 1, 2, 16, 1, 1, false, false, false, false);
}
- else if(n >= 2048 && n < 16384)
+ else if (n >= 2048 && n < 16384)
{
return configure_lhs_rhs_info(m, n, 1, 4, 16, 1, 1, false, false, false, false);
}
@@ -133,7 +137,7 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigNativeValhall
}
else
{
- if(m < 64)
+ if (m < 64)
{
return configure_lhs_rhs_info(m, n, 2, 2, 16, 1, 1, false, false, false, false);
}
@@ -145,9 +149,9 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigNativeValhall
}
else
{
- if(m == 1)
+ if (m == 1)
{
- if(n < 8192)
+ if (n < 8192)
{
return configure_lhs_rhs_info(m, n, 1, 4, 16, 1, 1, false, false, false, false);
}
@@ -165,4 +169,4 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigNativeValhall
} // namespace gemm
} // namespace kernels
} // namespace opencl
-} // namespace arm_compute \ No newline at end of file
+} // namespace arm_compute
diff --git a/src/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeValhall.h b/src/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeValhall.h
index f0f812fd46..c91b095279 100644
--- a/src/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeValhall.h
+++ b/src/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeValhall.h
@@ -45,12 +45,16 @@ public:
ClGemmDefaultConfigNativeValhall(GPUTarget gpu);
// Inherited overridden method
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type) override;
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type) override;
private:
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G77_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G77_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G77_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure_G77_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure_G77_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure_G77_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
};
} // namespace gemm
} // namespace kernels
diff --git a/src/gpu/cl/kernels/gemm/native/ClGemmNativeKernelConfig.h b/src/gpu/cl/kernels/gemm/native/ClGemmNativeKernelConfig.h
index cf8412830b..955bb3c01a 100644
--- a/src/gpu/cl/kernels/gemm/native/ClGemmNativeKernelConfig.h
+++ b/src/gpu/cl/kernels/gemm/native/ClGemmNativeKernelConfig.h
@@ -51,7 +51,7 @@ public:
*/
static std::unique_ptr<IClGemmKernelConfig> create(GPUTarget gpu)
{
- switch(get_arch_from_target(gpu))
+ switch (get_arch_from_target(gpu))
{
case GPUTarget::MIDGARD:
return std::make_unique<ClGemmDefaultConfigNativeMidgard>(gpu);
diff --git a/src/gpu/cl/kernels/gemm/reshaped/ClGemmDefaultConfigReshapedBifrost.cpp b/src/gpu/cl/kernels/gemm/reshaped/ClGemmDefaultConfigReshapedBifrost.cpp
index 657018eb53..c956c347ef 100644
--- a/src/gpu/cl/kernels/gemm/reshaped/ClGemmDefaultConfigReshapedBifrost.cpp
+++ b/src/gpu/cl/kernels/gemm/reshaped/ClGemmDefaultConfigReshapedBifrost.cpp
@@ -29,6 +29,7 @@
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/TensorShape.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+
#include "src/gpu/cl/kernels/gemm/ClGemmHelpers.h"
#include <utility>
@@ -43,30 +44,31 @@ namespace gemm
{
using namespace arm_compute::misc::shape_calculator;
-ClGemmDefaultConfigReshapedBifrost::ClGemmDefaultConfigReshapedBifrost(GPUTarget gpu)
- : IClGemmKernelConfig(gpu)
+ClGemmDefaultConfigReshapedBifrost::ClGemmDefaultConfigReshapedBifrost(GPUTarget gpu) : IClGemmKernelConfig(gpu)
{
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedBifrost::configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedBifrost::configure(
+ unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type)
{
- using ConfigurationFunctionExecutorPtr = std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> (ClGemmDefaultConfigReshapedBifrost::*)(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ using ConfigurationFunctionExecutorPtr = std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> (
+ ClGemmDefaultConfigReshapedBifrost::*)(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- CLGEMMConfigArray<ConfigurationFunctionExecutorPtr> configs_G7x(&ClGemmDefaultConfigReshapedBifrost::configure_G7x_f32,
- &ClGemmDefaultConfigReshapedBifrost::configure_G7x_f16,
- &ClGemmDefaultConfigReshapedBifrost::configure_G7x_u8);
+ CLGEMMConfigArray<ConfigurationFunctionExecutorPtr> configs_G7x(
+ &ClGemmDefaultConfigReshapedBifrost::configure_G7x_f32, &ClGemmDefaultConfigReshapedBifrost::configure_G7x_f16,
+ &ClGemmDefaultConfigReshapedBifrost::configure_G7x_u8);
- CLGEMMConfigArray<ConfigurationFunctionExecutorPtr> configs_G52(&ClGemmDefaultConfigReshapedBifrost::configure_G52_f32,
- &ClGemmDefaultConfigReshapedBifrost::configure_G52_f16,
- &ClGemmDefaultConfigReshapedBifrost::configure_G7x_u8);
+ CLGEMMConfigArray<ConfigurationFunctionExecutorPtr> configs_G52(
+ &ClGemmDefaultConfigReshapedBifrost::configure_G52_f32, &ClGemmDefaultConfigReshapedBifrost::configure_G52_f16,
+ &ClGemmDefaultConfigReshapedBifrost::configure_G7x_u8);
- CLGEMMConfigArray<ConfigurationFunctionExecutorPtr> configs_G76(&ClGemmDefaultConfigReshapedBifrost::configure_G76_f32,
- &ClGemmDefaultConfigReshapedBifrost::configure_G76_f16,
- &ClGemmDefaultConfigReshapedBifrost::configure_G76_u8);
+ CLGEMMConfigArray<ConfigurationFunctionExecutorPtr> configs_G76(
+ &ClGemmDefaultConfigReshapedBifrost::configure_G76_f32, &ClGemmDefaultConfigReshapedBifrost::configure_G76_f16,
+ &ClGemmDefaultConfigReshapedBifrost::configure_G76_u8);
ConfigurationFunctionExecutorPtr func = nullptr;
- switch(_target)
+ switch (_target)
{
case GPUTarget::G76:
func = configs_G76.get_function(data_type);
@@ -83,12 +85,13 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedBifro
return (this->*func)(m, n, k, b);
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedBifrost::configure_G7x_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ClGemmDefaultConfigReshapedBifrost::configure_G7x_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
{
ARM_COMPUTE_UNUSED(k);
ARM_COMPUTE_UNUSED(b);
- if(n <= 4)
+ if (n <= 4)
{
return configure_lhs_rhs_info(m, n, 4, 2, 8, 16, 16, true, false, false, true);
}
@@ -98,12 +101,13 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedBifro
}
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedBifrost::configure_G7x_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ClGemmDefaultConfigReshapedBifrost::configure_G7x_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
{
ARM_COMPUTE_UNUSED(k);
ARM_COMPUTE_UNUSED(b);
- if(n <= 4)
+ if (n <= 4)
{
return configure_lhs_rhs_info(m, n, 4, 2, 8, 8, 2, true, true, true, false);
}
@@ -113,14 +117,15 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedBifro
}
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedBifrost::configure_G7x_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ClGemmDefaultConfigReshapedBifrost::configure_G7x_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
{
ARM_COMPUTE_UNUSED(k);
ARM_COMPUTE_UNUSED(b);
- if(dot8_supported(CLKernelLibrary::get().get_device()))
+ if (dot8_supported(CLKernelLibrary::get().get_device()))
{
- if(n <= 4)
+ if (n <= 4)
{
return configure_lhs_rhs_info(m, n, 4, 2, 16, 2, 2, true, false, false, true);
}
@@ -131,7 +136,7 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedBifro
}
else
{
- if(n <= 4)
+ if (n <= 4)
{
return configure_lhs_rhs_info(m, n, 4, 2, 8, 2, 2, true, false, false, true);
}
@@ -142,7 +147,8 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedBifro
}
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedBifrost::configure_G52_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ClGemmDefaultConfigReshapedBifrost::configure_G52_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
{
const float r_mn = static_cast<float>(m) / static_cast<float>(n);
const float workload = (static_cast<float>(m) * static_cast<float>(n) * static_cast<float>(b)) / 20.0f;
@@ -154,100 +160,108 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedBifro
GEMMLHSMatrixInfo lhs_info_img;
GEMMRHSMatrixInfo rhs_info_img;
- if(workload <= 274.4000f)
+ if (workload <= 274.4000f)
{
- if(r_nk <= 0.7461f)
+ if (r_nk <= 0.7461f)
{
- if(r_mn <= 21.1667f)
+ if (r_mn <= 21.1667f)
{
return configure_lhs_rhs_info(m, n, 4, 2, 4, 4, 4, false, true, true, false, false);
}
else
{
- std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 4, 4, 4, 4, 2, true, true, false, true, true);
- std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 4, 4, 4, 4, 2, true, true, false, true, false);
+ std::tie(lhs_info_img, rhs_info_img) =
+ configure_lhs_rhs_info(m, n, 4, 4, 4, 4, 2, true, true, false, true, true);
+ std::tie(lhs_info_buf, rhs_info_buf) =
+ configure_lhs_rhs_info(m, n, 4, 4, 4, 4, 2, true, true, false, true, false);
return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img),
- std::make_pair(lhs_info_buf, rhs_info_buf),
- n, k, b, DataType::F32);
+ std::make_pair(lhs_info_buf, rhs_info_buf), n, k, b, DataType::F32);
}
}
else
{
- std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 4, 4, 4, 4, 2, true, true, false, true, true);
- std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 4, 4, 4, 4, 2, true, true, false, true, false);
+ std::tie(lhs_info_img, rhs_info_img) =
+ configure_lhs_rhs_info(m, n, 4, 4, 4, 4, 2, true, true, false, true, true);
+ std::tie(lhs_info_buf, rhs_info_buf) =
+ configure_lhs_rhs_info(m, n, 4, 4, 4, 4, 2, true, true, false, true, false);
return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img),
- std::make_pair(lhs_info_buf, rhs_info_buf),
- n, k, b, DataType::F32);
+ std::make_pair(lhs_info_buf, rhs_info_buf), n, k, b, DataType::F32);
}
}
else
{
- if(r_mk <= 17.3926f)
+ if (r_mk <= 17.3926f)
{
- if(workload <= 542.4000f)
+ if (workload <= 542.4000f)
{
- std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 4, 4, 4, 4, 2, true, true, false, true, true);
- std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 4, 4, 4, 4, 2, true, true, false, true, false);
+ std::tie(lhs_info_img, rhs_info_img) =
+ configure_lhs_rhs_info(m, n, 4, 4, 4, 4, 2, true, true, false, true, true);
+ std::tie(lhs_info_buf, rhs_info_buf) =
+ configure_lhs_rhs_info(m, n, 4, 4, 4, 4, 2, true, true, false, true, false);
return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img),
- std::make_pair(lhs_info_buf, rhs_info_buf),
- n, k, b, DataType::F32);
+ std::make_pair(lhs_info_buf, rhs_info_buf), n, k, b, DataType::F32);
}
else
{
- std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 4, 4, 4, 2, 1, true, true, false, true, true);
- std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 4, 4, 4, 2, 1, true, true, false, true, false);
+ std::tie(lhs_info_img, rhs_info_img) =
+ configure_lhs_rhs_info(m, n, 4, 4, 4, 2, 1, true, true, false, true, true);
+ std::tie(lhs_info_buf, rhs_info_buf) =
+ configure_lhs_rhs_info(m, n, 4, 4, 4, 2, 1, true, true, false, true, false);
return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img),
- std::make_pair(lhs_info_buf, rhs_info_buf),
- n, k, b, DataType::F32);
+ std::make_pair(lhs_info_buf, rhs_info_buf), n, k, b, DataType::F32);
}
}
else
{
- if(r_nk <= 0.5463f)
+ if (r_nk <= 0.5463f)
{
- if(workload <= 11767.6001f)
+ if (workload <= 11767.6001f)
{
- std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 4, 4, 4, 4, 2, true, true, false, true, true);
- std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 4, 4, 4, 4, 2, true, true, false, true, false);
+ std::tie(lhs_info_img, rhs_info_img) =
+ configure_lhs_rhs_info(m, n, 4, 4, 4, 4, 2, true, true, false, true, true);
+ std::tie(lhs_info_buf, rhs_info_buf) =
+ configure_lhs_rhs_info(m, n, 4, 4, 4, 4, 2, true, true, false, true, false);
return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img),
- std::make_pair(lhs_info_buf, rhs_info_buf),
- n, k, b, DataType::F32);
+ std::make_pair(lhs_info_buf, rhs_info_buf), n, k, b, DataType::F32);
}
else
{
- std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 4, 4, 4, 2, 1, true, true, false, true, true);
- std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 4, 4, 4, 2, 1, true, true, false, true, false);
+ std::tie(lhs_info_img, rhs_info_img) =
+ configure_lhs_rhs_info(m, n, 4, 4, 4, 2, 1, true, true, false, true, true);
+ std::tie(lhs_info_buf, rhs_info_buf) =
+ configure_lhs_rhs_info(m, n, 4, 4, 4, 2, 1, true, true, false, true, false);
return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img),
- std::make_pair(lhs_info_buf, rhs_info_buf),
- n, k, b, DataType::F32);
+ std::make_pair(lhs_info_buf, rhs_info_buf), n, k, b, DataType::F32);
}
}
else
{
- std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 4, 4, 4, 4, 2, true, true, false, true, true);
- std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 4, 4, 4, 4, 2, true, true, false, true, false);
+ std::tie(lhs_info_img, rhs_info_img) =
+ configure_lhs_rhs_info(m, n, 4, 4, 4, 4, 2, true, true, false, true, true);
+ std::tie(lhs_info_buf, rhs_info_buf) =
+ configure_lhs_rhs_info(m, n, 4, 4, 4, 4, 2, true, true, false, true, false);
return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img),
- std::make_pair(lhs_info_buf, rhs_info_buf),
- n, k, b, DataType::F32);
+ std::make_pair(lhs_info_buf, rhs_info_buf), n, k, b, DataType::F32);
}
}
}
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedBifrost::configure_G52_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ClGemmDefaultConfigReshapedBifrost::configure_G52_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
{
ARM_COMPUTE_UNUSED(k);
const float workload = (static_cast<float>(m) * static_cast<float>(n) * static_cast<float>(b)) / 20.0f;
- if(workload <= 323.4000f)
+ if (workload <= 323.4000f)
{
return configure_lhs_rhs_info(m, n, 2, 2, 8, 4, 8, false, false, false, true, false);
}
@@ -257,7 +271,8 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedBifro
}
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedBifrost::configure_G76_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ClGemmDefaultConfigReshapedBifrost::configure_G76_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
{
ARM_COMPUTE_UNUSED(k);
ARM_COMPUTE_UNUSED(b);
@@ -268,7 +283,7 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedBifro
GEMMRHSMatrixInfo rhs_info_img;
// Get lhs_info/rhs_info in case of OpenCL buffer
- if(n <= 4)
+ if (n <= 4)
{
std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 4, 2, 8, 16, 16, true, false, false, true);
}
@@ -279,15 +294,17 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedBifro
// Get lhs_info/rhs_info in case of OpenCL image
// Condition on the GPU workload
- if((m / 4) * (n / 4) >= 2560)
+ if ((m / 4) * (n / 4) >= 2560)
{
// Big workload
- std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 4, 4, 4, 2, 8, true, true, true, false, true);
+ std::tie(lhs_info_img, rhs_info_img) =
+ configure_lhs_rhs_info(m, n, 4, 4, 4, 2, 8, true, true, true, false, true);
}
else
{
// Small workload
- std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 2, 4, 4, 1, 1, true, true, true, false, true);
+ std::tie(lhs_info_img, rhs_info_img) =
+ configure_lhs_rhs_info(m, n, 2, 4, 4, 1, 1, true, true, true, false, true);
}
const TensorInfo tensor_rhs_info(TensorShape(n, k, b), 1, DataType::F32);
@@ -297,7 +314,7 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedBifro
// In case of vector by matrix with few work-items, we use the OpenCL buffer rather than the OpenCL image2d
const bool use_cl_image2d = (n <= 4) ? false : true;
- if(bool(validate_image2d_support_on_rhs(tensor_reshaped_info, rhs_info_img)) && use_cl_image2d)
+ if (bool(validate_image2d_support_on_rhs(tensor_reshaped_info, rhs_info_img)) && use_cl_image2d)
{
return std::make_pair(lhs_info_img, rhs_info_img);
}
@@ -307,16 +324,17 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedBifro
}
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedBifrost::configure_G76_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ClGemmDefaultConfigReshapedBifrost::configure_G76_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
{
const float workload = (static_cast<float>(m) * static_cast<float>(n) * static_cast<float>(b)) / 20.0f;
const float r_mk = static_cast<float>(m) / static_cast<float>(k);
- if(workload <= 1595.2000f)
+ if (workload <= 1595.2000f)
{
- if(r_mk <= 2.1044f)
+ if (r_mk <= 2.1044f)
{
- if(workload <= 870.4000f)
+ if (workload <= 870.4000f)
{
return configure_lhs_rhs_info(m, n, 2, 4, 4, 1, 2, true, false, true, false, false);
}
@@ -336,12 +354,13 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedBifro
}
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedBifrost::configure_G76_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ClGemmDefaultConfigReshapedBifrost::configure_G76_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
{
ARM_COMPUTE_UNUSED(k);
ARM_COMPUTE_UNUSED(b);
- if(n <= 4)
+ if (n <= 4)
{
return configure_lhs_rhs_info(m, n, 4, 2, 16, 4, 1, false, false, false, true);
}
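
Several of the reshaped heuristics above (e.g. configure_G76_f32) follow a two-candidate pattern: build one LHS/RHS configuration with the RHS exported to a cl_image and one plain buffer configuration, then keep the image variant only if the device and shape checks in validate_image2d_support_on_rhs() pass, falling back to the buffer otherwise. A condensed standalone sketch of that decision — Candidate and image2d_candidate_is_valid are illustrative stand-ins, not library types:

#include <cstdio>

// Hypothetical stand-in for the image/buffer LHS-RHS candidates produced by
// configure_lhs_rhs_info(..., export_to_cl_image = true/false).
struct Candidate
{
    unsigned int m0, n0, k0;
    bool         export_to_cl_image;
};

// Placeholder for validate_image2d_support_on_rhs(): in the library this checks
// n0/k0, the data type, the cl_khr_image2d_from_buffer extension and the maximum
// image width/height of the reshaped RHS tensor.
static bool image2d_candidate_is_valid(const Candidate &img, unsigned int n, unsigned int k, unsigned int b)
{
    (void)k;
    (void)b;
    // For illustration only: require n0 in {4, 8, 16} and skip the image path
    // for vector-by-matrix cases, as the G76 F32 heuristic above does for n <= 4.
    const bool n0_ok = (img.n0 == 4) || (img.n0 == 8) || (img.n0 == 16);
    return img.export_to_cl_image && n0_ok && (n > 4);
}

// Mirror of the select_lhs_rhs_info() pattern: prefer the image candidate,
// fall back to the buffer candidate otherwise.
static Candidate select_candidate(const Candidate &img, const Candidate &buf,
                                  unsigned int n, unsigned int k, unsigned int b)
{
    return image2d_candidate_is_valid(img, n, k, b) ? img : buf;
}

int main()
{
    const Candidate img{4, 4, 4, true};
    const Candidate buf{5, 4, 4, false};
    const Candidate chosen = select_candidate(img, buf, /*n=*/256, /*k=*/512, /*b=*/1);
    std::printf("export_to_cl_image = %s\n", chosen.export_to_cl_image ? "true" : "false");
    return 0;
}
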
diff --git a/src/gpu/cl/kernels/gemm/reshaped/ClGemmDefaultConfigReshapedBifrost.h b/src/gpu/cl/kernels/gemm/reshaped/ClGemmDefaultConfigReshapedBifrost.h
index d86d1ba0a7..9227ec2551 100644
--- a/src/gpu/cl/kernels/gemm/reshaped/ClGemmDefaultConfigReshapedBifrost.h
+++ b/src/gpu/cl/kernels/gemm/reshaped/ClGemmDefaultConfigReshapedBifrost.h
@@ -45,17 +45,26 @@ public:
ClGemmDefaultConfigReshapedBifrost(GPUTarget gpu);
// Inherited overridden method
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type) override;
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type) override;
private:
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G7x_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G52_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G76_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G7x_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G52_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G76_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G7x_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G76_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure_G7x_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure_G52_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure_G76_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure_G7x_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure_G52_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure_G76_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure_G7x_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure_G76_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
};
} // namespace gemm
} // namespace kernels
diff --git a/src/gpu/cl/kernels/gemm/reshaped/ClGemmDefaultConfigReshapedValhall.cpp b/src/gpu/cl/kernels/gemm/reshaped/ClGemmDefaultConfigReshapedValhall.cpp
index 58d0873b86..70b324eb5a 100644
--- a/src/gpu/cl/kernels/gemm/reshaped/ClGemmDefaultConfigReshapedValhall.cpp
+++ b/src/gpu/cl/kernels/gemm/reshaped/ClGemmDefaultConfigReshapedValhall.cpp
@@ -26,6 +26,7 @@
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/GPUTarget.h"
+
#include "src/gpu/cl/kernels/gemm/ClGemmHelpers.h"
#include <utility>
@@ -38,26 +39,27 @@ namespace kernels
{
namespace gemm
{
-ClGemmDefaultConfigReshapedValhall::ClGemmDefaultConfigReshapedValhall(GPUTarget gpu)
- : IClGemmKernelConfig(gpu)
+ClGemmDefaultConfigReshapedValhall::ClGemmDefaultConfigReshapedValhall(GPUTarget gpu) : IClGemmKernelConfig(gpu)
{
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedValhall::configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedValhall::configure(
+ unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type)
{
- using ConfigurationFunctionExecutorPtr = std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> (ClGemmDefaultConfigReshapedValhall::*)(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ using ConfigurationFunctionExecutorPtr = std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> (
+ ClGemmDefaultConfigReshapedValhall::*)(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- CLGEMMConfigArray<ConfigurationFunctionExecutorPtr> configs_G77(&ClGemmDefaultConfigReshapedValhall::configure_G77_f32,
- &ClGemmDefaultConfigReshapedValhall::configure_G77_f16,
- &ClGemmDefaultConfigReshapedValhall::configure_G77_u8);
+ CLGEMMConfigArray<ConfigurationFunctionExecutorPtr> configs_G77(
+ &ClGemmDefaultConfigReshapedValhall::configure_G77_f32, &ClGemmDefaultConfigReshapedValhall::configure_G77_f16,
+ &ClGemmDefaultConfigReshapedValhall::configure_G77_u8);
- CLGEMMConfigArray<ConfigurationFunctionExecutorPtr> configs_G78(&ClGemmDefaultConfigReshapedValhall::configure_G78_f32,
- &ClGemmDefaultConfigReshapedValhall::configure_G78_f16,
- &ClGemmDefaultConfigReshapedValhall::configure_G77_u8);
+ CLGEMMConfigArray<ConfigurationFunctionExecutorPtr> configs_G78(
+ &ClGemmDefaultConfigReshapedValhall::configure_G78_f32, &ClGemmDefaultConfigReshapedValhall::configure_G78_f16,
+ &ClGemmDefaultConfigReshapedValhall::configure_G77_u8);
ConfigurationFunctionExecutorPtr func = nullptr;
- switch(_target)
+ switch (_target)
{
case GPUTarget::G78:
func = configs_G78.get_function(data_type);
@@ -72,12 +74,13 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedValha
return (this->*func)(m, n, k, b);
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedValhall::configure_G77_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ClGemmDefaultConfigReshapedValhall::configure_G77_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
{
ARM_COMPUTE_UNUSED(k);
ARM_COMPUTE_UNUSED(b);
- if(n <= 4)
+ if (n <= 4)
{
return configure_lhs_rhs_info(m, n, 4, 2, 8, 16, 16, 1, 0, 0, 1);
}
@@ -87,7 +90,8 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedValha
}
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedValhall::configure_G77_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ClGemmDefaultConfigReshapedValhall::configure_G77_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
{
ARM_COMPUTE_UNUSED(k);
ARM_COMPUTE_UNUSED(b);
@@ -104,17 +108,17 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedValha
std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 4, 4, 4, 4, 4, 0, 0, 1, 0, 0);
- if(r_mk <= 0.11824845522642136)
+ if (r_mk <= 0.11824845522642136)
{
- if(workload <= 880.0)
+ if (workload <= 880.0)
{
return configure_lhs_rhs_info(m, n, 2, 4, 4, 1, 4, 0, 0, 1, 0, 0);
}
else
{
- if(r_nk <= 0.42521367967128754)
+ if (r_nk <= 0.42521367967128754)
{
- if(workload <= 1726.4000244140625)
+ if (workload <= 1726.4000244140625)
{
return configure_lhs_rhs_info(m, n, 4, 4, 4, 2, 2, 0, 0, 1, 0, 0);
}
@@ -123,13 +127,12 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedValha
std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 4, 4, 4, 2, 1, 0, 1, 1, 0, 1);
return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img),
- std::make_pair(lhs_info_buf, rhs_info_buf),
- n, k, b, DataType::F16);
+ std::make_pair(lhs_info_buf, rhs_info_buf), n, k, b, DataType::F16);
}
}
else
{
- if(workload <= 1241.6000366210938)
+ if (workload <= 1241.6000366210938)
{
return configure_lhs_rhs_info(m, n, 2, 4, 4, 1, 4, 0, 0, 1, 0, 0);
}
@@ -142,17 +145,16 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedValha
}
else
{
- if(workload <= 11404.7998046875)
+ if (workload <= 11404.7998046875)
{
- if(r_mk <= 1.0126488208770752)
+ if (r_mk <= 1.0126488208770752)
{
- if(r_mn <= 2.545312523841858)
+ if (r_mn <= 2.545312523841858)
{
std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 4, 4, 4, 2, 1, 0, 1, 1, 0, 1);
return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img),
- std::make_pair(lhs_info_buf, rhs_info_buf),
- n, k, b, DataType::F16);
+ std::make_pair(lhs_info_buf, rhs_info_buf), n, k, b, DataType::F16);
}
else
{
@@ -161,43 +163,39 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedValha
}
else
{
- if(workload <= 2881.199951171875)
+ if (workload <= 2881.199951171875)
{
std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 4, 4, 4, 4, 2, 0, 0, 1, 0, 1);
return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img),
- std::make_pair(lhs_info_buf, rhs_info_buf),
- n, k, b, DataType::F16);
+ std::make_pair(lhs_info_buf, rhs_info_buf), n, k, b, DataType::F16);
}
else
{
std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 4, 4, 4, 2, 1, 0, 1, 1, 0, 1);
return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img),
- std::make_pair(lhs_info_buf, rhs_info_buf),
- n, k, b, DataType::F16);
+ std::make_pair(lhs_info_buf, rhs_info_buf), n, k, b, DataType::F16);
}
}
}
else
{
- if(r_nk <= 0.5765306055545807)
+ if (r_nk <= 0.5765306055545807)
{
- if(r_mn <= 6.010416746139526)
+ if (r_mn <= 6.010416746139526)
{
std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 4, 4, 4, 2, 1, 0, 1, 1, 0, 1);
return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img),
- std::make_pair(lhs_info_buf, rhs_info_buf),
- n, k, b, DataType::F16);
+ std::make_pair(lhs_info_buf, rhs_info_buf), n, k, b, DataType::F16);
}
else
{
std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 4, 4, 4, 2, 1, 1, 0, 1, 0, 1);
return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img),
- std::make_pair(lhs_info_buf, rhs_info_buf),
- n, k, b, DataType::F16);
+ std::make_pair(lhs_info_buf, rhs_info_buf), n, k, b, DataType::F16);
}
}
else
@@ -205,27 +203,27 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedValha
std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 4, 4, 4, 2, 1, 1, 0, 1, 0, 1);
return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img),
- std::make_pair(lhs_info_buf, rhs_info_buf),
- n, k, b, DataType::F16);
+ std::make_pair(lhs_info_buf, rhs_info_buf), n, k, b, DataType::F16);
}
}
}
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedValhall::configure_G78_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ClGemmDefaultConfigReshapedValhall::configure_G78_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
{
const float r_mn = static_cast<float>(m) / static_cast<float>(n);
const float r_mk = static_cast<float>(m) / static_cast<float>(k);
const float r_nk = static_cast<float>(n) / static_cast<float>(k);
const float workload = (static_cast<float>(m) * static_cast<float>(n) * static_cast<float>(b)) / 20.0f;
- if(workload <= 1288.0000f)
+ if (workload <= 1288.0000f)
{
- if(workload <= 505.6000f)
+ if (workload <= 505.6000f)
{
- if(r_mn <= 0.4466f)
+ if (r_mn <= 0.4466f)
{
- if(r_nk <= 0.2384f)
+ if (r_nk <= 0.2384f)
{
return configure_lhs_rhs_info(m, n, 2, 4, 8, 4, 4, 0, 0, 1, 0, 1);
}
@@ -241,9 +239,9 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedValha
}
else
{
- if(r_mn <= 0.2250f)
+ if (r_mn <= 0.2250f)
{
- if(r_mn <= 0.1599f)
+ if (r_mn <= 0.1599f)
{
return configure_lhs_rhs_info(m, n, 2, 4, 8, 4, 4, 0, 0, 1, 0, 1);
}
@@ -254,11 +252,11 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedValha
}
else
{
- if(r_mk <= 0.7609f)
+ if (r_mk <= 0.7609f)
{
- if(r_mn <= 2.5453f)
+ if (r_mn <= 2.5453f)
{
- if(workload <= 1089.6000f)
+ if (workload <= 1089.6000f)
{
return configure_lhs_rhs_info(m, n, 2, 4, 8, 4, 4, 0, 0, 1, 0, 1);
}
@@ -281,29 +279,29 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedValha
}
else
{
- if(workload <= 5434.4001f)
+ if (workload <= 5434.4001f)
{
- if(workload <= 1603.2000f)
+ if (workload <= 1603.2000f)
{
return configure_lhs_rhs_info(m, n, 4, 4, 4, 2, 2, 0, 0, 1, 0, 1);
}
else
{
- if(r_nk <= 0.6192f)
+ if (r_nk <= 0.6192f)
{
- if(r_mn <= 16.1016f)
+ if (r_mn <= 16.1016f)
{
return configure_lhs_rhs_info(m, n, 4, 4, 4, 2, 2, 0, 0, 1, 0, 1);
}
else
{
- if(workload <= 2750.0000f)
+ if (workload <= 2750.0000f)
{
return configure_lhs_rhs_info(m, n, 4, 4, 4, 2, 2, 0, 0, 1, 0, 1);
}
else
{
- if(r_mk <= 6.3151f)
+ if (r_mk <= 6.3151f)
{
return configure_lhs_rhs_info(m, n, 4, 4, 4, 4, 4, 0, 0, 0, 1, 1);
}
@@ -316,15 +314,15 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedValha
}
else
{
- if(r_mk <= 0.0387f)
+ if (r_mk <= 0.0387f)
{
return configure_lhs_rhs_info(m, n, 4, 4, 4, 4, 4, 0, 0, 1, 0, 1);
}
else
{
- if(r_mk <= 2.5859f)
+ if (r_mk <= 2.5859f)
{
- if(r_mk <= 0.2734f)
+ if (r_mk <= 0.2734f)
{
return configure_lhs_rhs_info(m, n, 4, 4, 4, 4, 4, 0, 0, 1, 0, 1);
}
@@ -343,13 +341,13 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedValha
}
else
{
- if(r_mk <= 25.7500f)
+ if (r_mk <= 25.7500f)
{
- if(r_mk <= 0.3615f)
+ if (r_mk <= 0.3615f)
{
- if(r_mn <= 0.0913f)
+ if (r_mn <= 0.0913f)
{
- if(r_mk <= 0.0683f)
+ if (r_mk <= 0.0683f)
{
return configure_lhs_rhs_info(m, n, 8, 4, 4, 4, 2, 0, 0, 1, 0, 1);
}
@@ -365,15 +363,15 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedValha
}
else
{
- if(workload <= 11174.3999f)
+ if (workload <= 11174.3999f)
{
- if(r_mk <= 0.8047f)
+ if (r_mk <= 0.8047f)
{
return configure_lhs_rhs_info(m, n, 8, 4, 4, 2, 2, 0, 0, 1, 0, 1);
}
else
{
- if(workload <= 7185.5999f)
+ if (workload <= 7185.5999f)
{
return configure_lhs_rhs_info(m, n, 4, 4, 4, 4, 4, 0, 0, 1, 0, 1);
}
@@ -385,9 +383,9 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedValha
}
else
{
- if(workload <= 17917.5000f)
+ if (workload <= 17917.5000f)
{
- if(r_mk <= 1.5078f)
+ if (r_mk <= 1.5078f)
{
return configure_lhs_rhs_info(m, n, 4, 4, 4, 2, 2, 0, 0, 1, 0, 1);
}
@@ -398,7 +396,7 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedValha
}
else
{
- if(workload <= 34449.6016f)
+ if (workload <= 34449.6016f)
{
return configure_lhs_rhs_info(m, n, 4, 4, 4, 2, 2, 0, 0, 1, 0, 1);
}
@@ -412,11 +410,11 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedValha
}
else
{
- if(r_mk <= 331.1111f)
+ if (r_mk <= 331.1111f)
{
- if(workload <= 53397.5996f)
+ if (workload <= 53397.5996f)
{
- if(r_mn <= 57.8063f)
+ if (r_mn <= 57.8063f)
{
return configure_lhs_rhs_info(m, n, 4, 4, 4, 2, 2, 0, 0, 1, 0, 1);
}
@@ -427,7 +425,7 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedValha
}
else
{
- if(r_nk <= 0.9211f)
+ if (r_nk <= 0.9211f)
{
return configure_lhs_rhs_info(m, n, 8, 4, 4, 4, 2, 0, 0, 1, 0, 1);
}
@@ -439,7 +437,7 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedValha
}
else
{
- if(workload <= 38070.4004f)
+ if (workload <= 38070.4004f)
{
return configure_lhs_rhs_info(m, n, 4, 4, 4, 4, 4, 0, 0, 0, 1, 1);
}
@@ -453,27 +451,28 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedValha
}
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedValhall::configure_G78_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ClGemmDefaultConfigReshapedValhall::configure_G78_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
{
const float r_mn = static_cast<float>(m) / static_cast<float>(n);
const float r_nk = static_cast<float>(n) / static_cast<float>(k);
const float workload = (static_cast<float>(m) * static_cast<float>(n) * static_cast<float>(b)) / 20.0f;
- if(workload <= 801.6000f)
+ if (workload <= 801.6000f)
{
return configure_lhs_rhs_info(m, n, 8, 4, 4, 1, 1, 0, 0, 1, 0, 1);
}
else
{
- if(r_mn <= 0.1211f)
+ if (r_mn <= 0.1211f)
{
- if(workload <= 3296.0000f)
+ if (workload <= 3296.0000f)
{
return configure_lhs_rhs_info(m, n, 8, 4, 4, 2, 2, 0, 0, 1, 0, 1);
}
else
{
- if(r_nk <= 1.0625f)
+ if (r_nk <= 1.0625f)
{
return configure_lhs_rhs_info(m, n, 8, 4, 4, 2, 2, 0, 0, 1, 0, 1);
}
@@ -485,15 +484,15 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedValha
}
else
{
- if(workload <= 5068.8000f)
+ if (workload <= 5068.8000f)
{
return configure_lhs_rhs_info(m, n, 8, 4, 4, 1, 1, 0, 0, 1, 0, 1);
}
else
{
- if(r_nk <= 0.2361f)
+ if (r_nk <= 0.2361f)
{
- if(workload <= 12630.0000f)
+ if (workload <= 12630.0000f)
{
return configure_lhs_rhs_info(m, n, 8, 4, 4, 1, 1, 0, 0, 1, 0, 1);
}
@@ -504,7 +503,7 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedValha
}
else
{
- if(workload <= 178790.3984f)
+ if (workload <= 178790.3984f)
{
return configure_lhs_rhs_info(m, n, 8, 4, 4, 2, 2, 0, 0, 1, 0, 1);
}
@@ -518,12 +517,13 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedValha
}
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedValhall::configure_G77_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ClGemmDefaultConfigReshapedValhall::configure_G77_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
{
ARM_COMPUTE_UNUSED(k);
ARM_COMPUTE_UNUSED(b);
- if(n <= 4)
+ if (n <= 4)
{
return configure_lhs_rhs_info(m, n, 4, 2, 16, 4, 1, 0, 0, 0, 1);
}
diff --git a/src/gpu/cl/kernels/gemm/reshaped/ClGemmDefaultConfigReshapedValhall.h b/src/gpu/cl/kernels/gemm/reshaped/ClGemmDefaultConfigReshapedValhall.h
index 466eda00a6..5f62efb59e 100644
--- a/src/gpu/cl/kernels/gemm/reshaped/ClGemmDefaultConfigReshapedValhall.h
+++ b/src/gpu/cl/kernels/gemm/reshaped/ClGemmDefaultConfigReshapedValhall.h
@@ -45,14 +45,20 @@ public:
ClGemmDefaultConfigReshapedValhall(GPUTarget gpu);
// Inherited overridden method
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type) override;
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type) override;
private:
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G77_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G77_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G78_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G78_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G77_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure_G77_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure_G77_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure_G78_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure_G78_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure_G77_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
};
} // namespace gemm
} // namespace kernels
diff --git a/src/gpu/cl/kernels/gemm/reshaped/ClGemmReshapedKernelConfig.h b/src/gpu/cl/kernels/gemm/reshaped/ClGemmReshapedKernelConfig.h
index 1c32f1358b..83928b3f4f 100644
--- a/src/gpu/cl/kernels/gemm/reshaped/ClGemmReshapedKernelConfig.h
+++ b/src/gpu/cl/kernels/gemm/reshaped/ClGemmReshapedKernelConfig.h
@@ -50,7 +50,7 @@ public:
*/
static std::unique_ptr<IClGemmKernelConfig> create(GPUTarget gpu)
{
- switch(get_arch_from_target(gpu))
+ switch (get_arch_from_target(gpu))
{
case GPUTarget::MIDGARD:
case GPUTarget::BIFROST:
diff --git a/src/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmDefaultConfigReshapedRhsOnlyBifrost.cpp b/src/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmDefaultConfigReshapedRhsOnlyBifrost.cpp
index 9c23d9c998..c4825bfbeb 100644
--- a/src/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmDefaultConfigReshapedRhsOnlyBifrost.cpp
+++ b/src/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmDefaultConfigReshapedRhsOnlyBifrost.cpp
@@ -29,7 +29,9 @@
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/TensorShape.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+
#include "src/gpu/cl/kernels/gemm/ClGemmHelpers.h"
+
#include <utility>
namespace arm_compute
@@ -47,33 +49,39 @@ ClGemmDefaultConfigReshapedRhsOnlyBifrost::ClGemmDefaultConfigReshapedRhsOnlyBif
{
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure(
+ unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type)
{
- using ConfigurationFunctionExecutorPtr = std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> (ClGemmDefaultConfigReshapedRhsOnlyBifrost::*)(unsigned int m, unsigned int n, unsigned int k,
- unsigned int b);
-
- CLGEMMConfigArray<ConfigurationFunctionExecutorPtr> configs_G51(&ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G51_f32,
- &ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G51_f16,
- &ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G51_u8);
-
- CLGEMMConfigArray<ConfigurationFunctionExecutorPtr> configs_G52(&ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G52_f32,
- &ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G52_f16,
- &ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G7x_u8);
-
- CLGEMMConfigArray<ConfigurationFunctionExecutorPtr> configs_G31(&ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G7x_f32,
- &ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G7x_f16,
- &ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G31_u8);
-
- CLGEMMConfigArray<ConfigurationFunctionExecutorPtr> configs_G76(&ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G76_f32,
- &ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G76_f16,
- &ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G76_u8);
-
- CLGEMMConfigArray<ConfigurationFunctionExecutorPtr> configs_G7x(&ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G7x_f32,
- &ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G7x_f16,
- &ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G7x_u8);
+ using ConfigurationFunctionExecutorPtr = std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> (
+ ClGemmDefaultConfigReshapedRhsOnlyBifrost::*)(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+
+ CLGEMMConfigArray<ConfigurationFunctionExecutorPtr> configs_G51(
+ &ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G51_f32,
+ &ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G51_f16,
+ &ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G51_u8);
+
+ CLGEMMConfigArray<ConfigurationFunctionExecutorPtr> configs_G52(
+ &ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G52_f32,
+ &ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G52_f16,
+ &ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G7x_u8);
+
+ CLGEMMConfigArray<ConfigurationFunctionExecutorPtr> configs_G31(
+ &ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G7x_f32,
+ &ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G7x_f16,
+ &ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G31_u8);
+
+ CLGEMMConfigArray<ConfigurationFunctionExecutorPtr> configs_G76(
+ &ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G76_f32,
+ &ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G76_f16,
+ &ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G76_u8);
+
+ CLGEMMConfigArray<ConfigurationFunctionExecutorPtr> configs_G7x(
+ &ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G7x_f32,
+ &ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G7x_f16,
+ &ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G7x_u8);
ConfigurationFunctionExecutorPtr func = nullptr;
- switch(_target)
+ switch (_target)
{
case GPUTarget::G76:
func = configs_G76.get_function(data_type);
@@ -96,14 +104,15 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOn
return (this->*func)(m, n, k, b);
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G7x_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G7x_f32(
+ unsigned int m, unsigned int n, unsigned int k, unsigned int b)
{
ARM_COMPUTE_UNUSED(k);
ARM_COMPUTE_UNUSED(b);
- if(m == 1)
+ if (m == 1)
{
- if(n <= 2548)
+ if (n <= 2548)
{
return configure_lhs_rhs_info(m, n, 1, 2, 16, 1, 4, false, true, false, true, false);
}
@@ -118,12 +127,13 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOn
}
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G31_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G31_u8(
+ unsigned int m, unsigned int n, unsigned int k, unsigned int b)
{
ARM_COMPUTE_UNUSED(k);
ARM_COMPUTE_UNUSED(b);
- if(m == 1)
+ if (m == 1)
{
const unsigned int h0 = std::max(n / 2, 1U);
return configure_lhs_rhs_info(m, n, 1, 4, 16, 1, h0, 0, 1, 0, 1);
@@ -131,7 +141,7 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOn
else
{
const int h0 = std::max(std::min(static_cast<int>(n / 4), static_cast<int>(256)), static_cast<int>(1));
- if(m >= 28)
+ if (m >= 28)
{
return configure_lhs_rhs_info(m, n, 4, 4, 4, 1, h0, 0, 1, 0, 1);
}
@@ -142,7 +152,8 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOn
}
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G76_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G76_f32(
+ unsigned int m, unsigned int n, unsigned int k, unsigned int b)
{
ARM_COMPUTE_UNUSED(k);
ARM_COMPUTE_UNUSED(b);
@@ -154,9 +165,9 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOn
const bool is_workload_big = ((m * n * b) / 16) >= 2048;
- if(m == 1)
+ if (m == 1)
{
- if(n >= 8192)
+ if (n >= 8192)
{
const unsigned int h0 = std::max(n / 4, 1U);
return configure_lhs_rhs_info(m, n, 1, 4, 8, 1, h0, false, true, false, true, false);
@@ -164,7 +175,7 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOn
else
{
const unsigned int h0 = std::max(n / 2, 1U);
- if(n <= 204)
+ if (n <= 204)
{
return configure_lhs_rhs_info(m, n, 1, 2, 16, 1, h0, false, true, false, true, false);
}
@@ -177,25 +188,29 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOn
else
{
const int h0 = std::max(std::min(static_cast<int>(n / 4), static_cast<int>(16)), static_cast<int>(1));
- if(is_workload_big)
+ if (is_workload_big)
{
- std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 4, 4, 4, 1, h0, false, true, false, true);
+ std::tie(lhs_info_buf, rhs_info_buf) =
+ configure_lhs_rhs_info(m, n, 4, 4, 4, 1, h0, false, true, false, true);
}
else
{
- std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 2, 4, 8, 1, h0, false, true, false, true);
+ std::tie(lhs_info_buf, rhs_info_buf) =
+ configure_lhs_rhs_info(m, n, 2, 4, 8, 1, h0, false, true, false, true);
}
}
// Get lhs_info/rhs_info in case of OpenCL image
const int h0 = std::max(std::min(static_cast<int>(n / 4), static_cast<int>(16)), static_cast<int>(1));
- if(is_workload_big)
+ if (is_workload_big)
{
- std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 4, 4, 4, 1, h0, false, true, false, false, true);
+ std::tie(lhs_info_img, rhs_info_img) =
+ configure_lhs_rhs_info(m, n, 4, 4, 4, 1, h0, false, true, false, false, true);
}
else
{
- std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 2, 4, 8, 1, h0, false, true, false, true, true);
+ std::tie(lhs_info_img, rhs_info_img) =
+ configure_lhs_rhs_info(m, n, 2, 4, 8, 1, h0, false, true, false, true, true);
}
const TensorInfo tensor_rhs_info(TensorShape(n, k, b), 1, DataType::F32);
@@ -205,7 +220,7 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOn
// In case of vector by matrix or small workloads, we use the OpenCL buffer rather than the OpenCL image2d
const bool use_cl_image2d = ((m == 1) || ((((m * n * b) / 16) < 2048) && n < 128)) ? false : true;
- if(bool(validate_image2d_support_on_rhs(tensor_reshaped_info, rhs_info_img)) && use_cl_image2d)
+ if (bool(validate_image2d_support_on_rhs(tensor_reshaped_info, rhs_info_img)) && use_cl_image2d)
{
return std::make_pair(lhs_info_img, rhs_info_img);
}
@@ -215,7 +230,8 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOn
}
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G52_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G52_f32(
+ unsigned int m, unsigned int n, unsigned int k, unsigned int b)
{
const float workload = (static_cast<float>(m) * static_cast<float>(n) * static_cast<float>(b)) / 20.0f;
const float r_nk = static_cast<float>(n) / static_cast<float>(k);
@@ -225,46 +241,49 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOn
GEMMLHSMatrixInfo lhs_info_img;
GEMMRHSMatrixInfo rhs_info_img;
- if(m == 1)
+ if (m == 1)
{
- if(r_nk <= 0.4664f)
+ if (r_nk <= 0.4664f)
{
return configure_lhs_rhs_info(m, n, 1, 2, 16, 1, 16, false, true, false, true, false);
}
else
{
- std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 1, 4, 8, 1, 16, false, true, false, true, true);
- std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 1, 4, 8, 1, 16, false, true, false, true, false);
+ std::tie(lhs_info_img, rhs_info_img) =
+ configure_lhs_rhs_info(m, n, 1, 4, 8, 1, 16, false, true, false, true, true);
+ std::tie(lhs_info_buf, rhs_info_buf) =
+ configure_lhs_rhs_info(m, n, 1, 4, 8, 1, 16, false, true, false, true, false);
return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img),
- std::make_pair(lhs_info_buf, rhs_info_buf),
- n, k, b, DataType::F32);
+ std::make_pair(lhs_info_buf, rhs_info_buf), n, k, b, DataType::F32);
}
}
else
{
- if(workload <= 274.4000f)
+ if (workload <= 274.4000f)
{
return configure_lhs_rhs_info(m, n, 2, 2, 4, 1, 16, false, false, false, true, false);
}
else
{
- std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 4, 4, 4, 1, 2, false, false, false, true, true);
- std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 4, 4, 4, 1, 2, false, false, false, true, false);
+ std::tie(lhs_info_img, rhs_info_img) =
+ configure_lhs_rhs_info(m, n, 4, 4, 4, 1, 2, false, false, false, true, true);
+ std::tie(lhs_info_buf, rhs_info_buf) =
+ configure_lhs_rhs_info(m, n, 4, 4, 4, 1, 2, false, false, false, true, false);
return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img),
- std::make_pair(lhs_info_buf, rhs_info_buf),
- n, k, b, DataType::F32);
+ std::make_pair(lhs_info_buf, rhs_info_buf), n, k, b, DataType::F32);
}
}
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G51_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G51_f32(
+ unsigned int m, unsigned int n, unsigned int k, unsigned int b)
{
ARM_COMPUTE_UNUSED(k);
ARM_COMPUTE_UNUSED(b);
- if(m == 1)
+ if (m == 1)
{
const unsigned int n0 = n < 1280 ? 2 : 4;
const unsigned int h0 = std::max(n / n0, 1U);
@@ -276,14 +295,15 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOn
}
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G7x_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G7x_f16(
+ unsigned int m, unsigned int n, unsigned int k, unsigned int b)
{
ARM_COMPUTE_UNUSED(k);
ARM_COMPUTE_UNUSED(b);
- if(m == 1)
+ if (m == 1)
{
- if(n > 2048)
+ if (n > 2048)
{
const unsigned int h0 = std::max(n / 4, 1U);
return configure_lhs_rhs_info(m, n, 1, 4, 4, 1, h0, false, true, false, true);
@@ -300,7 +320,8 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOn
}
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G52_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G52_f16(
+ unsigned int m, unsigned int n, unsigned int k, unsigned int b)
{
const float r_mn = static_cast<float>(m) / static_cast<float>(n);
const float workload = (static_cast<float>(m) * static_cast<float>(n) * static_cast<float>(b)) / 20.0f;
@@ -312,57 +333,59 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOn
GEMMLHSMatrixInfo lhs_info_img;
GEMMRHSMatrixInfo rhs_info_img;
- if(m == 1)
+ if (m == 1)
{
- std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 1, 4, 16, 1, 16, false, true, false, false, false);
+ std::tie(lhs_info_buf, rhs_info_buf) =
+ configure_lhs_rhs_info(m, n, 1, 4, 16, 1, 16, false, true, false, false, false);
- if(r_mk <= 0.0026f)
+ if (r_mk <= 0.0026f)
{
- if(r_nk <= 0.4664f)
+ if (r_nk <= 0.4664f)
{
return configure_lhs_rhs_info(m, n, 1, 2, 16, 1, 32, false, true, false, true, false);
}
else
{
- std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 1, 4, 16, 1, 16, false, true, false, false, true);
+ std::tie(lhs_info_img, rhs_info_img) =
+ configure_lhs_rhs_info(m, n, 1, 4, 16, 1, 16, false, true, false, false, true);
return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img),
- std::make_pair(lhs_info_buf, rhs_info_buf),
- n, k, b, DataType::F16);
+ std::make_pair(lhs_info_buf, rhs_info_buf), n, k, b, DataType::F16);
}
}
else
{
- if(r_mk <= 0.0148f)
+ if (r_mk <= 0.0148f)
{
return configure_lhs_rhs_info(m, n, 1, 2, 16, 1, 32, false, true, false, true, false);
}
else
{
- std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 1, 4, 16, 1, 16, false, true, false, false, true);
+ std::tie(lhs_info_img, rhs_info_img) =
+ configure_lhs_rhs_info(m, n, 1, 4, 16, 1, 16, false, true, false, false, true);
return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img),
- std::make_pair(lhs_info_buf, rhs_info_buf),
- n, k, b, DataType::F16);
+ std::make_pair(lhs_info_buf, rhs_info_buf), n, k, b, DataType::F16);
}
}
}
else
{
- std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 5, 8, 4, 1, 2, false, false, false, false, false);
+ std::tie(lhs_info_buf, rhs_info_buf) =
+ configure_lhs_rhs_info(m, n, 5, 8, 4, 1, 2, false, false, false, false, false);
- if(workload <= 362.6000f)
+ if (workload <= 362.6000f)
{
return configure_lhs_rhs_info(m, n, 2, 2, 8, 1, 16, false, false, false, true, false);
}
else
{
- if(r_mn <= 22.6067f)
+ if (r_mn <= 22.6067f)
{
- if(workload <= 708.8000f)
+ if (workload <= 708.8000f)
{
- std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 5, 4, 4, 1, 2, false, false, false, false, true);
+ std::tie(lhs_info_img, rhs_info_img) =
+ configure_lhs_rhs_info(m, n, 5, 4, 4, 1, 2, false, false, false, false, true);
return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img),
- std::make_pair(lhs_info_buf, rhs_info_buf),
- n, k, b, DataType::F16);
+ std::make_pair(lhs_info_buf, rhs_info_buf), n, k, b, DataType::F16);
}
else
{
@@ -371,27 +394,28 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOn
}
else
{
- if(r_nk <= 0.0917f)
+ if (r_nk <= 0.0917f)
{
return configure_lhs_rhs_info(m, n, 2, 2, 8, 1, 16, false, false, false, true, false);
}
else
{
- std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 5, 4, 4, 1, 2, false, false, false, false, true);
+ std::tie(lhs_info_img, rhs_info_img) =
+ configure_lhs_rhs_info(m, n, 5, 4, 4, 1, 2, false, false, false, false, true);
return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img),
- std::make_pair(lhs_info_buf, rhs_info_buf),
- n, k, b, DataType::F16);
+ std::make_pair(lhs_info_buf, rhs_info_buf), n, k, b, DataType::F16);
}
}
}
}
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G76_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G76_f16(
+ unsigned int m, unsigned int n, unsigned int k, unsigned int b)
{
ARM_COMPUTE_UNUSED(k);
- if(m == 1)
+ if (m == 1)
{
return configure_lhs_rhs_info(m, n, 1, 2, 16, 1, 32, false, true, false, true, false);
}
@@ -400,15 +424,15 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOn
const float r_mn = static_cast<float>(m) / static_cast<float>(n);
const float workload = (static_cast<float>(m) * static_cast<float>(n) * static_cast<float>(b)) / 20.0f;
- if(workload <= 7449.60f)
+ if (workload <= 7449.60f)
{
- if(workload <= 691.60f)
+ if (workload <= 691.60f)
{
return configure_lhs_rhs_info(m, n, 2, 2, 8, 1, 8, false, false, false, false, false);
}
else
{
- if(workload <= 4155.20f)
+ if (workload <= 4155.20f)
{
return configure_lhs_rhs_info(m, n, 5, 2, 8, 1, 16, false, false, false, false, false);
}
@@ -420,21 +444,22 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOn
}
else
{
- if(workload <= 16300.80f)
+ if (workload <= 16300.80f)
{
- if(r_mn <= 44.56f)
+ if (r_mn <= 44.56f)
{
GEMMLHSMatrixInfo lhs_info_buf;
GEMMRHSMatrixInfo rhs_info_buf;
GEMMLHSMatrixInfo lhs_info_img;
GEMMRHSMatrixInfo rhs_info_img;
- std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 8, 4, 4, 1, 1, false, true, false, false, true);
- std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 5, 2, 8, 1, 16, false, false, false, false, false);
+ std::tie(lhs_info_img, rhs_info_img) =
+ configure_lhs_rhs_info(m, n, 8, 4, 4, 1, 1, false, true, false, false, true);
+ std::tie(lhs_info_buf, rhs_info_buf) =
+ configure_lhs_rhs_info(m, n, 5, 2, 8, 1, 16, false, false, false, false, false);
return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img),
- std::make_pair(lhs_info_buf, rhs_info_buf),
- n, k, b, DataType::F16);
+ std::make_pair(lhs_info_buf, rhs_info_buf), n, k, b, DataType::F16);
}
else
{
@@ -448,23 +473,25 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOn
GEMMLHSMatrixInfo lhs_info_img;
GEMMRHSMatrixInfo rhs_info_img;
- std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 5, 4, 4, 1, 2, false, true, false, false, true);
- std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 5, 2, 8, 1, 16, false, false, false, false, false);
+ std::tie(lhs_info_img, rhs_info_img) =
+ configure_lhs_rhs_info(m, n, 5, 4, 4, 1, 2, false, true, false, false, true);
+ std::tie(lhs_info_buf, rhs_info_buf) =
+ configure_lhs_rhs_info(m, n, 5, 2, 8, 1, 16, false, false, false, false, false);
return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img),
- std::make_pair(lhs_info_buf, rhs_info_buf),
- n, k, b, DataType::F16);
+ std::make_pair(lhs_info_buf, rhs_info_buf), n, k, b, DataType::F16);
}
}
}
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G51_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G51_f16(
+ unsigned int m, unsigned int n, unsigned int k, unsigned int b)
{
ARM_COMPUTE_UNUSED(k);
ARM_COMPUTE_UNUSED(b);
- if(m == 1)
+ if (m == 1)
{
const unsigned int n0 = n < 1280 ? 2 : 4;
const unsigned int h0 = std::max(n / n0, 1U);
@@ -476,14 +503,15 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOn
}
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G7x_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G7x_u8(
+ unsigned int m, unsigned int n, unsigned int k, unsigned int b)
{
ARM_COMPUTE_UNUSED(k);
ARM_COMPUTE_UNUSED(b);
- if(dot8_supported(CLKernelLibrary::get().get_device()))
+ if (dot8_supported(CLKernelLibrary::get().get_device()))
{
- if(m == 1)
+ if (m == 1)
{
const unsigned int h0 = std::max(n / 2, 1U);
return configure_lhs_rhs_info(m, n, 1, 2, 16, 1, h0, false, true, false, true);
@@ -497,7 +525,7 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOn
else
{
const int h0 = std::max(std::min(static_cast<int>(n / 2), static_cast<int>(128)), static_cast<int>(1));
- if(m == 1)
+ if (m == 1)
{
return configure_lhs_rhs_info(m, n, 1, 2, 4, 1, h0, false, true, false, true);
}
@@ -508,12 +536,13 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOn
}
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G76_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G76_u8(
+ unsigned int m, unsigned int n, unsigned int k, unsigned int b)
{
ARM_COMPUTE_UNUSED(k);
ARM_COMPUTE_UNUSED(b);
- if(m == 1)
+ if (m == 1)
{
const unsigned int h0 = std::max(n / 2, 1U);
return configure_lhs_rhs_info(m, n, 1, 2, 16, 1, h0, false, true, false, true);
@@ -524,12 +553,13 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOn
}
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G51_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G51_u8(
+ unsigned int m, unsigned int n, unsigned int k, unsigned int b)
{
ARM_COMPUTE_UNUSED(k);
ARM_COMPUTE_UNUSED(b);
- if(m == 1)
+ if (m == 1)
{
const unsigned int h0 = std::max(n / 2, 1U);
return configure_lhs_rhs_info(m, n, 1, 4, 16, 1, h0, false, true, false, true);
diff --git a/src/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmDefaultConfigReshapedRhsOnlyBifrost.h b/src/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmDefaultConfigReshapedRhsOnlyBifrost.h
index 321cbb5250..77c0c8d500 100644
--- a/src/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmDefaultConfigReshapedRhsOnlyBifrost.h
+++ b/src/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmDefaultConfigReshapedRhsOnlyBifrost.h
@@ -45,21 +45,34 @@ public:
ClGemmDefaultConfigReshapedRhsOnlyBifrost(GPUTarget gpu);
// Inherited overridden method
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type) override;
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type) override;
private:
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G7x_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G76_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G52_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G51_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G7x_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G52_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G76_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G51_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G7x_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G76_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G51_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G31_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure_G7x_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure_G76_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure_G52_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure_G51_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure_G7x_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure_G52_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure_G76_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure_G51_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure_G7x_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure_G76_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure_G51_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure_G31_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
};
} // namespace gemm
} // namespace kernels
diff --git a/src/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmDefaultConfigReshapedRhsOnlyValhall.cpp b/src/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmDefaultConfigReshapedRhsOnlyValhall.cpp
index d08bf84c72..da3e2ec912 100644
--- a/src/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmDefaultConfigReshapedRhsOnlyValhall.cpp
+++ b/src/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmDefaultConfigReshapedRhsOnlyValhall.cpp
@@ -50,30 +50,35 @@ ClGemmDefaultConfigReshapedRhsOnlyValhall::ClGemmDefaultConfigReshapedRhsOnlyVal
{
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyValhall::configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyValhall::configure(
+ unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type)
{
- using ConfigurationFunctionExecutorPtr = std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> (ClGemmDefaultConfigReshapedRhsOnlyValhall::*)(unsigned int m, unsigned int n, unsigned int k,
- unsigned int b);
+ using ConfigurationFunctionExecutorPtr = std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> (
+ ClGemmDefaultConfigReshapedRhsOnlyValhall::*)(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- CLGEMMConfigArray<ConfigurationFunctionExecutorPtr> configs_G77(&ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G77_f32,
- &ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G77_f16,
- &ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G77_u8);
+ CLGEMMConfigArray<ConfigurationFunctionExecutorPtr> configs_G77(
+ &ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G77_f32,
+ &ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G77_f16,
+ &ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G77_u8);
- CLGEMMConfigArray<ConfigurationFunctionExecutorPtr> configs_G78(&ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G78_f32,
- &ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G78_f16,
- &ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G77_u8);
+ CLGEMMConfigArray<ConfigurationFunctionExecutorPtr> configs_G78(
+ &ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G78_f32,
+ &ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G78_f16,
+ &ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G77_u8);
- CLGEMMConfigArray<ConfigurationFunctionExecutorPtr> configs_G710(&ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G77_f32,
- &ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G710_f16,
- &ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G77_u8);
+ CLGEMMConfigArray<ConfigurationFunctionExecutorPtr> configs_G710(
+ &ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G77_f32,
+ &ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G710_f16,
+ &ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G77_u8);
- CLGEMMConfigArray<ConfigurationFunctionExecutorPtr> configs_G715(&ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G715_f32,
- &ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G715_f16,
- &ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G77_u8);
+ CLGEMMConfigArray<ConfigurationFunctionExecutorPtr> configs_G715(
+ &ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G715_f32,
+ &ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G715_f16,
+ &ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G77_u8);
ConfigurationFunctionExecutorPtr func = nullptr;
- switch(_target)
+ switch (_target)
{
case GPUTarget::G78:
func = configs_G78.get_function(data_type);
@@ -96,29 +101,29 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOn
return (this->*func)(m, n, k, b);
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G77_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G77_f32(
+ unsigned int m, unsigned int n, unsigned int k, unsigned int b)
{
- if(m == 1)
+ if (m == 1)
{
const float r_mn = static_cast<float>(m) / static_cast<float>(n);
const float r_mk = static_cast<float>(m) / static_cast<float>(k);
- if(r_mk <= 0.0064484127797186375)
+ if (r_mk <= 0.0064484127797186375)
{
- if(r_mn <= 0.0028273810748942196)
+ if (r_mn <= 0.0028273810748942196)
{
GEMMLHSMatrixInfo lhs_info_buf;
GEMMRHSMatrixInfo rhs_info_buf;
GEMMLHSMatrixInfo lhs_info_img;
GEMMRHSMatrixInfo rhs_info_img;
- const unsigned int h0 = std::max(n / 4, 1U);
+ const unsigned int h0 = std::max(n / 4, 1U);
std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 1, 4, 8, 1, 16, 0, 1, 0, 0, 1);
std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 1, 4, 4, 1, h0, 0, 1, 0, 1, 0);
return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img),
- std::make_pair(lhs_info_buf, rhs_info_buf),
- n, k, b, DataType::F32);
+ std::make_pair(lhs_info_buf, rhs_info_buf), n, k, b, DataType::F32);
}
else
{
@@ -127,7 +132,7 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOn
}
else
{
- if(r_mk <= 0.020312500186264515)
+ if (r_mk <= 0.020312500186264515)
{
return configure_lhs_rhs_info(m, n, 1, 2, 16, 1, 4, 0, 1, 0, 0, 0);
}
@@ -143,9 +148,9 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOn
const float workload = (static_cast<float>(m) * static_cast<float>(n) * static_cast<float>(b)) / 20.0f;
const float r_mk = static_cast<float>(m) / static_cast<float>(k);
- if(workload <= 1999.2000122070312)
+ if (workload <= 1999.2000122070312)
{
- if(workload <= 747.1999816894531)
+ if (workload <= 747.1999816894531)
{
return configure_lhs_rhs_info(m, n, 2, 2, 4, 1, 8, 0, 1, 0, 1, 0);
}
@@ -159,15 +164,14 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOn
std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 2, 2, 4, 1, 8, 0, 1, 0, 1, 0);
return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img),
- std::make_pair(lhs_info_buf, rhs_info_buf),
- n, k, b, DataType::F32);
+ std::make_pair(lhs_info_buf, rhs_info_buf), n, k, b, DataType::F32);
}
}
else
{
- if(r_mn <= 0.03348214365541935)
+ if (r_mn <= 0.03348214365541935)
{
- if(r_mk <= 0.028125000186264515)
+ if (r_mk <= 0.028125000186264515)
{
return configure_lhs_rhs_info(m, n, 2, 2, 4, 1, 8, 0, 1, 0, 1, 0);
}
@@ -181,8 +185,7 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOn
std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 2, 2, 4, 1, 8, 0, 1, 0, 1, 0);
return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img),
- std::make_pair(lhs_info_buf, rhs_info_buf),
- n, k, b, DataType::F32);
+ std::make_pair(lhs_info_buf, rhs_info_buf), n, k, b, DataType::F32);
}
}
else
@@ -195,168 +198,112 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOn
std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 4, 4, 4, 1, 16, 0, 1, 0, 1, 0);
return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img),
- std::make_pair(lhs_info_buf, rhs_info_buf),
- n, k, b, DataType::F32);
+ std::make_pair(lhs_info_buf, rhs_info_buf), n, k, b, DataType::F32);
}
}
}
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G77_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G77_f16(
+ unsigned int m, unsigned int n, unsigned int k, unsigned int b)
{
- const GeMMConfigsMatrix configs_1nkb_best =
- {
- { 1, 8984, 640, 1, 1, 8, 8, 1, 0, 1, 1, 1, 1, 0 },
- { 1, 420, 392, 1, 1, 2, 8, 1, 0, 1, 0, 1, 0, 0 },
- { 1, 644, 5288, 1, 1, 2, 8, 1, 0, 1, 0, 1, 0, 0 },
- { 1, 6512, 6404, 1, 1, 4, 8, 1, 0, 1, 0, 1, 0, 0 },
- { 1, 5304, 640, 1, 1, 4, 4, 1, 0, 1, 0, 1, 1, 0 },
- { 1, 1352, 1520, 1, 1, 2, 8, 1, 0, 1, 1, 1, 1, 0 },
- { 1, 4096, 25088, 1, 1, 2, 16, 1, 0, 1, 0, 1, 0, 0 },
- { 1, 732, 8988, 1, 1, 2, 8, 1, 0, 1, 0, 1, 0, 0 }
- };
-
- const GeMMConfigsMatrix configs_mnkb_n_small_best =
- {
- { 102400, 4, 96, 1, 2, 2, 16, 1, 4, 1, 1, 1, 1, 0 },
- { 102400, 2, 96, 1, 1, 2, 16, 1, 0, 1, 0, 1, 1, 1 },
- { 16384, 4, 128, 1, 1, 2, 16, 1, 0, 1, 0, 1, 1, 1 },
- { 16384, 2, 128, 1, 1, 2, 16, 1, 0, 1, 1, 1, 1, 1 }
- };
-
- const GeMMConfigsMatrix configs_mnkb_n_small_fallback =
- {
- { 102400, 4, 96, 1, 2, 2, 16, 1, 4, 1, 1, 1, 1, 0 },
- { 102400, 2, 96, 1, 1, 2, 16, 1, 0, 1, 1, 1, 1, 0 },
- { 16384, 4, 128, 1, 2, 2, 16, 1, 2, 1, 1, 1, 1, 0 },
- { 16384, 2, 128, 1, 1, 2, 16, 1, 0, 1, 1, 1, 1, 0 }
- };
-
- const GeMMConfigsMatrix configs_mnkb_m_gt_n_best =
- {
- { 25584, 88, 16, 1, 4, 8, 4, 1, 8, 1, 1, 1, 0, 0 },
- { 25584, 16, 68, 1, 4, 4, 8, 1, 16, 1, 1, 1, 0, 1 },
- { 369664, 32, 28, 1, 5, 4, 4, 1, 64, 1, 1, 1, 0, 1 },
- { 65792, 44, 24, 1, 4, 8, 4, 1, 128, 1, 1, 1, 0, 0 },
- { 23036, 56, 736, 1, 4, 4, 8, 1, 64, 1, 1, 1, 0, 1 },
- { 90968, 40, 600, 1, 4, 4, 8, 1, 64, 1, 1, 1, 0, 1 },
- { 8944, 32, 776, 1, 4, 4, 8, 1, 64, 1, 1, 1, 0, 1 },
- { 50176, 64, 300, 1, 4, 8, 4, 1, 128, 1, 1, 1, 0, 0 },
- { 16544, 104, 160, 1, 4, 4, 8, 1, 64, 1, 1, 1, 0, 1 },
- { 12604, 60, 160, 1, 4, 4, 8, 1, 64, 1, 1, 1, 0, 1 },
- { 29584, 32, 28, 1, 4, 4, 4, 1, 128, 1, 1, 1, 0, 0 },
- { 12544, 32, 27, 1, 2, 8, 8, 1, 128, 1, 1, 1, 0, 0 },
- { 2688, 136, 1492, 1, 8, 4, 4, 1, 128, 1, 1, 1, 0, 0 },
- { 3728, 96, 196, 1, 4, 8, 4, 1, 128, 1, 1, 1, 0, 0 }
- };
-
- const GeMMConfigsMatrix configs_mnkb_m_gt_n_fallback =
- {
- { 25584, 88, 16, 1, 4, 8, 4, 1, 8, 1, 1, 1, 0, 0 },
- { 25584, 16, 68, 1, 2, 4, 8, 1, 4, 1, 1, 1, 0, 0 },
- { 369664, 32, 28, 1, 5, 4, 4, 1, 256, 1, 1, 1, 0, 0 },
- { 65792, 44, 24, 1, 4, 8, 4, 1, 128, 1, 1, 1, 0, 0 },
- { 23036, 56, 736, 1, 4, 8, 4, 1, 64, 1, 1, 1, 0, 0 },
- { 90968, 40, 600, 1, 4, 8, 4, 1, 64, 1, 1, 1, 0, 0 },
- { 8944, 32, 776, 1, 4, 4, 8, 1, 64, 1, 1, 1, 0, 0 },
- { 50176, 64, 300, 1, 4, 8, 4, 1, 128, 1, 1, 1, 0, 0 },
- { 16544, 104, 160, 1, 4, 4, 8, 1, 64, 1, 1, 1, 0, 0 },
- { 12604, 60, 160, 1, 4, 4, 8, 1, 256, 1, 1, 1, 0, 0 },
- { 29584, 32, 28, 1, 4, 4, 4, 1, 128, 1, 1, 1, 0, 0 },
- { 12544, 32, 27, 1, 2, 8, 8, 1, 128, 1, 1, 1, 0, 0 },
- { 2688, 136, 1492, 1, 8, 4, 4, 1, 128, 1, 1, 1, 0, 0 },
- { 3728, 96, 196, 1, 4, 8, 4, 1, 128, 1, 1, 1, 0, 0 }
- };
-
- const GeMMConfigsMatrix configs_mnkb_n_gt_m_best =
- {
- { 24, 488, 88, 1, 2, 4, 16, 1, 4, 1, 1, 1, 0, 0 },
- { 49, 1024, 512, 1, 4, 4, 8, 1, 128, 1, 1, 1, 0, 1 },
- { 49, 1024, 1024, 1, 4, 4, 8, 1, 64, 1, 1, 1, 0, 1 },
- };
-
- const GeMMConfigsMatrix configs_mnkb_n_gt_m_fallback =
- {
- { 24, 488, 88, 1, 2, 4, 16, 1, 4, 1, 1, 1, 0, 0 },
- { 49, 1024, 512, 1, 4, 4, 8, 1, 128, 1, 1, 1, 0, 0 },
- { 49, 1024, 1024, 1, 4, 4, 8, 1, 256, 1, 1, 1, 0, 0 },
+ const GeMMConfigsMatrix configs_1nkb_best = {
+ {1, 8984, 640, 1, 1, 8, 8, 1, 0, 1, 1, 1, 1, 0}, {1, 420, 392, 1, 1, 2, 8, 1, 0, 1, 0, 1, 0, 0},
+ {1, 644, 5288, 1, 1, 2, 8, 1, 0, 1, 0, 1, 0, 0}, {1, 6512, 6404, 1, 1, 4, 8, 1, 0, 1, 0, 1, 0, 0},
+ {1, 5304, 640, 1, 1, 4, 4, 1, 0, 1, 0, 1, 1, 0}, {1, 1352, 1520, 1, 1, 2, 8, 1, 0, 1, 1, 1, 1, 0},
+ {1, 4096, 25088, 1, 1, 2, 16, 1, 0, 1, 0, 1, 0, 0}, {1, 732, 8988, 1, 1, 2, 8, 1, 0, 1, 0, 1, 0, 0}};
+
+ const GeMMConfigsMatrix configs_mnkb_n_small_best = {{102400, 4, 96, 1, 2, 2, 16, 1, 4, 1, 1, 1, 1, 0},
+ {102400, 2, 96, 1, 1, 2, 16, 1, 0, 1, 0, 1, 1, 1},
+ {16384, 4, 128, 1, 1, 2, 16, 1, 0, 1, 0, 1, 1, 1},
+ {16384, 2, 128, 1, 1, 2, 16, 1, 0, 1, 1, 1, 1, 1}};
+
+ const GeMMConfigsMatrix configs_mnkb_n_small_fallback = {{102400, 4, 96, 1, 2, 2, 16, 1, 4, 1, 1, 1, 1, 0},
+ {102400, 2, 96, 1, 1, 2, 16, 1, 0, 1, 1, 1, 1, 0},
+ {16384, 4, 128, 1, 2, 2, 16, 1, 2, 1, 1, 1, 1, 0},
+ {16384, 2, 128, 1, 1, 2, 16, 1, 0, 1, 1, 1, 1, 0}};
+
+ const GeMMConfigsMatrix configs_mnkb_m_gt_n_best = {
+ {25584, 88, 16, 1, 4, 8, 4, 1, 8, 1, 1, 1, 0, 0}, {25584, 16, 68, 1, 4, 4, 8, 1, 16, 1, 1, 1, 0, 1},
+ {369664, 32, 28, 1, 5, 4, 4, 1, 64, 1, 1, 1, 0, 1}, {65792, 44, 24, 1, 4, 8, 4, 1, 128, 1, 1, 1, 0, 0},
+ {23036, 56, 736, 1, 4, 4, 8, 1, 64, 1, 1, 1, 0, 1}, {90968, 40, 600, 1, 4, 4, 8, 1, 64, 1, 1, 1, 0, 1},
+ {8944, 32, 776, 1, 4, 4, 8, 1, 64, 1, 1, 1, 0, 1}, {50176, 64, 300, 1, 4, 8, 4, 1, 128, 1, 1, 1, 0, 0},
+ {16544, 104, 160, 1, 4, 4, 8, 1, 64, 1, 1, 1, 0, 1}, {12604, 60, 160, 1, 4, 4, 8, 1, 64, 1, 1, 1, 0, 1},
+ {29584, 32, 28, 1, 4, 4, 4, 1, 128, 1, 1, 1, 0, 0}, {12544, 32, 27, 1, 2, 8, 8, 1, 128, 1, 1, 1, 0, 0},
+ {2688, 136, 1492, 1, 8, 4, 4, 1, 128, 1, 1, 1, 0, 0}, {3728, 96, 196, 1, 4, 8, 4, 1, 128, 1, 1, 1, 0, 0}};
+
+ const GeMMConfigsMatrix configs_mnkb_m_gt_n_fallback = {
+ {25584, 88, 16, 1, 4, 8, 4, 1, 8, 1, 1, 1, 0, 0}, {25584, 16, 68, 1, 2, 4, 8, 1, 4, 1, 1, 1, 0, 0},
+ {369664, 32, 28, 1, 5, 4, 4, 1, 256, 1, 1, 1, 0, 0}, {65792, 44, 24, 1, 4, 8, 4, 1, 128, 1, 1, 1, 0, 0},
+ {23036, 56, 736, 1, 4, 8, 4, 1, 64, 1, 1, 1, 0, 0}, {90968, 40, 600, 1, 4, 8, 4, 1, 64, 1, 1, 1, 0, 0},
+ {8944, 32, 776, 1, 4, 4, 8, 1, 64, 1, 1, 1, 0, 0}, {50176, 64, 300, 1, 4, 8, 4, 1, 128, 1, 1, 1, 0, 0},
+ {16544, 104, 160, 1, 4, 4, 8, 1, 64, 1, 1, 1, 0, 0}, {12604, 60, 160, 1, 4, 4, 8, 1, 256, 1, 1, 1, 0, 0},
+ {29584, 32, 28, 1, 4, 4, 4, 1, 128, 1, 1, 1, 0, 0}, {12544, 32, 27, 1, 2, 8, 8, 1, 128, 1, 1, 1, 0, 0},
+ {2688, 136, 1492, 1, 8, 4, 4, 1, 128, 1, 1, 1, 0, 0}, {3728, 96, 196, 1, 4, 8, 4, 1, 128, 1, 1, 1, 0, 0}};
+
+ const GeMMConfigsMatrix configs_mnkb_n_gt_m_best = {
+ {24, 488, 88, 1, 2, 4, 16, 1, 4, 1, 1, 1, 0, 0},
+ {49, 1024, 512, 1, 4, 4, 8, 1, 128, 1, 1, 1, 0, 1},
+ {49, 1024, 1024, 1, 4, 4, 8, 1, 64, 1, 1, 1, 0, 1},
};
- const GeMMConfigsMatrix configs_mnkb_squared_best =
- {
- { 72, 92, 136, 1, 2, 2, 8, 1, 128, 1, 1, 1, 1, 0 },
- { 268, 824, 5076, 1, 4, 8, 4, 1, 256, 1, 1, 1, 0, 0 },
- { 180, 420, 952, 1, 4, 4, 8, 1, 64, 1, 1, 1, 0, 1 },
- { 1000, 152, 304, 1, 4, 4, 8, 1, 128, 1, 1, 1, 0, 0 },
- { 272, 400, 2116, 1, 4, 8, 4, 1, 64, 1, 1, 1, 0, 0 },
- { 196, 512, 512, 1, 5, 4, 4, 1, 64, 1, 1, 1, 0, 1 },
- { 24, 88, 236, 1, 2, 2, 8, 1, 64, 1, 1, 1, 1, 0 },
- { 24, 88, 488, 1, 2, 2, 8, 1, 64, 1, 1, 1, 1, 0 }
+ const GeMMConfigsMatrix configs_mnkb_n_gt_m_fallback = {
+ {24, 488, 88, 1, 2, 4, 16, 1, 4, 1, 1, 1, 0, 0},
+ {49, 1024, 512, 1, 4, 4, 8, 1, 128, 1, 1, 1, 0, 0},
+ {49, 1024, 1024, 1, 4, 4, 8, 1, 256, 1, 1, 1, 0, 0},
};
- const GeMMConfigsMatrix configs_mnkb_squared_fallback =
- {
- { 72, 92, 136, 1, 2, 2, 8, 1, 128, 1, 1, 1, 1, 0 },
- { 268, 824, 5076, 1, 4, 8, 4, 1, 256, 1, 1, 1, 0, 0 },
- { 180, 420, 952, 1, 4, 4, 8, 1, 128, 1, 1, 1, 0, 0 },
- { 1000, 152, 304, 1, 4, 4, 8, 1, 128, 1, 1, 1, 0, 0 },
- { 272, 400, 2116, 1, 4, 8, 4, 1, 64, 1, 1, 1, 0, 0 },
- { 196, 512, 512, 1, 5, 4, 4, 1, 256, 1, 1, 1, 0, 0 },
- { 24, 88, 236, 1, 2, 2, 8, 1, 64, 1, 1, 1, 1, 0 },
- { 24, 88, 488, 1, 2, 2, 8, 1, 64, 1, 1, 1, 1, 0 }
- };
-
- const GeMMConfigsMatrix configs_mnkb_best_batched =
- {
- { 3136, 64, 64, 36, 4, 8, 4, 1, 64, 1, 1, 1, 0, 0 },
- { 4096, 48, 32, 36, 4, 4, 8, 1, 64, 1, 1, 1, 0, 1 },
- { 688, 92, 68, 32, 4, 8, 4, 1, 64, 1, 1, 1, 0, 0 },
- { 24, 464, 412, 24, 4, 4, 8, 1, 128, 1, 1, 1, 0, 0 },
- { 112, 184, 144, 28, 4, 8, 4, 1, 64, 1, 1, 1, 0, 0 },
- { 5776, 64, 32, 36, 4, 8, 4, 1, 64, 1, 1, 1, 0, 0 },
- { 1568, 64, 40, 36, 4, 8, 4, 1, 64, 1, 1, 1, 0, 0 },
- { 2920, 64, 64, 24, 4, 8, 4, 1, 64, 1, 1, 1, 0, 0 }
- };
-
- const GeMMConfigsMatrix configs_mnkb_fallback_batched =
- {
- { 3136, 64, 64, 36, 4, 8, 4, 1, 64, 1, 1, 1, 0, 0 },
- { 4096, 48, 32, 36, 4, 4, 8, 1, 128, 1, 1, 1, 0, 0 },
- { 688, 92, 68, 32, 4, 8, 4, 1, 64, 1, 1, 1, 0, 0 },
- { 24, 464, 412, 24, 4, 4, 8, 1, 128, 1, 1, 1, 0, 0 },
- { 112, 184, 144, 28, 4, 8, 4, 1, 64, 1, 1, 1, 0, 0 },
- { 5776, 64, 32, 36, 4, 8, 4, 1, 64, 1, 1, 1, 0, 0 },
- { 1568, 64, 40, 36, 4, 8, 4, 1, 64, 1, 1, 1, 0, 0 },
- { 2920, 64, 64, 24, 4, 8, 4, 1, 64, 1, 1, 1, 0, 0 }
- };
+ const GeMMConfigsMatrix configs_mnkb_squared_best = {
+ {72, 92, 136, 1, 2, 2, 8, 1, 128, 1, 1, 1, 1, 0}, {268, 824, 5076, 1, 4, 8, 4, 1, 256, 1, 1, 1, 0, 0},
+ {180, 420, 952, 1, 4, 4, 8, 1, 64, 1, 1, 1, 0, 1}, {1000, 152, 304, 1, 4, 4, 8, 1, 128, 1, 1, 1, 0, 0},
+ {272, 400, 2116, 1, 4, 8, 4, 1, 64, 1, 1, 1, 0, 0}, {196, 512, 512, 1, 5, 4, 4, 1, 64, 1, 1, 1, 0, 1},
+ {24, 88, 236, 1, 2, 2, 8, 1, 64, 1, 1, 1, 1, 0}, {24, 88, 488, 1, 2, 2, 8, 1, 64, 1, 1, 1, 1, 0}};
+
+ const GeMMConfigsMatrix configs_mnkb_squared_fallback = {
+ {72, 92, 136, 1, 2, 2, 8, 1, 128, 1, 1, 1, 1, 0}, {268, 824, 5076, 1, 4, 8, 4, 1, 256, 1, 1, 1, 0, 0},
+ {180, 420, 952, 1, 4, 4, 8, 1, 128, 1, 1, 1, 0, 0}, {1000, 152, 304, 1, 4, 4, 8, 1, 128, 1, 1, 1, 0, 0},
+ {272, 400, 2116, 1, 4, 8, 4, 1, 64, 1, 1, 1, 0, 0}, {196, 512, 512, 1, 5, 4, 4, 1, 256, 1, 1, 1, 0, 0},
+ {24, 88, 236, 1, 2, 2, 8, 1, 64, 1, 1, 1, 1, 0}, {24, 88, 488, 1, 2, 2, 8, 1, 64, 1, 1, 1, 1, 0}};
+
+ const GeMMConfigsMatrix configs_mnkb_best_batched = {
+ {3136, 64, 64, 36, 4, 8, 4, 1, 64, 1, 1, 1, 0, 0}, {4096, 48, 32, 36, 4, 4, 8, 1, 64, 1, 1, 1, 0, 1},
+ {688, 92, 68, 32, 4, 8, 4, 1, 64, 1, 1, 1, 0, 0}, {24, 464, 412, 24, 4, 4, 8, 1, 128, 1, 1, 1, 0, 0},
+ {112, 184, 144, 28, 4, 8, 4, 1, 64, 1, 1, 1, 0, 0}, {5776, 64, 32, 36, 4, 8, 4, 1, 64, 1, 1, 1, 0, 0},
+ {1568, 64, 40, 36, 4, 8, 4, 1, 64, 1, 1, 1, 0, 0}, {2920, 64, 64, 24, 4, 8, 4, 1, 64, 1, 1, 1, 0, 0}};
+
+ const GeMMConfigsMatrix configs_mnkb_fallback_batched = {
+ {3136, 64, 64, 36, 4, 8, 4, 1, 64, 1, 1, 1, 0, 0}, {4096, 48, 32, 36, 4, 4, 8, 1, 128, 1, 1, 1, 0, 0},
+ {688, 92, 68, 32, 4, 8, 4, 1, 64, 1, 1, 1, 0, 0}, {24, 464, 412, 24, 4, 4, 8, 1, 128, 1, 1, 1, 0, 0},
+ {112, 184, 144, 28, 4, 8, 4, 1, 64, 1, 1, 1, 0, 0}, {5776, 64, 32, 36, 4, 8, 4, 1, 64, 1, 1, 1, 0, 0},
+ {1568, 64, 40, 36, 4, 8, 4, 1, 64, 1, 1, 1, 0, 0}, {2920, 64, 64, 24, 4, 8, 4, 1, 64, 1, 1, 1, 0, 0}};
const GeMMConfigsMatrix *configs_best_to_use = nullptr;
const GeMMConfigsMatrix *configs_fallback_to_use = nullptr;
- if(b == 1)
+ if (b == 1)
{
constexpr float ratio_m_gt_n = 10.f;
constexpr float ratio_n_gt_m = 0.1f;
constexpr unsigned int n_small_thr = 4;
const float ratio = static_cast<float>(m) / static_cast<float>(n);
- if(m == 1)
+ if (m == 1)
{
// We do not need fallback in this case, as we never use cl_image for the rhs tensor
configs_best_to_use = &configs_1nkb_best;
configs_fallback_to_use = &configs_1nkb_best;
}
- else if(n <= n_small_thr && ratio > ratio_m_gt_n)
+ else if (n <= n_small_thr && ratio > ratio_m_gt_n)
{
configs_best_to_use = &configs_mnkb_n_small_best;
configs_fallback_to_use = &configs_mnkb_n_small_fallback;
}
- else if(ratio > ratio_m_gt_n)
+ else if (ratio > ratio_m_gt_n)
{
configs_best_to_use = &configs_mnkb_m_gt_n_best;
configs_fallback_to_use = &configs_mnkb_m_gt_n_fallback;
}
- else if(ratio < ratio_n_gt_m)
+ else if (ratio < ratio_n_gt_m)
{
configs_best_to_use = &configs_mnkb_n_gt_m_best;
configs_fallback_to_use = &configs_mnkb_n_gt_m_fallback;
@@ -381,17 +328,17 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOn
std::tie(lhs_info0, rhs_info0) = find_lhs_rhs_info(*configs_best_to_use, m, n, k, b);
std::tie(lhs_info1, rhs_info1) = find_lhs_rhs_info(*configs_fallback_to_use, m, n, k, b);
- return select_lhs_rhs_info(std::make_pair(lhs_info0, rhs_info0),
- std::make_pair(lhs_info1, rhs_info1),
- n, k, b, DataType::F16);
+ return select_lhs_rhs_info(std::make_pair(lhs_info0, rhs_info0), std::make_pair(lhs_info1, rhs_info1), n, k, b,
+ DataType::F16);
}
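For readers decoding the GeMMConfigsMatrix tables above: each 14-column row pairs a reference problem shape with the tile parameters that configure_lhs_rhs_info() receives. Below is a minimal, standalone sketch of that expansion, assuming the columns are ordered {m, n, k, b, m0, n0, k0, v0, h0, lhs_interleave, rhs_interleave, lhs_transpose, rhs_transpose, export_cl_image}; the column meaning and the helper are illustrative assumptions, not part of this patch.

// Hypothetical sketch: expand one config-table row into the argument list of
// configure_lhs_rhs_info(). The column order is an assumption for illustration only.
#include <array>
#include <cstdio>

using ConfigRow = std::array<unsigned int, 14>;

static void print_row_as_call(const ConfigRow &r)
{
    // r[0..3]  : reference m, n, k, b matched against the runtime GEMM shape
    // r[4..8]  : m0, n0, k0, v0, h0 block sizes
    // r[9..13] : interleave / transpose / export-to-cl_image flags
    std::printf("configure_lhs_rhs_info(m, n, %u, %u, %u, %u, %u, %u, %u, %u, %u, %u)\n", r[4], r[5], r[6], r[7],
                r[8], r[9], r[10], r[11], r[12], r[13]);
}

int main()
{
    // First row of the "m_gt_n best" table above.
    print_row_as_call({25584, 88, 16, 1, 4, 8, 4, 1, 8, 1, 1, 1, 0, 0});
    return 0;
}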
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G77_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G77_u8(
+ unsigned int m, unsigned int n, unsigned int k, unsigned int b)
{
ARM_COMPUTE_UNUSED(k);
ARM_COMPUTE_UNUSED(b);
- if(m == 1)
+ if (m == 1)
{
const unsigned int h0 = std::max(n / 2, 1U);
return configure_lhs_rhs_info(m, n, 1, 4, 16, 1, h0, 0, 1, 0, 1);
@@ -399,7 +346,7 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOn
else
{
const int h0 = std::max(std::min(static_cast<int>(n / 4), static_cast<int>(256)), static_cast<int>(1));
- if(m >= 28)
+ if (m >= 28)
{
return configure_lhs_rhs_info(m, n, 4, 4, 16, 1, h0, 0, 1, 0, 1);
}
@@ -410,30 +357,31 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOn
}
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G78_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G78_f32(
+ unsigned int m, unsigned int n, unsigned int k, unsigned int b)
{
const float r_mn = static_cast<float>(m) / static_cast<float>(n);
const float r_mk = static_cast<float>(m) / static_cast<float>(k);
const float r_nk = static_cast<float>(n) / static_cast<float>(k);
const float workload = (static_cast<float>(m) * static_cast<float>(n) * static_cast<float>(b)) / 20.0f;
- if(m == 1)
+ if (m == 1)
{
- if(workload <= 278.7000f)
+ if (workload <= 278.7000f)
{
- if(workload <= 7.5000f)
+ if (workload <= 7.5000f)
{
return configure_lhs_rhs_info(m, n, 1, 2, 8, 1, 2, 0, 1, 1, 0, 0);
}
else
{
- if(r_mn <= 0.0031f)
+ if (r_mn <= 0.0031f)
{
- if(workload <= 256.6000f)
+ if (workload <= 256.6000f)
{
- if(workload <= 16.7500f)
+ if (workload <= 16.7500f)
{
- if(r_nk <= 1.6671f)
+ if (r_nk <= 1.6671f)
{
return configure_lhs_rhs_info(m, n, 1, 2, 2, 1, 32, 0, 0, 0, 1, 0);
}
@@ -454,15 +402,15 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOn
}
else
{
- if(r_mk <= 0.0027f)
+ if (r_mk <= 0.0027f)
{
- if(r_mk <= 0.0014f)
+ if (r_mk <= 0.0014f)
{
return configure_lhs_rhs_info(m, n, 1, 2, 2, 1, 32, 0, 0, 0, 1, 0);
}
else
{
- if(workload <= 8.9500f)
+ if (workload <= 8.9500f)
{
return configure_lhs_rhs_info(m, n, 1, 2, 8, 1, 2, 0, 1, 1, 0, 0);
}
@@ -474,13 +422,13 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOn
}
else
{
- if(workload <= 14.1500f)
+ if (workload <= 14.1500f)
{
return configure_lhs_rhs_info(m, n, 1, 2, 8, 1, 2, 0, 1, 1, 0, 0);
}
else
{
- if(r_mk <= 0.0041f)
+ if (r_mk <= 0.0041f)
{
return configure_lhs_rhs_info(m, n, 1, 2, 2, 1, 32, 0, 0, 0, 1, 0);
}
@@ -495,9 +443,9 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOn
}
else
{
- if(workload <= 363.7000f)
+ if (workload <= 363.7000f)
{
- if(r_mk <= 0.0031f)
+ if (r_mk <= 0.0031f)
{
return configure_lhs_rhs_info(m, n, 1, 4, 2, 1, 32, 0, 1, 0, 1, 0);
}
@@ -514,9 +462,9 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOn
}
else
{
- if(workload <= 1384.8000f)
+ if (workload <= 1384.8000f)
{
- if(workload <= 704.0000f)
+ if (workload <= 704.0000f)
{
return configure_lhs_rhs_info(m, n, 2, 2, 4, 1, 32, 0, 1, 0, 1, 0);
}
@@ -527,9 +475,9 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOn
}
else
{
- if(workload <= 16761.6006f)
+ if (workload <= 16761.6006f)
{
- if(r_mn <= 187.1250f)
+ if (r_mn <= 187.1250f)
{
return configure_lhs_rhs_info(m, n, 4, 4, 4, 1, 16, 0, 0, 0, 1, 1);
}
@@ -540,7 +488,7 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOn
}
else
{
- if(r_mk <= 432.4630f)
+ if (r_mk <= 432.4630f)
{
return configure_lhs_rhs_info(m, n, 5, 4, 4, 1, 16, 0, 0, 0, 1, 1);
}
@@ -553,42 +501,37 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOn
}
}
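As a concrete reading of the decision tree in configure_G78_f32 above: for a GEMV-like call with m = 1, n = 128, b = 1 (k does not enter this particular leaf), the workload is 1 * 128 * 1 / 20 = 6.4, which satisfies workload <= 278.7 and then workload <= 7.5, so the first visible leaf fires and returns configure_lhs_rhs_info(m, n, 1, 2, 8, 1, 2, 0, 1, 1, 0, 0). A standalone sketch of just that branch follows; the rest of the tree is elided and this is illustrative, not the library code.

#include <cstdio>

int main()
{
    const unsigned int m = 1, n = 128, b = 1;
    // Same workload metric as the heuristic above: (m * n * b) / 20.
    const float workload = (static_cast<float>(m) * static_cast<float>(n) * static_cast<float>(b)) / 20.0f;

    if (m == 1 && workload <= 278.7f && workload <= 7.5f)
    {
        // Corresponds to the leaf above that returns
        // configure_lhs_rhs_info(m, n, 1, 2, 8, 1, 2, 0, 1, 1, 0, 0).
        std::printf("workload = %.1f -> configure_lhs_rhs_info(m, n, 1, 2, 8, 1, 2, 0, 1, 1, 0, 0)\n", workload);
    }
    return 0;
}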
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G78_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G78_f16(
+ unsigned int m, unsigned int n, unsigned int k, unsigned int b)
{
const float workload = (static_cast<float>(m) * static_cast<float>(n) * static_cast<float>(b)) / 20.0f;
const float r_mn = static_cast<float>(m) / static_cast<float>(n);
const float r_mk = static_cast<float>(m) / static_cast<float>(k);
const float r_nk = static_cast<float>(n) / static_cast<float>(k);
- if(m == 1)
+ if (m == 1)
{
- const GeMMConfigsMatrix configs_mnkb_best =
- {
- { 1, 8984, 640, 1, 1, 4, 2, 1, 0, 1, 0, 1, 1, 0 },
- { 1, 420, 392, 1, 1, 2, 4, 1, 0, 1, 0, 1, 0, 0 },
- { 1, 644, 5288, 1, 1, 2, 4, 1, 0, 1, 0, 1, 0, 0 },
- { 1, 6512, 6404, 1, 1, 2, 2, 1, 0, 1, 0, 1, 1, 0 },
- { 1, 5304, 640, 1, 1, 2, 2, 1, 0, 1, 0, 1, 0, 0 },
- { 1, 1352, 1520, 1, 1, 2, 4, 1, 0, 1, 0, 1, 0, 0 },
- { 1, 4096, 25088, 1, 1, 2, 4, 1, 0, 1, 0, 1, 0, 0 },
- { 1, 732, 8988, 1, 1, 2, 4, 1, 0, 1, 0, 1, 0, 0 }
- };
+ const GeMMConfigsMatrix configs_mnkb_best = {
+ {1, 8984, 640, 1, 1, 4, 2, 1, 0, 1, 0, 1, 1, 0}, {1, 420, 392, 1, 1, 2, 4, 1, 0, 1, 0, 1, 0, 0},
+ {1, 644, 5288, 1, 1, 2, 4, 1, 0, 1, 0, 1, 0, 0}, {1, 6512, 6404, 1, 1, 2, 2, 1, 0, 1, 0, 1, 1, 0},
+ {1, 5304, 640, 1, 1, 2, 2, 1, 0, 1, 0, 1, 0, 0}, {1, 1352, 1520, 1, 1, 2, 4, 1, 0, 1, 0, 1, 0, 0},
+ {1, 4096, 25088, 1, 1, 2, 4, 1, 0, 1, 0, 1, 0, 0}, {1, 732, 8988, 1, 1, 2, 4, 1, 0, 1, 0, 1, 0, 0}};
return find_lhs_rhs_info(configs_mnkb_best, m, n, k, b);
}
else
{
- if(workload <= 1384.8000f)
+ if (workload <= 1384.8000f)
{
- if(r_nk <= 0.8333f)
+ if (r_nk <= 0.8333f)
{
- if(r_mk <= 0.9119f)
+ if (r_mk <= 0.9119f)
{
return configure_lhs_rhs_info(m, n, 2, 2, 16, 1, 4, 0, 1, 0, 1, 1);
}
else
{
- if(r_nk <= 0.1181f)
+ if (r_nk <= 0.1181f)
{
return configure_lhs_rhs_info(m, n, 2, 2, 8, 1, 32, 0, 0, 1, 0, 0);
}
@@ -600,7 +543,7 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOn
}
else
{
- if(r_mk <= 1.0013f)
+ if (r_mk <= 1.0013f)
{
return configure_lhs_rhs_info(m, n, 4, 4, 8, 1, 32, 0, 1, 1, 0, 1);
}
@@ -612,11 +555,11 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOn
}
else
{
- if(workload <= 11404.7998f)
+ if (workload <= 11404.7998f)
{
- if(r_mk <= 2.2884f)
+ if (r_mk <= 2.2884f)
{
- if(r_nk <= 0.9286f)
+ if (r_nk <= 0.9286f)
{
return configure_lhs_rhs_info(m, n, 4, 4, 8, 1, 4, 0, 1, 1, 0, 1);
}
@@ -632,9 +575,9 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOn
}
else
{
- if(r_nk <= 1.1926f)
+ if (r_nk <= 1.1926f)
{
- if(r_mn <= 1385.7917f)
+ if (r_mn <= 1385.7917f)
{
return configure_lhs_rhs_info(m, n, 6, 4, 8, 1, 4, 0, 1, 1, 0, 1);
}
@@ -652,12 +595,13 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOn
}
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G715_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G715_f32(
+ unsigned int m, unsigned int n, unsigned int k, unsigned int b)
{
unsigned int best_m0;
unsigned int best_n0;
- if(is_mmul_kernel_preferred(m, n, k, b, DataType::F32, best_m0, best_n0))
+ if (is_mmul_kernel_preferred(m, n, k, b, DataType::F32, best_m0, best_n0))
{
return configure_lhs_rhs_info(m, n, best_m0, best_n0, 1, 1, 4, false, true, false, false, true);
}
@@ -667,153 +611,101 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOn
}
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G710_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G710_f16(
+ unsigned int m, unsigned int n, unsigned int k, unsigned int b)
{
- const GeMMConfigsMatrix configs_1nkb_best =
- {
- { 1, 8984, 640, 1, 1, 2, 2, 1, 0, 1, 0, 1, 0, 0 },
- { 1, 420, 392, 1, 1, 2, 8, 1, 0, 1, 0, 1, 0, 0 },
- { 1, 644, 5288, 1, 1, 2, 8, 1, 0, 1, 0, 1, 0, 0 },
- { 1, 6512, 6404, 1, 1, 2, 4, 1, 0, 1, 0, 1, 0, 0 },
- { 1, 5304, 640, 1, 1, 2, 4, 1, 0, 1, 0, 1, 0, 0 },
- { 1, 1352, 1520, 1, 1, 2, 4, 1, 0, 1, 0, 1, 0, 0 },
- { 1, 4096, 25088, 1, 1, 2, 8, 1, 0, 1, 0, 1, 1, 0 },
- { 1, 732, 8988, 1, 1, 2, 8, 1, 0, 1, 0, 1, 0, 0 }
+ const GeMMConfigsMatrix configs_1nkb_best = {
+ {1, 8984, 640, 1, 1, 2, 2, 1, 0, 1, 0, 1, 0, 0}, {1, 420, 392, 1, 1, 2, 8, 1, 0, 1, 0, 1, 0, 0},
+ {1, 644, 5288, 1, 1, 2, 8, 1, 0, 1, 0, 1, 0, 0}, {1, 6512, 6404, 1, 1, 2, 4, 1, 0, 1, 0, 1, 0, 0},
+ {1, 5304, 640, 1, 1, 2, 4, 1, 0, 1, 0, 1, 0, 0}, {1, 1352, 1520, 1, 1, 2, 4, 1, 0, 1, 0, 1, 0, 0},
+ {1, 4096, 25088, 1, 1, 2, 8, 1, 0, 1, 0, 1, 1, 0}, {1, 732, 8988, 1, 1, 2, 8, 1, 0, 1, 0, 1, 0, 0}};
+
+ const GeMMConfigsMatrix configs_mnkb_n_small_best = {{102400, 4, 96, 1, 1, 2, 16, 1, 0, 1, 0, 1, 0, 0},
+ {102400, 2, 96, 1, 1, 2, 16, 1, 0, 1, 0, 1, 0, 0},
+ {16384, 4, 128, 1, 1, 2, 16, 1, 0, 1, 0, 1, 0, 0},
+ {16384, 2, 128, 1, 1, 2, 16, 1, 0, 1, 0, 1, 0, 0}};
+
+ const GeMMConfigsMatrix configs_mnkb_m_gt_n_best = {
+ {25584, 88, 16, 1, 4, 8, 4, 1, 4, 1, 1, 1, 0, 0}, {25584, 16, 68, 1, 2, 4, 16, 1, 8, 1, 1, 1, 0, 1},
+ {369664, 32, 28, 1, 2, 8, 4, 1, 128, 1, 1, 1, 0, 0}, {65792, 44, 24, 1, 4, 8, 4, 1, 8, 1, 1, 1, 0, 0},
+ {23036, 56, 736, 1, 4, 4, 8, 1, 4, 1, 1, 1, 0, 1}, {90968, 40, 600, 1, 4, 4, 8, 1, 4, 1, 1, 1, 0, 1},
+ {8944, 32, 776, 1, 4, 4, 8, 1, 4, 1, 1, 1, 0, 1}, {2688, 136, 1492, 1, 4, 4, 8, 1, 4, 1, 1, 1, 0, 1},
+ {50176, 64, 300, 1, 4, 8, 4, 1, 8, 1, 1, 1, 0, 1}, {16544, 104, 160, 1, 4, 4, 8, 1, 4, 1, 1, 1, 0, 1},
+ {12604, 60, 160, 1, 4, 4, 8, 1, 4, 1, 1, 1, 0, 1}, {3728, 96, 196, 1, 4, 4, 8, 1, 4, 1, 1, 1, 0, 1},
+ {29584, 32, 28, 1, 2, 8, 4, 1, 16, 1, 1, 1, 0, 0}, {12544, 32, 27, 1, 2, 8, 8, 1, 16, 1, 1, 1, 0, 0},
};
- const GeMMConfigsMatrix configs_mnkb_n_small_best =
- {
- { 102400, 4, 96, 1, 1, 2, 16, 1, 0, 1, 0, 1, 0, 0 },
- { 102400, 2, 96, 1, 1, 2, 16, 1, 0, 1, 0, 1, 0, 0 },
- { 16384, 4, 128, 1, 1, 2, 16, 1, 0, 1, 0, 1, 0, 0 },
- { 16384, 2, 128, 1, 1, 2, 16, 1, 0, 1, 0, 1, 0, 0 }
+ const GeMMConfigsMatrix configs_mnkb_m_gt_n_fallback = {
+ {25584, 88, 16, 1, 4, 8, 4, 1, 4, 1, 1, 1, 0, 0}, {25584, 16, 68, 1, 2, 4, 8, 1, 4, 1, 1, 1, 1, 0},
+ {369664, 32, 28, 1, 2, 8, 4, 1, 128, 1, 1, 1, 0, 0}, {65792, 44, 24, 1, 4, 8, 4, 1, 8, 1, 1, 1, 0, 0},
+ {23036, 56, 736, 1, 4, 8, 4, 1, 16, 1, 1, 1, 0, 0}, {90968, 40, 600, 1, 4, 4, 8, 1, 4, 1, 1, 1, 0, 0},
+ {8944, 32, 776, 1, 2, 8, 8, 1, 16, 1, 1, 1, 0, 0}, {2688, 136, 1492, 1, 4, 4, 8, 1, 8, 1, 1, 1, 0, 0},
+ {50176, 64, 300, 1, 4, 8, 4, 1, 128, 1, 1, 1, 0, 0}, {16544, 104, 160, 1, 4, 8, 4, 1, 16, 1, 1, 1, 0, 0},
+ {12604, 60, 160, 1, 2, 8, 8, 1, 8, 1, 1, 1, 0, 0}, {3728, 96, 196, 1, 2, 8, 8, 1, 64, 1, 1, 1, 0, 0},
+ {29584, 32, 28, 1, 2, 8, 4, 1, 16, 1, 1, 1, 0, 0}, {12544, 32, 27, 1, 2, 8, 8, 1, 16, 1, 1, 1, 0, 0},
};
- const GeMMConfigsMatrix configs_mnkb_m_gt_n_best =
- {
- { 25584, 88, 16, 1, 4, 8, 4, 1, 4, 1, 1, 1, 0, 0 },
- { 25584, 16, 68, 1, 2, 4, 16, 1, 8, 1, 1, 1, 0, 1 },
- { 369664, 32, 28, 1, 2, 8, 4, 1, 128, 1, 1, 1, 0, 0 },
- { 65792, 44, 24, 1, 4, 8, 4, 1, 8, 1, 1, 1, 0, 0 },
- { 23036, 56, 736, 1, 4, 4, 8, 1, 4, 1, 1, 1, 0, 1 },
- { 90968, 40, 600, 1, 4, 4, 8, 1, 4, 1, 1, 1, 0, 1 },
- { 8944, 32, 776, 1, 4, 4, 8, 1, 4, 1, 1, 1, 0, 1 },
- { 2688, 136, 1492, 1, 4, 4, 8, 1, 4, 1, 1, 1, 0, 1 },
- { 50176, 64, 300, 1, 4, 8, 4, 1, 8, 1, 1, 1, 0, 1 },
- { 16544, 104, 160, 1, 4, 4, 8, 1, 4, 1, 1, 1, 0, 1 },
- { 12604, 60, 160, 1, 4, 4, 8, 1, 4, 1, 1, 1, 0, 1 },
- { 3728, 96, 196, 1, 4, 4, 8, 1, 4, 1, 1, 1, 0, 1 },
- { 29584, 32, 28, 1, 2, 8, 4, 1, 16, 1, 1, 1, 0, 0 },
- { 12544, 32, 27, 1, 2, 8, 8, 1, 16, 1, 1, 1, 0, 0 },
- };
+ const GeMMConfigsMatrix configs_mnkb_n_gt_m_best = {{24, 488, 88, 1, 2, 2, 8, 1, 8, 1, 1, 1, 1, 0},
+ {49, 1024, 512, 1, 2, 4, 8, 1, 8, 1, 1, 1, 1, 0},
+ {49, 1024, 1024, 1, 2, 4, 8, 1, 4, 1, 1, 1, 1, 0}};
- const GeMMConfigsMatrix configs_mnkb_m_gt_n_fallback =
- {
- { 25584, 88, 16, 1, 4, 8, 4, 1, 4, 1, 1, 1, 0, 0 },
- { 25584, 16, 68, 1, 2, 4, 8, 1, 4, 1, 1, 1, 1, 0 },
- { 369664, 32, 28, 1, 2, 8, 4, 1, 128, 1, 1, 1, 0, 0 },
- { 65792, 44, 24, 1, 4, 8, 4, 1, 8, 1, 1, 1, 0, 0 },
- { 23036, 56, 736, 1, 4, 8, 4, 1, 16, 1, 1, 1, 0, 0 },
- { 90968, 40, 600, 1, 4, 4, 8, 1, 4, 1, 1, 1, 0, 0 },
- { 8944, 32, 776, 1, 2, 8, 8, 1, 16, 1, 1, 1, 0, 0 },
- { 2688, 136, 1492, 1, 4, 4, 8, 1, 8, 1, 1, 1, 0, 0 },
- { 50176, 64, 300, 1, 4, 8, 4, 1, 128, 1, 1, 1, 0, 0 },
- { 16544, 104, 160, 1, 4, 8, 4, 1, 16, 1, 1, 1, 0, 0 },
- { 12604, 60, 160, 1, 2, 8, 8, 1, 8, 1, 1, 1, 0, 0 },
- { 3728, 96, 196, 1, 2, 8, 8, 1, 64, 1, 1, 1, 0, 0 },
- { 29584, 32, 28, 1, 2, 8, 4, 1, 16, 1, 1, 1, 0, 0 },
- { 12544, 32, 27, 1, 2, 8, 8, 1, 16, 1, 1, 1, 0, 0 },
- };
+ const GeMMConfigsMatrix configs_mnkb_n_gt_m_fallback = {{24, 488, 88, 1, 2, 2, 8, 1, 8, 1, 1, 1, 1, 0},
+ {49, 1024, 512, 1, 2, 4, 8, 1, 8, 1, 1, 1, 1, 0},
+ {49, 1024, 1024, 1, 2, 4, 8, 1, 4, 1, 1, 1, 1, 0}};
- const GeMMConfigsMatrix configs_mnkb_n_gt_m_best =
- {
- { 24, 488, 88, 1, 2, 2, 8, 1, 8, 1, 1, 1, 1, 0 },
- { 49, 1024, 512, 1, 2, 4, 8, 1, 8, 1, 1, 1, 1, 0 },
- { 49, 1024, 1024, 1, 2, 4, 8, 1, 4, 1, 1, 1, 1, 0 }
+ const GeMMConfigsMatrix configs_mnkb_squared_best = {
+ {24, 88, 236, 1, 2, 2, 8, 1, 4, 1, 1, 1, 1, 0}, {24, 88, 488, 1, 2, 2, 8, 1, 4, 1, 1, 1, 1, 0},
+ {72, 92, 136, 1, 2, 2, 8, 1, 32, 1, 1, 1, 1, 0}, {268, 824, 5076, 1, 4, 4, 8, 1, 4, 1, 1, 1, 0, 1},
+ {180, 420, 952, 1, 4, 4, 8, 1, 16, 1, 1, 1, 0, 1}, {1000, 152, 304, 1, 4, 8, 4, 1, 32, 1, 1, 1, 0, 0},
+ {272, 400, 2116, 1, 4, 4, 8, 1, 4, 1, 1, 1, 0, 1}, {196, 512, 512, 1, 5, 2, 8, 1, 4, 1, 1, 1, 1, 1},
};
- const GeMMConfigsMatrix configs_mnkb_n_gt_m_fallback =
- {
- { 24, 488, 88, 1, 2, 2, 8, 1, 8, 1, 1, 1, 1, 0 },
- { 49, 1024, 512, 1, 2, 4, 8, 1, 8, 1, 1, 1, 1, 0 },
- { 49, 1024, 1024, 1, 2, 4, 8, 1, 4, 1, 1, 1, 1, 0 }
- };
-
- const GeMMConfigsMatrix configs_mnkb_squared_best =
- {
- { 24, 88, 236, 1, 2, 2, 8, 1, 4, 1, 1, 1, 1, 0 },
- { 24, 88, 488, 1, 2, 2, 8, 1, 4, 1, 1, 1, 1, 0 },
- { 72, 92, 136, 1, 2, 2, 8, 1, 32, 1, 1, 1, 1, 0 },
- { 268, 824, 5076, 1, 4, 4, 8, 1, 4, 1, 1, 1, 0, 1 },
- { 180, 420, 952, 1, 4, 4, 8, 1, 16, 1, 1, 1, 0, 1 },
- { 1000, 152, 304, 1, 4, 8, 4, 1, 32, 1, 1, 1, 0, 0 },
- { 272, 400, 2116, 1, 4, 4, 8, 1, 4, 1, 1, 1, 0, 1 },
- { 196, 512, 512, 1, 5, 2, 8, 1, 4, 1, 1, 1, 1, 1 },
+ const GeMMConfigsMatrix configs_mnkb_squared_fallback = {
+ {24, 88, 236, 1, 2, 2, 8, 1, 4, 1, 1, 1, 1, 0}, {24, 88, 488, 1, 2, 2, 8, 1, 4, 1, 1, 1, 1, 0},
+ {72, 92, 136, 1, 2, 2, 8, 1, 32, 1, 1, 1, 1, 0}, {268, 824, 5076, 1, 4, 8, 4, 1, 8, 1, 1, 1, 0, 0},
+ {180, 420, 952, 1, 5, 2, 8, 1, 8, 1, 1, 1, 1, 0}, {1000, 152, 304, 1, 4, 8, 4, 1, 32, 1, 1, 1, 0, 0},
+ {272, 400, 2116, 1, 2, 8, 4, 1, 4, 1, 1, 1, 0, 0}, {196, 512, 512, 1, 5, 2, 8, 1, 8, 1, 1, 1, 1, 0},
};
- const GeMMConfigsMatrix configs_mnkb_squared_fallback =
- {
- { 24, 88, 236, 1, 2, 2, 8, 1, 4, 1, 1, 1, 1, 0 },
- { 24, 88, 488, 1, 2, 2, 8, 1, 4, 1, 1, 1, 1, 0 },
- { 72, 92, 136, 1, 2, 2, 8, 1, 32, 1, 1, 1, 1, 0 },
- { 268, 824, 5076, 1, 4, 8, 4, 1, 8, 1, 1, 1, 0, 0 },
- { 180, 420, 952, 1, 5, 2, 8, 1, 8, 1, 1, 1, 1, 0 },
- { 1000, 152, 304, 1, 4, 8, 4, 1, 32, 1, 1, 1, 0, 0 },
- { 272, 400, 2116, 1, 2, 8, 4, 1, 4, 1, 1, 1, 0, 0 },
- { 196, 512, 512, 1, 5, 2, 8, 1, 8, 1, 1, 1, 1, 0 },
- };
+ const GeMMConfigsMatrix configs_mnkb_best_batched = {
+ {3136, 64, 64, 36, 4, 8, 4, 1, 16, 1, 1, 1, 0, 1}, {4096, 48, 32, 36, 4, 4, 8, 1, 4, 1, 1, 1, 0, 1},
+ {688, 92, 68, 32, 4, 8, 4, 1, 32, 1, 1, 1, 0, 1}, {24, 464, 412, 24, 4, 4, 8, 1, 4, 1, 1, 1, 0, 1},
+ {112, 184, 144, 28, 4, 4, 8, 1, 4, 1, 1, 1, 0, 1}, {5776, 64, 32, 36, 4, 4, 8, 1, 4, 1, 1, 1, 0, 1},
+ {1568, 64, 40, 36, 4, 8, 4, 1, 8, 1, 1, 1, 0, 1}, {2920, 64, 64, 24, 4, 8, 4, 1, 8, 1, 1, 1, 0, 1}};
- const GeMMConfigsMatrix configs_mnkb_best_batched =
- {
- { 3136, 64, 64, 36, 4, 8, 4, 1, 16, 1, 1, 1, 0, 1 },
- { 4096, 48, 32, 36, 4, 4, 8, 1, 4, 1, 1, 1, 0, 1 },
- { 688, 92, 68, 32, 4, 8, 4, 1, 32, 1, 1, 1, 0, 1 },
- { 24, 464, 412, 24, 4, 4, 8, 1, 4, 1, 1, 1, 0, 1 },
- { 112, 184, 144, 28, 4, 4, 8, 1, 4, 1, 1, 1, 0, 1 },
- { 5776, 64, 32, 36, 4, 4, 8, 1, 4, 1, 1, 1, 0, 1 },
- { 1568, 64, 40, 36, 4, 8, 4, 1, 8, 1, 1, 1, 0, 1 },
- { 2920, 64, 64, 24, 4, 8, 4, 1, 8, 1, 1, 1, 0, 1 }
- };
-
- const GeMMConfigsMatrix configs_mnkb_fallback_batched =
- {
- { 3136, 64, 64, 36, 4, 8, 4, 1, 8, 1, 1, 1, 0, 0 },
- { 4096, 48, 32, 36, 4, 4, 8, 1, 64, 1, 1, 1, 0, 0 },
- { 688, 92, 68, 32, 4, 8, 4, 1, 32, 1, 1, 1, 0, 0 },
- { 24, 464, 412, 24, 2, 8, 4, 1, 32, 1, 1, 1, 0, 0 },
- { 112, 184, 144, 28, 4, 4, 8, 1, 8, 1, 1, 1, 0, 0 },
- { 5776, 64, 32, 36, 2, 8, 8, 1, 32, 1, 1, 1, 0, 0 },
- { 1568, 64, 40, 36, 4, 8, 4, 1, 16, 1, 1, 1, 0, 0 },
- { 2920, 64, 64, 24, 4, 8, 4, 1, 8, 1, 1, 1, 0, 0 }
- };
+ const GeMMConfigsMatrix configs_mnkb_fallback_batched = {
+ {3136, 64, 64, 36, 4, 8, 4, 1, 8, 1, 1, 1, 0, 0}, {4096, 48, 32, 36, 4, 4, 8, 1, 64, 1, 1, 1, 0, 0},
+ {688, 92, 68, 32, 4, 8, 4, 1, 32, 1, 1, 1, 0, 0}, {24, 464, 412, 24, 2, 8, 4, 1, 32, 1, 1, 1, 0, 0},
+ {112, 184, 144, 28, 4, 4, 8, 1, 8, 1, 1, 1, 0, 0}, {5776, 64, 32, 36, 2, 8, 8, 1, 32, 1, 1, 1, 0, 0},
+ {1568, 64, 40, 36, 4, 8, 4, 1, 16, 1, 1, 1, 0, 0}, {2920, 64, 64, 24, 4, 8, 4, 1, 8, 1, 1, 1, 0, 0}};
const GeMMConfigsMatrix *configs_best_to_use = nullptr;
const GeMMConfigsMatrix *configs_fallback_to_use = nullptr;
- if(b == 1)
+ if (b == 1)
{
constexpr float ratio_m_gt_n = 10.f;
constexpr float ratio_n_gt_m = 0.1f;
constexpr unsigned int n_small_thr = 4;
const float ratio = static_cast<float>(m) / static_cast<float>(n);
- if(m == 1)
+ if (m == 1)
{
// We do not need fallback in this case, as we never use cl_image for the rhs tensor
configs_best_to_use = &configs_1nkb_best;
configs_fallback_to_use = &configs_1nkb_best;
}
- else if(n <= n_small_thr && ratio > ratio_m_gt_n)
+ else if (n <= n_small_thr && ratio > ratio_m_gt_n)
{
configs_best_to_use = &configs_mnkb_n_small_best;
configs_fallback_to_use = &configs_mnkb_n_small_best;
}
- else if(ratio > ratio_m_gt_n)
+ else if (ratio > ratio_m_gt_n)
{
configs_best_to_use = &configs_mnkb_m_gt_n_best;
configs_fallback_to_use = &configs_mnkb_m_gt_n_fallback;
}
- else if(ratio < ratio_n_gt_m)
+ else if (ratio < ratio_n_gt_m)
{
configs_best_to_use = &configs_mnkb_n_gt_m_best;
configs_fallback_to_use = &configs_mnkb_n_gt_m_fallback;
@@ -838,17 +730,17 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOn
std::tie(lhs_info0, rhs_info0) = find_lhs_rhs_info(*configs_best_to_use, m, n, k, b);
std::tie(lhs_info1, rhs_info1) = find_lhs_rhs_info(*configs_fallback_to_use, m, n, k, b);
- return select_lhs_rhs_info(std::make_pair(lhs_info0, rhs_info0),
- std::make_pair(lhs_info1, rhs_info1),
- n, k, b, DataType::F16);
+ return select_lhs_rhs_info(std::make_pair(lhs_info0, rhs_info0), std::make_pair(lhs_info1, rhs_info1), n, k, b,
+ DataType::F16);
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G715_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G715_f16(
+ unsigned int m, unsigned int n, unsigned int k, unsigned int b)
{
unsigned int best_m0;
unsigned int best_n0;
- if(is_mmul_kernel_preferred(m, n, k, b, DataType::F16, best_m0, best_n0))
+ if (is_mmul_kernel_preferred(m, n, k, b, DataType::F16, best_m0, best_n0))
{
return configure_lhs_rhs_info(m, n, best_m0, best_n0, 1, 1, 4, false, true, false, false, true);
}
diff --git a/src/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmDefaultConfigReshapedRhsOnlyValhall.h b/src/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmDefaultConfigReshapedRhsOnlyValhall.h
index f2952a3d30..a0ea337eb1 100644
--- a/src/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmDefaultConfigReshapedRhsOnlyValhall.h
+++ b/src/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmDefaultConfigReshapedRhsOnlyValhall.h
@@ -45,17 +45,26 @@ public:
ClGemmDefaultConfigReshapedRhsOnlyValhall(GPUTarget gpu);
// Inherited overridden method
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type) override;
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type) override;
private:
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G77_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G77_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G78_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G78_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G77_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G710_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G715_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G715_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure_G77_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure_G77_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure_G78_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure_G78_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure_G77_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure_G710_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure_G715_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure_G715_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
};
} // namespace gemm
} // namespace kernels
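Taken together with the factory in ClGemmReshapedOnlyRhsKernelConfig.h (the next file in this diff), the expected call pattern is roughly the following. This is a hedged sketch: the namespace spelling and the wrapper function are assumptions for illustration, and it only compiles against the library itself.

#include <utility>

#include "src/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmReshapedOnlyRhsKernelConfig.h"

// Illustrative wrapper: obtain lhs/rhs reshape info for the reshaped-only-RHS GEMM kernel.
std::pair<arm_compute::GEMMLHSMatrixInfo, arm_compute::GEMMRHSMatrixInfo>
pick_reshaped_rhs_only_config(arm_compute::GPUTarget target, unsigned int m, unsigned int n, unsigned int k, unsigned int b)
{
    using namespace arm_compute::opencl::kernels::gemm;
    // create() dispatches on the GPU architecture (see the switch in the factory below).
    auto config = ClGemmReshapedOnlyRhsKernelConfig::create(target);
    // configure() then applies the per-target, per-data-type heuristics from the .cpp above.
    return config->configure(m, n, k, b, arm_compute::DataType::F16);
}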
diff --git a/src/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmReshapedOnlyRhsKernelConfig.h b/src/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmReshapedOnlyRhsKernelConfig.h
index 1503e74eb6..e07ad993ed 100644
--- a/src/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmReshapedOnlyRhsKernelConfig.h
+++ b/src/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmReshapedOnlyRhsKernelConfig.h
@@ -50,7 +50,7 @@ public:
*/
static std::unique_ptr<IClGemmKernelConfig> create(GPUTarget gpu)
{
- switch(get_arch_from_target(gpu))
+ switch (get_arch_from_target(gpu))
{
case GPUTarget::MIDGARD:
case GPUTarget::BIFROST: